diff --git a/.gitignore b/.gitignore
index 8b8b79960..6fb9b523e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,5 @@ darwin.bazelrc
 nativelink.bazelrc
 *.log
 buck-out/
+.cargo/config.toml
+.claude/worktrees/
diff --git a/Cargo.lock b/Cargo.lock
index 6daa6a21d..d0f3bf339 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -23,9 +23,9 @@ dependencies = [

 [[package]]
 name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
 dependencies = [
  "memchr",
 ]
@@ -36,15 +36,6 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"

-[[package]]
-name = "android_system_properties"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "anstream"
 version = "0.6.21"
@@ -77,35 +68,38 @@ dependencies = [

 [[package]]
 name = "anstyle-query"
-version = "1.1.4"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys 0.60.2",
+ "windows-sys 0.61.2",
 ]

 [[package]]
 name = "anstyle-wincon"
-version = "3.0.10"
+version = "3.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.2",
 ]

 [[package]]
 name = "anyhow"
-version = "1.0.100"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"

 [[package]]
 name = "arc-swap"
-version = "1.7.1"
+version = "1.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
+checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5"
+dependencies = [
+ "rustversion",
+]

 [[package]]
 name = "arcstr"
@@ -137,9 +131,9 @@ dependencies = [

 [[package]]
 name = "async-lock"
-version = "3.4.1"
+version = "3.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc"
+checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311"
 dependencies = [
  "event-listener",
  "event-listener-strategy",
@@ -180,9 +174,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"

 [[package]]
 name = "aws-config"
-version = "1.8.8"
+version = "1.8.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "37cf2b6af2a95a20e266782b4f76f1a5e12bf412a9db2de9c1e9123b9d8c0ad8"
+checksum = "8a8fc176d53d6fe85017f230405e3255cedb4a02221cb55ed6d76dccbbb099b2"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -199,7 +193,7 @@ dependencies = [
  "bytes",
  "fastrand",
  "hex",
- "http 1.3.1",
+ "http 1.4.0",
  "ring",
  "time",
  "tokio",
@@ -210,9 +204,9 @@ dependencies = [

 [[package]]
 name = "aws-credential-types"
-version = "1.2.8"
+version = "1.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "faf26925f4a5b59eb76722b63c2892b1d70d06fa053c72e4a100ec308c1d47bc"
+checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-runtime-api",
@@ -220,11 +214,33 @@ dependencies = [
  "zeroize",
 ]

+[[package]]
+name = "aws-lc-rs"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9"
+dependencies = [
+ "aws-lc-sys",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-sys"
+version = "0.37.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549"
+dependencies = [
+ "cc",
+ "cmake",
+ "dunce",
+ "fs_extra",
+]
+
 [[package]]
 name = "aws-runtime"
-version = "1.5.12"
+version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa006bb32360ed90ac51203feafb9d02e3d21046e1fd3a450a404b90ea73e5d"
+checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75"
 dependencies = [
  "aws-credential-types",
  "aws-sigv4",
@@ -236,9 +252,12 @@ dependencies = [
  "aws-smithy-types",
  "aws-types",
  "bytes",
+ "bytes-utils",
  "fastrand",
  "http 0.2.12",
+ "http 1.4.0",
  "http-body 0.4.6",
+ "http-body 1.0.1",
  "percent-encoding",
  "pin-project-lite",
  "tracing",
@@ -247,9 +266,9 @@

 [[package]]
 name = "aws-sdk-s3"
-version = "1.109.0"
+version = "1.124.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c6d81b75f8ff78882e70c5909804b44553d56136899fb4015a0a68ecc870e0e"
+checksum = "744c09d75dfec039a05cf8e117c995ded3b0baffa6eb83f3ed7075a01d8d8947"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -259,6 +278,7 @@ dependencies = [
  "aws-smithy-eventstream",
  "aws-smithy-http",
  "aws-smithy-json",
+ "aws-smithy-observability",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
@@ -269,10 +289,9 @@ dependencies = [
  "hex",
  "hmac",
  "http 0.2.12",
- "http 1.3.1",
- "http-body 0.4.6",
+ "http 1.4.0",
  "http-body 1.0.1",
- "lru 0.12.5",
+ "lru",
  "percent-encoding",
  "regex-lite",
  "sha2",
@@ -282,15 +301,16 @@

 [[package]]
 name = "aws-sdk-sso"
-version = "1.86.0"
+version = "1.95.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a0abbfab841446cce6e87af853a3ba2cc1bc9afcd3f3550dd556c43d434c86d"
+checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
  "aws-smithy-async",
  "aws-smithy-http",
  "aws-smithy-json",
+ "aws-smithy-observability",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
@@ -298,21 +318,23 @@ dependencies = [
  "bytes",
  "fastrand",
  "http 0.2.12",
+ "http 1.4.0",
  "regex-lite",
  "tracing",
 ]

 [[package]]
 name = "aws-sdk-ssooidc"
-version = "1.88.0"
+version = "1.97.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a68d675582afea0e94d38b6ca9c5aaae4ca14f1d36faa6edb19b42e687e70d7"
+checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
  "aws-smithy-async",
  "aws-smithy-http",
  "aws-smithy-json",
+ "aws-smithy-observability",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
@@ -320,21 +342,23 @@ dependencies = [
  "bytes",
  "fastrand",
  "http 0.2.12",
+ "http 1.4.0",
  "regex-lite",
  "tracing",
 ]

 [[package]]
 name = "aws-sdk-sts"
-version = "1.88.0"
+version = "1.99.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d30990923f4f675523c51eb1c0dec9b752fb267b36a61e83cbc219c9d86da715"
+checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
  "aws-smithy-async",
  "aws-smithy-http",
  "aws-smithy-json",
+ "aws-smithy-observability",
  "aws-smithy-query",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
@@ -343,15 +367,16 @@ dependencies = [
  "aws-types",
  "fastrand",
  "http 0.2.12",
+ "http 1.4.0",
  "regex-lite",
  "tracing",
 ]

 [[package]]
 name = "aws-sigv4"
-version = "1.3.5"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68"
+checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-eventstream",
@@ -363,7 +388,7 @@ dependencies = [
  "hex",
  "hmac",
  "http 0.2.12",
- "http 1.3.1",
+ "http 1.4.0",
  "percent-encoding",
  "sha2",
  "time",
@@ -372,9 +397,9 @@

 [[package]]
 name = "aws-smithy-async"
-version = "1.2.6"
+version = "1.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c"
+checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13"
 dependencies = [
  "futures-util",
  "pin-project-lite",
@@ -383,17 +408,18 @@

 [[package]]
 name = "aws-smithy-checksums"
-version = "0.63.9"
+version = "0.64.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "165d8583d8d906e2fb5511d29201d447cc710864f075debcdd9c31c265412806"
+checksum = "180dddf5ef0f52a2f99e2fada10e16ea610e507ef6148a42bdc4d5867596aa00"
 dependencies = [
  "aws-smithy-http",
  "aws-smithy-types",
  "bytes",
  "crc-fast",
  "hex",
- "http 0.2.12",
- "http-body 0.4.6",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
  "md-5",
  "pin-project-lite",
  "sha1",
@@ -403,9 +429,9 @@

 [[package]]
 name = "aws-smithy-eventstream"
-version = "0.60.12"
+version = "0.60.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9656b85088f8d9dc7ad40f9a6c7228e1e8447cdf4b046c87e152e0805dea02fa"
+checksum = "1c0b3e587fbaa5d7f7e870544508af8ce82ea47cd30376e69e1e37c4ac746f79"
 dependencies = [
  "aws-smithy-types",
  "bytes",
@@ -414,9 +440,9 @@

 [[package]]
 name = "aws-smithy-http"
-version = "0.62.4"
+version = "0.63.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671"
+checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7"
 dependencies = [
  "aws-smithy-eventstream",
  "aws-smithy-runtime-api",
@@ -424,9 +450,10 @@ dependencies = [
  "bytes",
  "bytes-utils",
  "futures-core",
- "http 0.2.12",
- "http 1.3.1",
- "http-body 0.4.6",
+ "futures-util",
+ "http 1.4.0",
+ "http-body 1.0.1",
+ "http-body-util",
  "percent-encoding",
  "pin-project-lite",
  "pin-utils",
@@ -435,9 +462,9 @@

 [[package]]
 name = "aws-smithy-http-client"
-version = "1.1.3"
+version = "1.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1053b5e587e6fa40ce5a79ea27957b04ba660baa02b28b7436f64850152234f1"
+checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-protocol-test",
@@ -445,13 +472,13 @@
  "aws-smithy-types",
  "bytes",
  "h2 0.3.27",
- "h2 0.4.12",
+ "h2 0.4.13",
  "http 0.2.12",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 0.4.6",
  "http-body 1.0.1",
  "hyper 0.14.32",
- "indexmap 2.12.0",
+ "indexmap",
  "pin-project-lite",
  "serde",
  "serde_json",
@@ -461,27 +488,27 @@ dependencies = [

 [[package]]
 name = "aws-smithy-json"
-version = "0.61.6"
+version = "0.62.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cff418fc8ec5cadf8173b10125f05c2e7e1d46771406187b2c878557d4503390"
+checksum = "27b3a779093e18cad88bbae08dc4261e1d95018c4c5b9356a52bcae7c0b6e9bb"
 dependencies = [
  "aws-smithy-types",
 ]

 [[package]]
 name = "aws-smithy-observability"
-version = "0.1.4"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc"
+checksum = "4d3f39d5bb871aaf461d59144557f16d5927a5248a983a40654d9cf3b9ba183b"
 dependencies = [
  "aws-smithy-runtime-api",
 ]

 [[package]]
 name = "aws-smithy-protocol-test"
-version = "0.63.5"
+version = "0.63.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09e4a766a447bf2aca69100278a6777cffcef2f97199f2443d481c698dd2887c"
+checksum = "dbd2bae1fe1f465dc0e1f8865c3b36867a34848178707a31f74f92279266c78d"
 dependencies = [
  "assert-json-diff",
  "aws-smithy-runtime-api",
@@ -493,14 +520,14 @@ dependencies = [
  "regex-lite",
  "roxmltree",
  "serde_json",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
 ]

 [[package]]
 name = "aws-smithy-query"
-version = "0.60.8"
+version = "0.60.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9"
+checksum = "05f76a580e3d8f8961e5d48763214025a2af65c2fa4cd1fb7f270a0e107a71b0"
 dependencies = [
  "aws-smithy-types",
  "urlencoding",
@@ -508,9 +535,9 @@ dependencies = [

 [[package]]
 name = "aws-smithy-runtime"
-version = "1.9.3"
+version = "1.10.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40ab99739082da5347660c556689256438defae3bcefd66c52b095905730e404"
+checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-http",
@@ -521,9 +548,10 @@ dependencies = [
  "bytes",
  "fastrand",
  "http 0.2.12",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 0.4.6",
  "http-body 1.0.1",
+ "http-body-util",
  "pin-project-lite",
  "pin-utils",
  "tokio",
@@ -533,15 +561,15 @@ dependencies = [

 [[package]]
 name = "aws-smithy-runtime-api"
-version = "1.9.1"
+version = "1.11.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3683c5b152d2ad753607179ed71988e8cfd52964443b4f74fd8e552d0bbfeb46"
+checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-types",
  "bytes",
  "http 0.2.12",
- "http 1.3.1",
+ "http 1.4.0",
  "pin-project-lite",
  "tokio",
  "tracing",
@@ -550,16 +578,16 @@ dependencies = [

 [[package]]
 name = "aws-smithy-types"
-version = "1.3.3"
+version = "1.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f5b3a7486f6690ba25952cabf1e7d75e34d69eaff5081904a47bc79074d6457"
+checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3"
 dependencies = [
  "base64-simd",
  "bytes",
  "bytes-utils",
  "futures-core",
  "http 0.2.12",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 0.4.6",
  "http-body 1.0.1",
  "http-body-util",
@@ -576,18 +604,18 @@ dependencies = [

 [[package]]
 name = "aws-smithy-xml"
-version = "0.60.11"
+version = "0.60.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9c34127e8c624bc2999f3b657e749c1393bedc9cd97b92a804db8ced4d2e163"
+checksum = "b53543b4b86ed43f051644f704a98c7291b3618b67adf057ee77a366fa52fcaa"
 dependencies = [
  "xmlparser",
 ]

 [[package]]
 name = "aws-types"
-version = "1.3.9"
+version = "1.3.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2fd329bf0e901ff3f60425691410c69094dc2a1f34b331f37bfc4e9ac1565a1"
+checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-async",
@@ -599,14 +627,14 @@ dependencies = [

 [[package]]
 name = "axum"
-version = "0.8.6"
+version = "0.8.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871"
+checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
 dependencies = [
  "axum-core",
  "bytes",
  "futures-util",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 1.0.1",
  "http-body-util",
  "itoa",
@@ -617,20 +645,20 @@ dependencies = [
  "pin-project-lite",
  "serde_core",
  "sync_wrapper",
- "tower 0.5.2",
+ "tower",
  "tower-layer",
  "tower-service",
 ]

 [[package]]
 name = "axum-core"
-version = "0.5.5"
+version = "0.5.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22"
+checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
 dependencies = [
  "bytes",
  "futures-core",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 1.0.1",
  "http-body-util",
  "mime",
@@ -655,12 +683,6 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf"

-[[package]]
-name = "base64"
-version = "0.13.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
-
 [[package]]
 name = "base64"
 version = "0.22.1"
@@ -679,9 +701,9 @@ dependencies = [

 [[package]]
 name = "base64ct"
-version = "1.8.0"
+version = "1.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
+checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"

 [[package]]
 name = "bincode"
@@ -695,15 +717,9 @@ dependencies = [

 [[package]]
 name = "bitflags"
-version = "1.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
-
-[[package]]
-name = "bitflags"
-version = "2.10.0"
+version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"

 [[package]]
 name = "bitvec"
@@ -719,16 +735,18 @@ dependencies = [

 [[package]]
 name = "blake3"
-version = "1.8.2"
+version = "1.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
+checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d"
 dependencies = [
  "arrayref",
  "arrayvec",
  "cc",
  "cfg-if",
  "constant_time_eq",
+ "cpufeatures",
  "memmap2",
+ "rayon-core",
 ]

 [[package]]
@@ -756,12 +774,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7969a9ba84b0ff843813e7249eed1678d9b6607ce5a3b8f0a47af3fcf7978e6e"
 dependencies = [
  "ahash",
- "base64 0.22.1",
+ "base64",
  "bitvec",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
  "getrandom 0.3.4",
  "hex",
- "indexmap 2.12.0",
+ "indexmap",
  "js-sys",
  "once_cell",
  "rand 0.9.2",
@@ -774,15 +792,15 @@ dependencies = [

 [[package]]
 name = "bumpalo"
-version = "3.19.0"
+version = "3.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"

 [[package]]
 name = "byte-unit"
-version = "5.1.6"
+version = "5.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1cd29c3c585209b0cbc7309bfe3ed7efd8c84c21b7af29c8bfae908f8777174"
+checksum = "8c6d47a4e2961fb8721bcfc54feae6455f2f64e7054f9bc67e875f0e77f4c58d"
 dependencies = [
  "rust_decimal",
  "utf8-width",
@@ -790,9 +808,9 @@ dependencies = [

 [[package]]
 name = "bytemuck"
-version = "1.24.0"
+version = "1.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"

 [[package]]
 name = "byteorder"
@@ -837,9 +855,9 @@ dependencies = [

 [[package]]
 name = "cc"
-version = "1.2.41"
+version = "1.2.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
+checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
 dependencies = [
  "find-msvc-tools",
  "jobserver",
@@ -867,14 +885,11 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"

 [[package]]
 name = "chrono"
-version = "0.4.42"
+version = "0.4.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
 dependencies = [
- "iana-time-zone",
  "num-traits",
- "serde",
- "windows-link",
 ]

 [[package]]
@@ -906,9 +921,9 @@ dependencies = [

 [[package]]
 name = "clap"
-version = "4.5.50"
+version = "4.5.60"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623"
+checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -916,9 +931,9 @@ dependencies = [

 [[package]]
 name = "clap_builder"
-version = "4.5.50"
+version = "4.5.60"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0"
+checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876"
 dependencies = [
  "anstream",
  "anstyle",
@@ -928,9 +943,9 @@ dependencies = [

 [[package]]
 name = "clap_derive"
-version = "4.5.49"
+version = "4.5.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671"
+checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -940,9 +955,18 @@ dependencies = [

 [[package]]
 name = "clap_lex"
-version = "0.7.6"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"
+checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
+
+[[package]]
+name = "cmake"
+version = "0.1.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
+dependencies = [
+ "cc",
+]

 [[package]]
 name = "colorchoice"
@@ -994,7 +1018,7 @@ version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
 dependencies = [
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
  "once_cell",
  "tiny-keccak",
 ]
@@ -1021,15 +1045,18 @@ dependencies = [

 [[package]]
 name = "constant_time_eq"
-version = "0.3.1"
+version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
+checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"

 [[package]]
 name = "convert_case"
-version = "0.4.0"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
+checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
+dependencies = [
+ "unicode-segmentation",
+]

 [[package]]
 name = "cookie-factory"
@@ -1079,15 +1106,14 @@ checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"

 [[package]]
 name = "crc-fast"
-version = "1.3.0"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6bf62af4cc77d8fe1c22dde4e721d87f2f54056139d8c412e1366b740305f56f"
+checksum = "2fd92aca2c6001b1bf5ba0ff84ee74ec8501b52bbef0cac80bf25a6c1d87a83d"
 dependencies = [
  "crc",
  "digest",
- "libc",
- "rand 0.9.2",
- "regex",
+ "rustversion",
+ "spin 0.10.0",
 ]

 [[package]]
@@ -1105,6 +1131,25 @@ dependencies = [
  "cfg-if",
 ]

+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.21"
@@ -1203,9 +1248,9 @@ dependencies = [

 [[package]]
 name = "data-encoding"
-version = "2.9.0"
+version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
+checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea"

 [[package]]
 name = "der"
@@ -1220,9 +1265,9 @@ dependencies = [
 [[package]]
 name = "deranged"
-version = "0.5.4"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
+checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
 dependencies = [
  "powerfmt",
  "serde_core",
 ]
@@ -1252,32 +1297,20 @@ dependencies = [

 [[package]]
 name = "derive_more"
-version = "0.99.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
-dependencies = [
- "convert_case",
- "proc-macro2",
- "quote",
- "rustc_version",
- "syn",
-]
-
-[[package]]
-name = "derive_more"
-version = "2.1.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618"
+checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
 dependencies = [
  "derive_more-impl",
 ]

 [[package]]
 name = "derive_more-impl"
-version = "2.1.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b"
+checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
 dependencies = [
+ "convert_case",
  "proc-macro2",
  "quote",
  "rustc_version",
@@ -1315,10 +1348,10 @@ dependencies = [
 ]

 [[package]]
-name = "dyn-clone"
-version = "1.0.20"
+name = "dunce"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
+checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"

 [[package]]
 name = "ecdsa"
@@ -1455,21 +1488,20 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d"

 [[package]]
 name = "filetime"
-version = "0.2.26"
+version = "0.2.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed"
+checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db"
 dependencies = [
  "cfg-if",
  "libc",
  "libredox",
- "windows-sys 0.60.2",
 ]

 [[package]]
 name = "find-msvc-tools"
-version = "0.1.4"
+version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"

 [[package]]
 name = "fixedbitset"
@@ -1479,9 +1511,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"

 [[package]]
 name = "flate2"
-version = "1.1.4"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
 dependencies = [
  "crc32fast",
  "miniz_oxide",
@@ -1499,6 +1531,12 @@ version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"

+[[package]]
+name = "foldhash"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.2"
@@ -1514,6 +1552,12 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d8866fac38f53fc87fa3ae1b09ddd723e0482f8fa74323518b4c59df2c55a00a"

+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
 [[package]]
 name = "funty"
 version = "2.0.0"
@@ -1522,9 +1566,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"

 [[package]]
 name = "futures"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876"
+checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d"
 dependencies = [
  "futures-channel",
  "futures-core",
@@ -1537,9 +1581,9 @@ dependencies = [

 [[package]]
 name = "futures-channel"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
 dependencies = [
  "futures-core",
  "futures-sink",
@@ -1547,15 +1591,15 @@ dependencies = [

 [[package]]
 name = "futures-core"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"

 [[package]]
 name = "futures-executor"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f"
+checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d"
 dependencies = [
  "futures-core",
  "futures-task",
@@ -1564,15 +1608,15 @@ dependencies = [

 [[package]]
 name = "futures-io"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"

 [[package]]
 name = "futures-macro"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
+checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1581,21 +1625,21 @@ dependencies = [

 [[package]]
 name = "futures-sink"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"

 [[package]]
 name = "futures-task"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"

 [[package]]
 name = "futures-util"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
 dependencies = [
  "futures-channel",
  "futures-core",
@@ -1605,25 +1649,23 @@ dependencies = [
  "futures-task",
  "memchr",
  "pin-project-lite",
- "pin-utils",
  "slab",
 ]

 [[package]]
 name = "gcloud-auth"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5bdedbc36e6b9d8d79558fbf2ebc098745bc721e9d37d3e369558e420038e360"
+version = "1.3.0"
+source = "git+https://github.com/yoshidan/google-cloud-rust?rev=e0e790b9d4de1fbd7085dc98fde21eaf9573899a#e0e790b9d4de1fbd7085dc98fde21eaf9573899a"
 dependencies = [
  "async-trait",
- "base64 0.22.1",
+ "base64",
  "gcloud-metadata",
  "home",
  "jsonwebtoken",
  "reqwest",
  "serde",
  "serde_json",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "time",
  "token-source",
  "tokio",
@@ -1634,22 +1676,20 @@ dependencies = [
 [[package]]
 name = "gcloud-metadata"
 version = "1.0.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61f706788c1b58712c513e4d403234707fd255f49caa89d1c930197418b5fb2c"
+source = "git+https://github.com/yoshidan/google-cloud-rust?rev=e0e790b9d4de1fbd7085dc98fde21eaf9573899a#e0e790b9d4de1fbd7085dc98fde21eaf9573899a"
 dependencies = [
  "reqwest",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tokio",
 ]

 [[package]]
 name = "gcloud-storage"
-version = "1.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3515c85ca8d12aaf1104c9765f46d91a9ddd2a62b853fe12db109a40cde06e1"
+version = "1.3.0"
+source = "git+https://github.com/yoshidan/google-cloud-rust?rev=e0e790b9d4de1fbd7085dc98fde21eaf9573899a#e0e790b9d4de1fbd7085dc98fde21eaf9573899a"
 dependencies = [
  "anyhow",
- "base64 0.22.1",
+ "base64",
  "bytes",
  "futures-util",
  "gcloud-auth",
@@ -1665,7 +1705,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sha2",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "time",
  "token-source",
  "tokio",
@@ -1686,9 +1726,9 @@ dependencies = [

 [[package]]
 name = "getrandom"
-version = "0.2.16"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -1712,10 +1752,17 @@ dependencies = [
 ]

 [[package]]
-name = "glob"
-version = "0.3.3"
+name = "getrandom"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+ "wasip3",
+]

 [[package]]
 name = "group"
@@ -1740,7 +1787,7 @@ dependencies = [
  "futures-sink",
  "futures-util",
  "http 0.2.12",
- "indexmap 2.12.0",
+ "indexmap",
  "slab",
  "tokio",
  "tokio-util",
@@ -1749,17 +1796,17 @@ dependencies = [

 [[package]]
 name = "h2"
-version = "0.4.12"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
 dependencies = [
  "atomic-waker",
  "bytes",
  "fnv",
  "futures-core",
  "futures-sink",
- "http 1.3.1",
- "indexmap 2.12.0",
+ "http 1.4.0",
+ "indexmap",
  "slab",
  "tokio",
  "tokio-util",
@@ -1777,28 +1824,25 @@ dependencies = [
  "zerocopy",
 ]

-[[package]]
-name = "hashbrown"
-version = "0.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
-
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
 dependencies = [
- "allocator-api2",
- "equivalent",
- "foldhash",
+ "foldhash 0.1.5",
 ]

 [[package]]
 name = "hashbrown"
-version = "0.16.0"
+version = "0.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+dependencies = [
+ "allocator-api2",
+ "equivalent",
+ "foldhash 0.2.0",
+]

 [[package]]
 name = "heck"
@@ -1832,11 +1876,22 @@ dependencies = [

 [[package]]
 name = "home"
-version = "0.5.11"
+version = "0.5.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "hostname"
+version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
+checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd"
 dependencies = [
- "windows-sys 0.59.0",
+ "cfg-if",
+ "libc",
+ "windows-link",
 ]

 [[package]]
@@ -1852,12 +1907,11 @@ dependencies = [

 [[package]]
 name = "http"
-version = "1.3.1"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
+checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
 dependencies = [
  "bytes",
- "fnv",
  "itoa",
 ]

@@ -1879,7 +1933,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
 dependencies = [
  "bytes",
- "http 1.3.1",
+ "http 1.4.0",
 ]

 [[package]]
@@ -1890,7 +1944,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
 dependencies = [
  "bytes",
  "futures-core",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 1.0.1",
  "pin-project-lite",
 ]
@@ -1939,16 +1993,16 @@ dependencies = [

 [[package]]
 name = "hyper"
-version = "1.7.0"
+version = "1.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e"
+checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
 dependencies = [
  "atomic-waker",
  "bytes",
  "futures-channel",
  "futures-core",
- "h2 0.4.12",
- "http 1.3.1",
+ "h2 0.4.13",
+ "http 1.4.0",
  "http-body 1.0.1",
  "httparse",
  "httpdate",
@@ -1966,8 +2020,8 @@ version = "0.27.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
 dependencies = [
- "http 1.3.1",
- "hyper 1.7.0",
+ "http 1.4.0",
+ "hyper 1.8.1",
  "hyper-util",
  "rustls",
  "rustls-native-certs",
@@ -1976,7 +2030,6 @@ dependencies = [
  "tokio",
  "tokio-rustls",
  "tower-service",
- "webpki-roots 1.0.3",
 ]

 [[package]]
@@ -1985,7 +2038,7 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0"
 dependencies = [
- "hyper 1.7.0",
+ "hyper 1.8.1",
  "hyper-util",
  "pin-project-lite",
  "tokio",
@@ -1994,57 +2047,32 @@ dependencies = [

 [[package]]
 name = "hyper-util"
-version = "0.1.17"
+version = "0.1.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
 dependencies = [
- "base64 0.22.1",
+ "base64",
  "bytes",
  "futures-channel",
- "futures-core",
  "futures-util",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 1.0.1",
- "hyper 1.7.0",
+ "hyper 1.8.1",
  "ipnet",
  "libc",
  "percent-encoding",
  "pin-project-lite",
- "socket2 0.6.1",
+ "socket2 0.6.2",
  "tokio",
  "tower-service",
  "tracing",
 ]

-[[package]]
-name = "iana-time-zone"
-version = "0.1.64"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
-dependencies = [
- "android_system_properties",
- "core-foundation-sys",
- "iana-time-zone-haiku",
- "js-sys",
- "log",
- "wasm-bindgen",
- "windows-core",
-]
-
-[[package]]
-name = "iana-time-zone-haiku"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
-dependencies = [
- "cc",
-]
-
 [[package]]
 name = "icu_collections"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
+checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
 dependencies = [
  "displaydoc",
  "potential_utf",
@@ -2055,9 +2083,9 @@ dependencies = [

 [[package]]
 name = "icu_locale_core"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
+checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
 dependencies = [
  "displaydoc",
  "litemap",
@@ -2068,11 +2096,10 @@ dependencies = [

 [[package]]
 name = "icu_normalizer"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
+checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
 dependencies = [
- "displaydoc",
  "icu_collections",
  "icu_normalizer_data",
  "icu_properties",
@@ -2083,42 +2110,38 @@ dependencies = [

 [[package]]
 name = "icu_normalizer_data"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
+checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"

 [[package]]
 name = "icu_properties"
-version = "2.0.1"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
+checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
 dependencies = [
- "displaydoc",
  "icu_collections",
  "icu_locale_core",
  "icu_properties_data",
  "icu_provider",
- "potential_utf",
  "zerotrie",
  "zerovec",
 ]

 [[package]]
 name = "icu_properties_data"
-version = "2.0.1"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
+checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"

 [[package]]
 name = "icu_provider"
-version = "2.0.0"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
+checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
 dependencies = [
  "displaydoc",
  "icu_locale_core",
- "stable_deref_trait",
- "tinystr",
  "writeable",
  "yoke",
  "zerofrom",
@@ -2126,6 +2149,12 @@ dependencies = [
  "zerovec",
 ]

+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
+
 [[package]]
 name = "ident_case"
 version = "1.0.1"
@@ -2155,23 +2184,12 @@ dependencies = [

 [[package]]
 name = "indexmap"
-version = "1.9.3"
+version = "2.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
-dependencies = [
- "autocfg",
- "hashbrown 0.12.3",
- "serde",
-]
-
-[[package]]
-name = "indexmap"
-version = "2.12.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
 dependencies = [
  "equivalent",
- "hashbrown 0.16.0",
+ "hashbrown 0.16.1",
  "serde",
  "serde_core",
 ]
@@ -2184,9 +2202,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"

 [[package]]
 name = "iri-string"
-version = "0.7.8"
+version = "0.7.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2"
+checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a"
 dependencies = [
  "memchr",
  "serde",
@@ -2209,9 +2227,9 @@ dependencies = [

 [[package]]
 name = "itoa"
-version = "1.0.15"
+version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"

 [[package]]
 name = "jni"
@@ -2247,9 +2265,9 @@ dependencies = [

 [[package]]
 name = "js-sys"
-version = "0.3.81"
+version = "0.3.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305"
+checksum = "14dc6f6450b3f6d4ed5b16327f38fed626d375a886159ca555bd7822c0c3a5a6"
 dependencies = [
  "once_cell",
  "wasm-bindgen",
@@ -2261,9 +2279,9 @@ version = "10.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1"
 dependencies = [
- "base64 0.22.1",
+ "base64",
  "ed25519-dalek",
- "getrandom 0.2.16",
+ "getrandom 0.2.17",
  "hmac",
  "js-sys",
  "p256",
@@ -2284,20 +2302,26 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 dependencies = [
- "spin",
+ "spin 0.9.8",
 ]

+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
+
 [[package]]
 name = "libc"
-version = "0.2.177"
+version = "0.2.182"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
+checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"

 [[package]]
 name = "libm"
-version = "0.2.15"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
+checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"

 [[package]]
 name = "libmimalloc-sys"
@@ -2311,26 +2335,26 @@ dependencies = [

 [[package]]
 name = "libredox"
-version = "0.1.10"
+version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb"
+checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
  "libc",
- "redox_syscall",
+ "redox_syscall 0.7.2",
 ]

 [[package]]
 name = "linux-raw-sys"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"

 [[package]]
 name = "litemap"
-version = "0.8.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
+checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"

 [[package]]
 name = "lock_api"
@@ -2343,24 +2367,18 @@ dependencies = [

 [[package]]
 name = "log"
-version = "0.4.28"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
-
-[[package]]
-name = "lru"
-version = "0.12.5"
+version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
-dependencies = [
- "hashbrown 0.15.5",
-]
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"

 [[package]]
 name = "lru"
 version = "0.16.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a1dc47f592c06f33f8e3aea9591776ec7c9f9e4124778ff8a3c3b87159f7e593"
+dependencies = [
+ "hashbrown 0.16.1",
+]

 [[package]]
 name = "lru-slab"
@@ -2449,15 +2467,15 @@ dependencies = [

 [[package]]
 name = "memchr"
-version = "2.7.6"
+version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"

 [[package]]
 name = "memmap2"
-version = "0.9.9"
+version = "0.9.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
 dependencies = [
  "libc",
 ]
@@ -2515,9 +2533,9 @@ dependencies = [

 [[package]]
 name = "mio"
-version = "1.1.0"
+version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
+checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
 dependencies = [
  "libc",
  "wasi",
@@ -2532,9 +2550,9 @@ checksum = "4e1d4c44418358edcac6e1d9ce59cea7fb38052429c7704033f1196f0c179e6a"

 [[package]]
 name = "mongocrypt"
-version = "0.3.1"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22426d6318d19c5c0773f783f85375265d6a8f0fa76a733da8dc4355516ec63d"
+checksum = "8da0cd419a51a5fb44819e290fbdb0665a54f21dead8923446a799c7f4d26ad9"
 dependencies = [
  "bson",
  "mongocrypt-sys",
@@ -2544,25 +2562,22 @@ dependencies = [

 [[package]]
 name = "mongocrypt-sys"
-version = "0.1.4+1.12.0"
+version = "0.1.5+1.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dda42df21d035f88030aad8e877492fac814680e1d7336a57b2a091b989ae388"
+checksum = "224484c5d09285a7b8cb0a0c117e847ebd14cb6e4470ecf68cdb89c503b0edb9"

 [[package]]
 name = "mongodb"
-version = "3.3.0"
+version = "3.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "622f272c59e54a3c85f5902c6b8e7b1653a6b6681f45e4c42d6581301119a4b8"
+checksum = "803dd859e8afa084c255a8effd8000ff86f7c8076a50cd6d8c99e8f3496f75c2"
 dependencies = [
- "async-trait",
- "base64 0.13.1",
- "bitflags 1.3.2",
+ "base64",
+ "bitflags",
  "bson",
- "chrono",
  "derive-where",
- "derive_more 0.99.20",
+ "derive_more",
  "futures-core",
- "futures-executor",
  "futures-io",
  "futures-util",
  "hex",
@@ -2571,10 +2586,9 @@ dependencies = [
  "md-5",
  "mongocrypt",
  "mongodb-internal-macros",
- "once_cell",
  "pbkdf2",
  "percent-encoding",
- "rand 0.8.5",
+ "rand 0.9.2",
  "rustc_version_runtime",
  "rustls",
  "rustversion",
@@ -2583,24 +2597,24 @@ dependencies = [
  "serde_with",
  "sha1",
  "sha2",
- "socket2 0.5.10",
+ "socket2 0.6.2",
  "stringprep",
  "strsim",
  "take_mut",
- "thiserror 1.0.69",
+ "thiserror 2.0.18",
  "tokio",
  "tokio-rustls",
  "tokio-util",
  "typed-builder",
  "uuid",
- "webpki-roots 0.26.11",
+ "webpki-roots",
 ]

 [[package]]
 name = "mongodb-internal-macros"
-version = "3.3.0"
+version = "3.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63981427a0f26b89632fd2574280e069d09fb2912a3138da15de0174d11dd077"
+checksum = "a973ef3dd3dbc6f6e65bbdecfd9ec5e781b9e7493b0f369a7c62e35d8e5ae2c8"
 dependencies = [
  "macro_magic",
  "proc-macro2",
@@ -2623,22 +2637,28 @@ dependencies = [
  "bytes",
  "clap",
  "futures",
- "hyper 1.7.0",
+ "hyper 1.8.1",
  "hyper-util",
  "mimalloc",
  "nativelink-config",
  "nativelink-error",
+ "nativelink-proto",
  "nativelink-scheduler",
  "nativelink-service",
  "nativelink-store",
  "nativelink-util",
  "nativelink-worker",
+ "prost",
+ "prost-types",
  "rand 0.9.2",
  "rustls-pki-types",
+ "sha2",
+ "socket2 0.5.10",
+ "tempfile",
  "tokio",
  "tokio-rustls",
- "tonic 0.13.1",
- "tower 0.5.2",
+ "tonic",
+ "tower",
  "tracing",
 ]

@@ -2672,7 +2692,7 @@ dependencies = [
  "serde",
  "serde_json5",
  "tokio",
- "tonic 0.13.1",
+ "tonic",
  "url",
  "uuid",
  "walkdir",
@@ -2711,12 +2731,14 @@
 name = "nativelink-proto"
 version = "1.0.0-rc2"
 dependencies = [
- "derive_more 2.1.0",
+ "derive_more",
  "prost",
  "prost-build",
  "prost-types",
- "tonic 0.13.1",
+ "tonic",
  "tonic-build",
+ "tonic-prost",
+ "tonic-prost-build",
 ]

 [[package]]
@@ -2739,7 +2761,7 @@ dependencies = [
  "async-trait",
  "bytes",
  "futures",
- "lru 0.16.3",
+ "lru",
  "mock_instant",
  "nativelink-config",
  "nativelink-error",
@@ -2761,7 +2783,7 @@ dependencies = [
  "static_assertions",
  "tokio",
  "tokio-stream",
- "tonic 0.13.1",
+ "tonic",
  "tracing",
  "tracing-test",
  "uuid",
@@ -2778,7 +2800,7 @@ dependencies = [
  "futures",
  "hex",
  "http-body-util",
- "hyper 1.7.0",
+ "hyper 1.8.1",
  "hyper-util",
  "nativelink-config",
  "nativelink-error",
@@ -2800,8 +2822,9 @@ dependencies = [
  "sha2",
  "tokio",
  "tokio-stream",
- "tonic 0.13.1",
- "tower 0.5.2",
+ "tonic",
+ "tonic-prost",
+ "tower",
  "tracing",
  "tracing-test",
  "uuid",
@@ -2818,7 +2841,7 @@ dependencies = [
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
- "base64 0.22.1",
+ "base64",
  "bincode",
  "blake3",
  "byteorder",
@@ -2828,10 +2851,10 @@ dependencies = [
  "gcloud-auth",
  "gcloud-storage",
  "hex",
- "http 1.3.1",
+ "http 1.4.0",
  "http-body 1.0.1",
  "http-body-util",
- "hyper 1.7.0",
+ "hyper 1.8.1",
  "hyper-rustls",
  "hyper-util",
  "itertools",
@@ -2866,7 +2889,7 @@ dependencies = [
  "tokio",
  "tokio-stream",
  "tokio-util",
- "tonic 0.13.1",
+ "tonic",
  "tracing",
  "tracing-test",
  "url",
@@ -2878,18 +2901,18 @@ name = "nativelink-util"
 version = "1.0.0-rc2"
 dependencies = [
  "async-trait",
- "base64 0.22.1",
- "bitflags 2.10.0",
+ "base64",
+ "bitflags",
  "blake3",
  "bytes",
  "futures",
  "hex",
  "http-body-util",
  "humantime",
- "hyper 1.7.0",
+ "hyper 1.8.1",
  "hyper-util",
  "libc",
- "lru 0.16.3",
+ "lru",
  "mock_instant",
  "nativelink-config",
  "nativelink-error",
@@ -2909,6 +2932,7 @@ dependencies = [
  "prost",
  "prost-types",
  "rand 0.9.2",
+ "rayon",
  "rlimit",
  "serde",
  "serde_json",
@@ -2917,8 +2941,8 @@ dependencies = [
  "tokio",
  "tokio-stream",
  "tokio-util",
- "tonic 0.13.1",
- "tower 0.5.2",
+ "tonic",
+ "tower",
  "tracing",
  "tracing-opentelemetry",
  "tracing-subscriber",
@@ -2936,12 +2960,15 @@ dependencies = [
  "filetime",
  "formatx",
  "futures",
- "hyper 1.7.0",
+ "hostname",
+ "hyper 1.8.1",
+ "libc",
  "nativelink-config",
  "nativelink-error",
  "nativelink-macro",
  "nativelink-metric",
  "nativelink-proto",
+ "nativelink-service",
  "nativelink-store",
  "nativelink-util",
  "opentelemetry",
@@ -2959,7 +2986,8 @@ dependencies = [
  "tempfile",
  "tokio",
  "tokio-stream",
- "tonic 0.13.1",
+ "tonic",
+ "tonic-prost",
  "tracing",
  "tracing-test",
  "uuid",
@@ -3012,9 +3040,9 @@ dependencies = [

 [[package]]
 name = "num-conv"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"

 [[package]]
 name = "num-integer"
@@ -3071,29 +3099,28 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"

 [[package]]
 name = "openssl-probe"
-version = "0.1.6"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"

 [[package]]
 name = "opentelemetry"
-version = "0.29.1"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c"
+checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0"
 dependencies = [
  "futures-core",
  "futures-sink",
  "js-sys",
  "pin-project-lite",
- "thiserror 2.0.17",
- "tracing",
+ "thiserror 2.0.18",
 ]

 [[package]]
 name = "opentelemetry-appender-tracing"
-version = "0.29.1"
+version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e716f864eb23007bdd9dc4aec381e188a1cee28eecf22066772b5fd822b9727d"
+checksum = "ef6a1ac5ca3accf562b8c306fa8483c85f4390f768185ab775f242f7fe8fdcc2"
 dependencies = [
  "opentelemetry",
  "tracing",
@@ -3103,66 +3130,64 @@ dependencies = [

 [[package]]
 name = "opentelemetry-http"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed"
+checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d"
 dependencies = [
  "async-trait",
  "bytes",
- "http 1.3.1",
+ "http 1.4.0",
  "opentelemetry",
 ]

 [[package]]
 name = "opentelemetry-otlp"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656"
+checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf"
 dependencies = [
- "futures-core",
- "http 1.3.1",
+ "http 1.4.0",
  "opentelemetry",
  "opentelemetry-proto",
  "opentelemetry_sdk",
  "prost",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
  "tokio",
- "tonic 0.12.3",
+ "tonic",
 ]

 [[package]]
 name = "opentelemetry-proto"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3"
+checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f"
 dependencies = [
  "opentelemetry",
  "opentelemetry_sdk",
  "prost",
- "tonic 0.12.3",
+ "tonic",
+ "tonic-prost",
 ]

 [[package]]
 name = "opentelemetry-semantic-conventions"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84b29a9f89f1a954936d5aa92f19b2feec3c8f3971d3e96206640db7f9706ae3"
+checksum = "e62e29dfe041afb8ed2a6c9737ab57db4907285d999ef8ad3a59092a36bdc846"

 [[package]]
 name = "opentelemetry_sdk"
-version = "0.29.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b"
+checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd"
 dependencies = [
  "futures-channel",
  "futures-executor",
  "futures-util",
- "glob",
  "opentelemetry",
  "percent-encoding",
  "rand 0.9.2",
- "serde_json",
- "thiserror 2.0.17",
+ "thiserror 2.0.18",
 ]

 [[package]]
@@ -3219,7 +3244,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
 dependencies = [
  "cfg-if",
  "libc",
- "redox_syscall",
+ "redox_syscall 0.5.18",
  "smallvec",
  "windows-link",
 ]
@@ -3230,14 +3255,14 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "edb45b6331bbdbb54c9a29413703e892ab94f83a31e4a546c778495a91e7fbca"
 dependencies = [
- "bitflags 2.10.0",
+ "bitflags",
 ]

 [[package]]
 name = "pbkdf2"
-version = "0.11.0"
+version = "0.12.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917"
+checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2"
 dependencies = [
  "digest",
 ]
@@ -3248,7 +3273,7 @@ version = "3.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be"
 dependencies = [
- "base64 0.22.1",
+ "base64",
  "serde_core",
 ]

@@ -3269,9 +3294,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"

 [[package]]
 name = "pest"
-version = "2.8.3"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4"
+checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662"
 dependencies = [
  "memchr",
  "ucd-trie",
@@ -3279,9 +3304,9 @@ dependencies = [

 [[package]]
 name = "pest_derive"
-version = "2.8.3"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de"
+checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77"
 dependencies = [
  "pest",
  "pest_generator",
@@ -3289,9 +3314,9 @@ dependencies = [

 [[package]]
 name = "pest_generator"
-version = "2.8.3"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843"
+checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f"
 dependencies = [
  "pest",
  "pest_meta",
@@ -3302,9 +3327,9 @@ dependencies = [

 [[package]]
 name = "pest_meta"
-version = "2.8.3"
+version = "2.8.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a"
+checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220"
 dependencies = [
  "pest",
  "sha2",
@@ -3312,12 +3337,13 @@ dependencies = [

 [[package]]
 name = "petgraph"
-version = "0.7.1"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
+checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
 dependencies = [
  "fixedbitset",
- "indexmap 2.12.0",
+ "hashbrown 0.15.5",
+ "indexmap",
 ]

 [[package]]
@@ -3381,9 +3407,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c1"

 [[package]]
 name = "potential_utf"
-version = "0.1.3"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a"
+checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
 dependencies = [
  "zerovec",
 ]
@@ -3434,18 +3460,18 @@ dependencies = [

 [[package]]
 name = "proc-macro2"
-version = "1.0.101"
+version = "1.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
 dependencies = [
  "unicode-ident",
 ]

 [[package]]
 name = "prost"
-version = "0.13.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
+checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
 dependencies = [
  "bytes",
  "prost-derive",
 ]

 [[package]]
 name = "prost-build"
-version = "0.13.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
+checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
 dependencies = [
  "heck",
  "itertools",
  "log",
  "multimap",
- "once_cell",
  "petgraph",
  "prettyplease",
  "prost",
@@ -3473,9 +3498,9 @@ dependencies = [

 [[package]]
 name = "prost-derive"
-version = "0.13.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
+checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
 dependencies = [
  "anyhow",
  "itertools",
@@ -3486,9 +3511,9 @@ dependencies = [

 [[package]]
 name = "prost-types"
-version = "0.13.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
+checksum =
"8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -3506,8 +3531,8 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.1", - "thiserror 2.0.17", + "socket2 0.6.2", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -3519,6 +3544,7 @@ version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -3528,7 +3554,7 @@ dependencies = [ "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -3543,16 +3569,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.6.2", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -3587,7 +3613,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3607,7 +3633,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3616,23 +3642,43 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redis" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47ba378d39b8053bffbfc2750220f5a24a06189b5129523d5db01618774e0239" +checksum = "dbe7f6e08ce1c6a9b21684e643926f6fc3b683bc006cb89afd72a5e0eb16e3a2" dependencies = [ "ahash", "arc-swap", @@ -3651,7 +3697,7 @@ dependencies = [ "rand 0.9.2", "ryu", "sha1_smol", - "socket2 0.6.1", + "socket2 0.6.2", "tokio", "tokio-util", "url", @@ -3674,14 +3720,14 @@ dependencies = [ [[package]] name = "redis-test" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7a5cadf877f090eebfef0f4e8646c56531ab416b388410fe1c974f4e6e9cb20" +checksum = 
"5143ae9e73f2ff0f3509af5e3a056b48bac2d1e1caa093257f20a9e68ef7534f" dependencies = [ "futures", "rand 0.9.2", "redis", - "socket2 0.6.1", + "socket2 0.6.2", "tempfile", ] @@ -3691,34 +3737,23 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] -name = "ref-cast" -version = "1.0.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.25" +name = "redox_syscall" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +checksum = "6d94dd2f7cd932d4dc02cc8b2b50dfd38bd079a4e5d79198b99743d7fcf9a4b4" dependencies = [ - "proc-macro2", - "quote", - "syn", + "bitflags", ] [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -3728,9 +3763,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -3739,15 +3774,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "relative-path" @@ -3760,19 +3795,19 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "encoding_rs", "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-rustls", "hyper-util", "js-sys", @@ -3784,6 +3819,7 @@ dependencies = [ "quinn", "rustls", "rustls-pki-types", + "rustls-platform-verifier", "serde", "serde_json", "serde_urlencoded", @@ -3791,7 +3827,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", - "tower 0.5.2", + "tower", "tower-http", "tower-service", "url", @@ -3799,21 +3835,20 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.3", ] [[package]] name = "reqwest-middleware" -version = "0.4.2" +version = "0.5.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "57f17d28a6e6acfe1733fe24bcd30774d13bffa4b8a22535b4c8c98423088d4e" +checksum = "199dda04a536b532d0cc04d7979e39b1c763ea749bf91507017069c00b96056f" dependencies = [ "anyhow", "async-trait", - "http 1.3.1", + "http 1.4.0", "reqwest", "serde", - "thiserror 1.0.69", + "thiserror 2.0.18", "tower-service", ] @@ -3835,7 +3870,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -3881,9 +3916,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.39.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" +checksum = "61f703d19852dbf87cbc513643fa81428361eb6940f1ac14fd58155d295a3eb0" dependencies = [ "arrayvec", "num-traits", @@ -3916,11 +3951,11 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags", "errno", "libc", "linux-raw-sys", @@ -3929,10 +3964,11 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "aws-lc-rs", "log", "once_cell", "ring", @@ -3944,9 +3980,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -3956,9 +3992,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.1" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -3993,10 +4029,11 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" [[package]] name = "rustls-webpki" -version = "0.103.7" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -4010,9 +4047,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -4041,30 +4078,6 @@ dependencies = [ "windows-sys 0.61.2", ] 
-[[package]] -name = "schemars" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - -[[package]] -name = "schemars" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" -dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", -] - [[package]] name = "scopeguard" version = "1.2.0" @@ -4093,11 +4106,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.5.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.10.0", + "bitflags", "core-foundation", "core-foundation-sys", "libc", @@ -4106,9 +4119,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" dependencies = [ "core-foundation-sys", "libc", @@ -4168,16 +4181,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ - "indexmap 2.12.0", + "indexmap", "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -4205,28 +4218,19 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.15.1" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa66c845eee442168b2c8134fec70ac50dc20e760769c8ba0ad1319ca1959b04" +checksum = "381b283ce7bc6b476d903296fb59d0d36633652b633b27f64db4fb46dcbfc3b9" dependencies = [ - "base64 0.22.1", - "chrono", - "hex", - "indexmap 1.9.3", - "indexmap 2.12.0", - "schemars 0.9.0", - "schemars 1.0.4", "serde_core", - "serde_json", "serde_with_macros", - "time", ] [[package]] name = "serde_with_macros" -version = "3.15.1" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91a903660542fced4e99881aa481bdbaec1634568ee02e0b8bd57c64cb38955" +checksum = "a6d4e30573c8cb306ed6ab1dca8423eec9a463ea0e155f45399455e0368b27e0" dependencies = [ "darling", "proc-macro2", @@ -4236,11 +4240,12 @@ dependencies = [ [[package]] name = "serial_test" -version = "3.2.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9" +checksum = "911bd979bf1070a3f3aa7b691a3b3e9968f339ceeec89e08c280a8a22207a32f" dependencies = [ - "futures", + "futures-executor", + "futures-util", "once_cell", "parking_lot", "scc", @@ -4249,9 +4254,9 @@ dependencies = [ [[package]] name = "serial_test_derive" -version = "3.2.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" +checksum = 
"0a7d91949b85b0d2fb687445e448b40d322b6b3e4af6b44a29b21d9a5f33e6d9" dependencies = [ "proc-macro2", "quote", @@ -4284,6 +4289,16 @@ dependencies = [ "cfg-if", "cpufeatures", "digest", + "sha2-asm", +] + +[[package]] +name = "sha2-asm" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b845214d6175804686b2bd482bcffe96651bb2d1200742b712003504a2dac1ab" +dependencies = [ + "cc", ] [[package]] @@ -4297,9 +4312,9 @@ dependencies = [ [[package]] name = "shellexpand" -version = "3.1.1" +version = "3.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" +checksum = "32824fab5e16e6c4d86dc1ba84489390419a39f97699852b66480bb87d297ed8" [[package]] name = "shlex" @@ -4309,10 +4324,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -4328,27 +4344,27 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simple_asn1" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" +checksum = "0d585997b0ac10be3c5ee635f1bab02d512760d14b7c468801ac8a01d9ae5f1d" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.17", + "thiserror 2.0.18", "time", ] [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -4368,9 +4384,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" dependencies = [ "libc", "windows-sys 0.60.2", @@ -4382,6 +4398,12 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spin" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591" + [[package]] name = "spki" version = "0.7.3" @@ -4429,9 +4451,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.107" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", 
"quote", @@ -4472,12 +4494,12 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.23.0" +version = "3.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "82a72c767771b47409d2345987fda8628641887d5466101319899796367354a0" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.1", "once_cell", "rustix", "windows-sys 0.61.2", @@ -4494,11 +4516,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -4514,9 +4536,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -4534,30 +4556,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", @@ -4574,9 +4596,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -4608,9 +4630,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "libc", @@ -4618,7 +4640,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2 0.6.2", "tokio-macros", "windows-sys 0.61.2", ] @@ -4646,9 +4668,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -4657,9 +4679,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -4671,106 +4693,85 @@ dependencies = [ [[package]] name = "tonic" -version = "0.12.3" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" -dependencies = [ - "async-trait", - "base64 0.22.1", - "bytes", - "http 1.3.1", - "http-body 1.0.1", - "http-body-util", - "hyper 1.7.0", - "hyper-timeout", - "hyper-util", - "percent-encoding", - "pin-project", - "prost", - "tokio", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", - "zstd", -] - -[[package]] -name = "tonic" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" dependencies = [ "async-trait", "axum", - "base64 0.22.1", + "base64", "bytes", "flate2", - "h2 0.4.12", - "http 1.3.1", + "h2 0.4.13", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", + "hyper 1.8.1", "hyper-timeout", "hyper-util", "percent-encoding", "pin-project", - "prost", "rustls-native-certs", - "socket2 0.5.10", + "socket2 0.6.2", + "sync_wrapper", "tokio", "tokio-rustls", "tokio-stream", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", "tracing", + "zstd", ] [[package]] name = "tonic-build" -version = "0.13.1" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" +checksum = "1882ac3bf5ef12877d7ed57aad87e75154c11931c2ba7e6cde5e22d63522c734" dependencies = [ "prettyplease", "proc-macro2", - "prost-build", - "prost-types", "quote", "syn", ] [[package]] -name = "tower" -version = "0.4.13" +name = "tonic-prost" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", + "bytes", + "prost", + "tonic", +] + +[[package]] +name = "tonic-prost-build" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3144df636917574672e93d0f56d7edec49f90305749c668df5101751bb8f95a" +dependencies = [ + "prettyplease", + "proc-macro2", + "prost-build", + "prost-types", + "quote", + "syn", + "tempfile", + "tonic-build", ] [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = 
"ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", - "indexmap 2.12.0", + "indexmap", "pin-project-lite", "slab", "sync_wrapper", @@ -4783,18 +4784,18 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "bitflags", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", ] @@ -4813,9 +4814,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -4824,9 +4825,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -4835,9 +4836,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -4856,14 +4857,12 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.30.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" +checksum = "1ac28f2d093c6c477eaa76b23525478f38de514fa9aeb1285738d4b97a9552fc" dependencies = [ "js-sys", - "once_cell", "opentelemetry", - "opentelemetry_sdk", "smallvec", "tracing", "tracing-core", @@ -4883,9 +4882,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "matchers", "nu-ansi-term", @@ -4904,9 +4903,9 @@ dependencies = [ [[package]] name = "tracing-test" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" +checksum = "19a4c448db514d4f24c5ddb9f73f2ee71bfb24c526cf0c570ba142d1119e0051" dependencies = [ "tracing-core", "tracing-subscriber", @@ -4915,9 +4914,9 @@ dependencies = [ [[package]] name = "tracing-test-macro" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" +checksum = 
"ad06847b7afb65c7866a36664b75c40b895e318cea4f71299f013fb22965329d" dependencies = [ "quote", "syn", @@ -4931,18 +4930,18 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typed-builder" -version = "0.20.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd9d30e3a08026c78f246b173243cf07b3696d274debd26680773b6773c2afc7" +checksum = "398a3a3c918c96de527dc11e6e846cd549d4508030b8a33e1da12789c856b81a" dependencies = [ "typed-builder-macro", ] [[package]] name = "typed-builder-macro" -version = "0.20.1" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" +checksum = "0e48cea23f68d1f78eb7bc092881b6bb88d3d6b5b7e6234f6f9c911da1ffb221" dependencies = [ "proc-macro2", "quote", @@ -4963,9 +4962,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-bidi" @@ -4975,24 +4974,30 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.20" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-properties" -version = "0.1.3" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-xid" @@ -5014,9 +5019,9 @@ checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -5032,9 +5037,9 @@ checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "utf8-width" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" +checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091" [[package]] name = "utf8_iter" @@ -5050,14 +5055,14 @@ checksum = 
"06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ "atomic", - "getrandom 0.3.4", + "getrandom 0.4.1", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -5106,47 +5111,43 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] -name = "wasm-bindgen" -version = "0.2.104" +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", + "wit-bindgen", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" +name = "wasm-bindgen" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +checksum = "60722a937f594b7fde9adb894d7c092fc1bb6612897c46368d18e7a20208eff2" dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "8a89f4650b770e4521aa6573724e2aed4704372151bd0de9d16a3bbabb87441a" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -5155,9 +5156,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "0fac8c6395094b6b91c4af293f4c79371c163f9a6f56184d2c9a85f5a95f3950" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5165,31 +5166,53 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "ab3fabce6159dc20728033842636887e4877688ae94382766e00b180abac9d60" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "de0e091bdb824da87dc01d967388880d017a0a9bc4f3bdc0d86ee9f9336e3bb5" dependencies = [ "unicode-ident", ] +[[package]] +name = 
"wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" -version = "0.4.2" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb" dependencies = [ "futures-util", "js-sys", @@ -5198,11 +5221,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "705eceb4ce901230f8625bd1d665128056ccbe4b7408faa625eec1ba80f59a97" dependencies = [ "js-sys", "wasm-bindgen", @@ -5220,27 +5255,18 @@ dependencies = [ [[package]] name = "webpki-root-certs" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d651ec480de84b762e7be71e6efa7461699c19d9e2c272c8d93455f567786e" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" dependencies = [ "rustls-pki-types", ] [[package]] name = "webpki-roots" -version = "0.26.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" -dependencies = [ - "webpki-roots 1.0.3", -] - -[[package]] -name = "webpki-roots" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -5254,65 +5280,12 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "windows-link" version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - [[package]] name = "windows-sys" version = "0.45.0" @@ -5331,15 +5304,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-sys" version = "0.60.2" @@ -5546,15 +5510,97 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] 
name = "wyz" @@ -5585,11 +5631,10 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -5597,9 +5642,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", @@ -5609,18 +5654,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", @@ -5656,9 +5701,9 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -5667,9 +5712,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -5678,15 +5723,21 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zstd" version = "0.13.3" diff --git a/Cargo.toml b/Cargo.toml index a94f54aee..e61093753 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,8 @@ rust-version = "1.87.0" version = "1.0.0-rc2" [profile.release] -lto = true +lto = "thin" +codegen-units = 16 # Prefer this profile in CI, for instance via `cargo test --all --profile=smol`. # It reduces the size of the `target` directory from ~12GB to ~1GB. 
@@ -55,42 +56,61 @@ hyper = { version = "1.6.0", default-features = false } hyper-util = { version = "0.1.11", default-features = false, features = [ "tracing", ] } -mimalloc = { version = "0.1.44", default-features = false } +mimalloc = { version = "0.1.44", default-features = false, features = ["override", "v3"] } rand = { version = "0.9.0", default-features = false, features = [ "thread_rng", ] } rustls-pki-types = { version = "1.13.1", features = [ "std", ], default-features = false } +socket2 = { version = "0.5.10", default-features = false } tokio = { version = "1.44.1", features = [ "fs", "io-util", + "parking_lot", "rt-multi-thread", "signal", ], default-features = false } tokio-rustls = { version = "0.26.2", default-features = false, features = [ - "ring", + "aws_lc_rs", ] } -tonic = { version = "0.13.0", features = [ - "tls-ring", +tonic = { version = "0.14.5", features = [ + "gzip", + "tls-aws-lc", "transport", + "zstd", ], default-features = false } tower = { version = "0.5.2", default-features = false } tracing = { version = "0.1.41", default-features = false } -[workspace.cargo-features-manager.keep] +[dev-dependencies] +nativelink-proto = { path = "nativelink-proto" } +prost = { version = "0.14.3", default-features = false } +prost-types = { version = "0.14.3", default-features = false } +sha2 = { version = "0.10.8", default-features = false, features = ["asm"] } +tempfile = { version = "3.15.0", default-features = false } +tokio = { version = "1.44.1", features = [ + "macros", + "rt-multi-thread", + "time", +], default-features = false } +tonic = { version = "0.14.5", features = [ + "transport", +], default-features = false } + +[workspace.metadata.cargo-features-manager.keep] async-lock = ["std"] aws-sdk-s3 = ["rt-tokio"] aws-smithy-runtime = ["test-util"] # This causes blake3 to detect SIMD capabilities at runtime. -blake3 = ["std"] +blake3 = ["std", "rayon"] pretty_assertions = ["std"] redis-test = ["aio"] serial_test = ["async"] -tokio = ["fs", "io-util", "rt-multi-thread", "signal"] +tokio = ["fs", "io-util", "parking_lot", "rt-multi-thread", "signal"] tokio-stream = ["fs"] -tonic = ["tls", "transport"] -tonic-build = ["prost"] +tonic = ["gzip", "tls", "transport", "zstd"] +tonic-build = [] uuid = ["serde", "v4"] [workspace.lints.rust] @@ -193,3 +213,10 @@ ref_option = { level = "allow", priority = 1 } too_many_lines = { level = "allow", priority = 1 } unused_async = { level = "allow", priority = 1 } unused_self = { level = "allow", priority = 1 } + +# Pin gcloud crates to unreleased main branch for reqwest 0.13 support. +# Remove once gcloud-storage 1.3+ is published to crates.io. 
+[patch.crates-io] +gcloud-storage = { git = "https://github.com/yoshidan/google-cloud-rust", rev = "e0e790b9d4de1fbd7085dc98fde21eaf9573899a" } +gcloud-auth = { git = "https://github.com/yoshidan/google-cloud-rust", rev = "e0e790b9d4de1fbd7085dc98fde21eaf9573899a" } +gcloud-metadata = { git = "https://github.com/yoshidan/google-cloud-rust", rev = "e0e790b9d4de1fbd7085dc98fde21eaf9573899a" } diff --git a/deployment-examples/docker-compose/docker-compose-multi-worker.yml b/deployment-examples/docker-compose/docker-compose-multi-worker.yml index 80f13baa2..7ad1ed558 100644 --- a/deployment-examples/docker-compose/docker-compose-multi-worker.yml +++ b/deployment-examples/docker-compose/docker-compose-multi-worker.yml @@ -53,6 +53,8 @@ services: - cas-data:/data/cas # Shared CAS volume - worker1-data:/data/worker1 - ./worker-shared-cas.json5:/nativelink-config.json5 + ports: + - "50181:50081" # Peer CAS endpoint for blob sharing environment: - RUST_LOG=info - SCHEDULER_ENDPOINT=scheduler @@ -78,6 +80,8 @@ services: - cas-data:/data/cas # Shared CAS volume - worker2-data:/data/worker2 - ./worker-shared-cas.json5:/nativelink-config.json5 + ports: + - "50182:50081" # Peer CAS endpoint for blob sharing environment: - RUST_LOG=info - SCHEDULER_ENDPOINT=scheduler @@ -103,6 +107,8 @@ services: - cas-data:/data/cas # Shared CAS volume - worker3-data:/data/worker3 - ./worker-shared-cas.json5:/nativelink-config.json5 + ports: + - "50183:50081" # Peer CAS endpoint for blob sharing environment: - RUST_LOG=info - SCHEDULER_ENDPOINT=scheduler diff --git a/deployment-examples/docker-compose/docker-compose.yml b/deployment-examples/docker-compose/docker-compose.yml index f2cc124fb..b2b33da2f 100644 --- a/deployment-examples/docker-compose/docker-compose.yml +++ b/deployment-examples/docker-compose/docker-compose.yml @@ -70,6 +70,7 @@ services: RUST_LOG: ${RUST_LOG:-warn} CAS_ENDPOINT: nativelink_local_cas SCHEDULER_ENDPOINT: nativelink_scheduler + ports: [ "50081:50081/tcp" ] command: | nativelink /root/worker.json5 depends_on: diff --git a/deployment-examples/docker-compose/scheduler-multi-worker.json5 b/deployment-examples/docker-compose/scheduler-multi-worker.json5 index 18a28333f..a47deccc8 100644 --- a/deployment-examples/docker-compose/scheduler-multi-worker.json5 +++ b/deployment-examples/docker-compose/scheduler-multi-worker.json5 @@ -40,6 +40,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling by pointing at the CAS store. + cas_store: "GRPC_LOCAL_STORE", }, }, ], diff --git a/deployment-examples/docker-compose/scheduler.json5 b/deployment-examples/docker-compose/scheduler.json5 index 18a28333f..11e1f2588 100644 --- a/deployment-examples/docker-compose/scheduler.json5 +++ b/deployment-examples/docker-compose/scheduler.json5 @@ -40,6 +40,10 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling by pointing at the CAS store. + // The scheduler will resolve input trees and score workers by + // how many input bytes they already have cached. 
+ cas_store: "GRPC_LOCAL_STORE", }, }, ], diff --git a/deployment-examples/docker-compose/test-multi-worker-simple.json5 b/deployment-examples/docker-compose/test-multi-worker-simple.json5 index 407a520eb..53e876209 100644 --- a/deployment-examples/docker-compose/test-multi-worker-simple.json5 +++ b/deployment-examples/docker-compose/test-multi-worker-simple.json5 @@ -52,6 +52,8 @@ supported_platform_properties: { cpu_count: "minimum", }, + // Enable locality-aware scheduling by pointing at the CAS store. + cas_store: "CAS", }, }, ], @@ -63,6 +65,8 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "CAS", + // Expose a CAS server for peer-to-peer blob sharing. + cas_server_port: 50081, upload_action_result: { ac_store: "AC", }, @@ -83,6 +87,7 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "CAS", + cas_server_port: 50082, upload_action_result: { ac_store: "AC", }, @@ -103,6 +108,7 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "CAS", + cas_server_port: 50083, upload_action_result: { ac_store: "AC", }, diff --git a/deployment-examples/docker-compose/worker-shared-cas.json5 b/deployment-examples/docker-compose/worker-shared-cas.json5 index 1198cde34..5c5a590b8 100644 --- a/deployment-examples/docker-compose/worker-shared-cas.json5 +++ b/deployment-examples/docker-compose/worker-shared-cas.json5 @@ -56,6 +56,9 @@ uri: "grpc://${SCHEDULER_ENDPOINT:-127.0.0.1}:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server endpoint so other workers can fetch blobs + // directly from this worker (peer-to-peer blob sharing). + cas_server_port: 50081, upload_action_result: { ac_store: "GRPC_LOCAL_AC_STORE", }, diff --git a/deployment-examples/docker-compose/worker.json5 b/deployment-examples/docker-compose/worker.json5 index fd2aac594..414bc75a8 100644 --- a/deployment-examples/docker-compose/worker.json5 +++ b/deployment-examples/docker-compose/worker.json5 @@ -57,6 +57,9 @@ uri: "grpc://${SCHEDULER_ENDPOINT:-127.0.0.1}:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server endpoint so other workers can fetch blobs + // directly from this worker (peer-to-peer blob sharing). + cas_server_port: 50081, upload_action_result: { ac_store: "GRPC_LOCAL_AC_STORE", }, diff --git a/integration_tests/buck2/buck2_cas.json5 b/integration_tests/buck2/buck2_cas.json5 index 963c6107e..5e27e510e 100644 --- a/integration_tests/buck2/buck2_cas.json5 +++ b/integration_tests/buck2/buck2_cas.json5 @@ -59,6 +59,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling. + cas_store: "WORKER_FAST_SLOW_STORE", }, }, ], @@ -69,6 +71,8 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server for peer-to-peer blob sharing. + cas_server_port: 50081, upload_action_result: { ac_store: "AC_MAIN_STORE", }, diff --git a/integration_tests/buildstream/buildstream_cas.json5 b/integration_tests/buildstream/buildstream_cas.json5 index 591d4df43..6c52482fc 100644 --- a/integration_tests/buildstream/buildstream_cas.json5 +++ b/integration_tests/buildstream/buildstream_cas.json5 @@ -61,6 +61,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling. + cas_store: "WORKER_FAST_SLOW_STORE", }, }, ], @@ -71,6 +73,8 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server for peer-to-peer blob sharing. 
+ cas_server_port: 50081, upload_action_result: { ac_store: "AC_MAIN_STORE", }, diff --git a/integration_tests/mongo/mongo.json5 b/integration_tests/mongo/mongo.json5 index 80e11d494..13e96880a 100644 --- a/integration_tests/mongo/mongo.json5 +++ b/integration_tests/mongo/mongo.json5 @@ -74,6 +74,8 @@ }, max_job_retries: 3, worker_timeout_s: 300, + // Enable locality-aware scheduling. + cas_store: "PRODUCTION_CAS", }, }, ], diff --git a/kubernetes/components/worker/worker.json5 b/kubernetes/components/worker/worker.json5 index d68c57d55..ca12bfefb 100644 --- a/kubernetes/components/worker/worker.json5 +++ b/kubernetes/components/worker/worker.json5 @@ -56,6 +56,8 @@ uri: "grpc://${NATIVELINK_ENDPOINT:-127.0.0.1}:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server for peer-to-peer blob sharing. + cas_server_port: 50081, upload_action_result: { ac_store: "GRPC_LOCAL_AC_STORE", }, diff --git a/kubernetes/nativelink/nativelink-config.json5 b/kubernetes/nativelink/nativelink-config.json5 index 630d1505f..d95892291 100644 --- a/kubernetes/nativelink/nativelink-config.json5 +++ b/kubernetes/nativelink/nativelink-config.json5 @@ -117,6 +117,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling. + cas_store: "CAS_MAIN_STORE", }, }, ], diff --git a/local-remote-execution/rust/aarch64-darwin.BUILD.bazel b/local-remote-execution/rust/aarch64-darwin.BUILD.bazel index ac97014eb..a4098069c 100644 --- a/local-remote-execution/rust/aarch64-darwin.BUILD.bazel +++ b/local-remote-execution/rust/aarch64-darwin.BUILD.bazel @@ -43,42 +43,42 @@ filegroup( "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-apple-darwin/codegen-backends/*.so", - "lib/rustlib/aarch64-apple-darwin/bin/rust-lld", + "lib/rustlib/aarch64-apple-darwin/bin/**", "lib/rustlib/aarch64-apple-darwin/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:aarch64-unknown-linux-gnu": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-gnu/bin/**", "lib/rustlib/aarch64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:aarch64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-musl/bin/**", "lib/rustlib/aarch64-unknown-linux-musl/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-apple-darwin": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-apple-darwin/codegen-backends/*.so", - "lib/rustlib/x86_64-apple-darwin/bin/rust-lld", + "lib/rustlib/x86_64-apple-darwin/bin/**", "lib/rustlib/x86_64-apple-darwin/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-gnu": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-gnu/bin/**", "lib/rustlib/x86_64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-musl/bin/**", "lib/rustlib/x86_64-unknown-linux-musl/lib/*.so", ], allow_empty = True), }), diff --git 
a/local-remote-execution/rust/aarch64-linux.BUILD.bazel b/local-remote-execution/rust/aarch64-linux.BUILD.bazel index 54f9171d7..a69b7264b 100644 --- a/local-remote-execution/rust/aarch64-linux.BUILD.bazel +++ b/local-remote-execution/rust/aarch64-linux.BUILD.bazel @@ -43,28 +43,28 @@ filegroup( "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-gnu/bin/**", "lib/rustlib/aarch64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:aarch64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-musl/bin/**", "lib/rustlib/aarch64-unknown-linux-musl/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-gnu": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-gnu/bin/**", "lib/rustlib/x86_64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-musl/bin/**", "lib/rustlib/x86_64-unknown-linux-musl/lib/*.so", ], allow_empty = True), }), diff --git a/local-remote-execution/rust/x86_64-darwin.BUILD.bazel b/local-remote-execution/rust/x86_64-darwin.BUILD.bazel index fcff515c0..27c2130b4 100644 --- a/local-remote-execution/rust/x86_64-darwin.BUILD.bazel +++ b/local-remote-execution/rust/x86_64-darwin.BUILD.bazel @@ -43,42 +43,42 @@ filegroup( "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-apple-darwin/codegen-backends/*.so", - "lib/rustlib/aarch64-apple-darwin/bin/rust-lld", + "lib/rustlib/aarch64-apple-darwin/bin/**", "lib/rustlib/aarch64-apple-darwin/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:aarch64-unknown-linux-gnu": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-gnu/bin/**", "lib/rustlib/aarch64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:aarch64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-musl/bin/**", "lib/rustlib/aarch64-unknown-linux-musl/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-apple-darwin": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-apple-darwin/codegen-backends/*.so", - "lib/rustlib/x86_64-apple-darwin/bin/rust-lld", + "lib/rustlib/x86_64-apple-darwin/bin/**", "lib/rustlib/x86_64-apple-darwin/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-gnu": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-gnu/bin/**", "lib/rustlib/x86_64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-musl": glob([ 
"bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-musl/bin/**", "lib/rustlib/x86_64-unknown-linux-musl/lib/*.so", ], allow_empty = True), }), diff --git a/local-remote-execution/rust/x86_64-linux.BUILD.bazel b/local-remote-execution/rust/x86_64-linux.BUILD.bazel index 9fdc08f2f..32909a27a 100644 --- a/local-remote-execution/rust/x86_64-linux.BUILD.bazel +++ b/local-remote-execution/rust/x86_64-linux.BUILD.bazel @@ -43,28 +43,28 @@ filegroup( "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-gnu/bin/**", "lib/rustlib/aarch64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:aarch64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/aarch64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/aarch64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/aarch64-unknown-linux-musl/bin/**", "lib/rustlib/aarch64-unknown-linux-musl/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-gnu": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-gnu/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-gnu/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-gnu/bin/**", "lib/rustlib/x86_64-unknown-linux-gnu/lib/*.so", ], allow_empty = True), "@local-remote-execution//rust/triple:x86_64-unknown-linux-musl": glob([ "bin/*.so", "lib/*.so", "lib/rustlib/x86_64-unknown-linux-musl/codegen-backends/*.so", - "lib/rustlib/x86_64-unknown-linux-musl/bin/rust-lld", + "lib/rustlib/x86_64-unknown-linux-musl/bin/**", "lib/rustlib/x86_64-unknown-linux-musl/lib/*.so", ], allow_empty = True), }), diff --git a/nativelink-config/examples/basic_cas.json5 b/nativelink-config/examples/basic_cas.json5 index 4d7278204..c7d52d4ab 100644 --- a/nativelink-config/examples/basic_cas.json5 +++ b/nativelink-config/examples/basic_cas.json5 @@ -62,6 +62,10 @@ ISA: "exact", InputRootAbsolutePath: "ignore", // used by chromium builds, but we can drop it }, + // Enable locality-aware scheduling. The scheduler resolves input + // trees and scores workers by how many input bytes they already + // have cached. + cas_store: "WORKER_FAST_SLOW_STORE", }, }, ], @@ -72,6 +76,8 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server for peer-to-peer blob sharing. + cas_server_port: 50081, upload_action_result: { ac_store: "AC_MAIN_STORE", }, diff --git a/nativelink-config/examples/filesystem_cas.json5 b/nativelink-config/examples/filesystem_cas.json5 index 29e8f92e7..f4617c754 100644 --- a/nativelink-config/examples/filesystem_cas.json5 +++ b/nativelink-config/examples/filesystem_cas.json5 @@ -116,6 +116,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling. + cas_store: "CAS_MAIN_STORE", }, }, ], diff --git a/nativelink-config/examples/gcs_backend.json5 b/nativelink-config/examples/gcs_backend.json5 index 2fcd8cc6f..1ec07cce0 100644 --- a/nativelink-config/examples/gcs_backend.json5 +++ b/nativelink-config/examples/gcs_backend.json5 @@ -119,6 +119,8 @@ docker_image: "priority", "lre-rs": "priority", }, + // Enable locality-aware scheduling. 
+ cas_store: "CAS_MAIN_STORE", }, }, ], diff --git a/nativelink-config/examples/mongo.json5 b/nativelink-config/examples/mongo.json5 index 74d2168f1..28ed275b9 100644 --- a/nativelink-config/examples/mongo.json5 +++ b/nativelink-config/examples/mongo.json5 @@ -91,6 +91,8 @@ }, max_job_retries: 3, worker_timeout_s: 300, + // Enable locality-aware scheduling. + cas_store: "PRODUCTION_CAS", }, }, ], diff --git a/nativelink-config/examples/ontap_backend.json5 b/nativelink-config/examples/ontap_backend.json5 index d54bfc27b..40b4f8c49 100644 --- a/nativelink-config/examples/ontap_backend.json5 +++ b/nativelink-config/examples/ontap_backend.json5 @@ -138,6 +138,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling. + cas_store: "CAS_MAIN_STORE", }, }, ], diff --git a/nativelink-config/examples/s3_backend_with_local_fast_cas.json5 b/nativelink-config/examples/s3_backend_with_local_fast_cas.json5 index 4d9abf276..2c6f6b26a 100644 --- a/nativelink-config/examples/s3_backend_with_local_fast_cas.json5 +++ b/nativelink-config/examples/s3_backend_with_local_fast_cas.json5 @@ -140,6 +140,8 @@ "lre-rs": "priority", ISA: "exact", }, + // Enable locality-aware scheduling. + cas_store: "CAS_MAIN_STORE", }, }, ], diff --git a/nativelink-config/examples/worker_with_redis_scheduler.json5 b/nativelink-config/examples/worker_with_redis_scheduler.json5 index 85d845850..207fddc23 100644 --- a/nativelink-config/examples/worker_with_redis_scheduler.json5 +++ b/nativelink-config/examples/worker_with_redis_scheduler.json5 @@ -69,6 +69,8 @@ redis_store: "SCHEDULER_REDIS_STORE", }, }, + // Enable locality-aware scheduling. + cas_store: "WORKER_FAST_SLOW_STORE", }, }, ], @@ -80,6 +82,8 @@ }, max_inflight_tasks: 5, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server for peer-to-peer blob sharing. + cas_server_port: 50081, upload_action_result: { ac_store: "AC_MAIN_STORE", }, diff --git a/nativelink-config/src/cas_server.rs b/nativelink-config/src/cas_server.rs index 70616694d..0a5eb8edc 100644 --- a/nativelink-config/src/cas_server.rs +++ b/nativelink-config/src/cas_server.rs @@ -63,8 +63,11 @@ pub enum HttpCompressionAlgorithm { #[default] None, - /// Zlib compression. + /// Gzip compression. Gzip, + + /// Zstandard compression. + Zstd, } /// Note: Compressing data in the cloud rarely has a benefit, since most @@ -192,7 +195,7 @@ pub struct ByteStreamConfig { /// 16KiB - 64KiB is optimal. /// /// - /// Default: 64KiB + /// Default: 64MiB #[serde( default, deserialize_with = "convert_data_size_with_shellexpand", @@ -518,11 +521,18 @@ pub struct HttpListener { #[serde(default)] pub advanced_http: HttpServerConfig, - /// Maximum number of bytes to decode on each grpc stream chunk. + /// Maximum number of bytes to decode on each inbound gRPC message. /// Default: 4 MiB #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] pub max_decoding_message_size: usize, + /// Maximum number of bytes to encode on each outbound gRPC message. + /// Default: 4 MiB (matches Bazel's Java gRPC client inbound limit). + /// Workers with a higher `max_decoding_message_size` should use a + /// separate listener with this value raised accordingly. + #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] + pub max_encoding_message_size: usize, + /// Tls Configuration for this server. /// If not set, the server will not use TLS. /// @@ -820,6 +830,25 @@ pub struct LocalWorkerConfig { /// them from CAS for every action. 
/// Default: None (directory cache disabled) pub directory_cache: Option, + + /// If set, the worker will start a CAS + ByteStream gRPC server on + /// 0.0.0.0:<port> and advertise grpc://<hostname>:<port> to the + /// scheduler and other workers for peer-to-peer blob sharing. + /// The hostname is resolved at runtime via gethostname(). + /// Example: 50081 + /// Default: None (no peer CAS server) + #[serde(default)] + pub cas_server_port: Option<u16>, + + /// How often (in milliseconds) the worker should send a periodic + /// BlobsAvailable snapshot to the scheduler, reporting which blobs + /// are in the local CAS cache and their LRU timestamps. + /// + /// Default: 0 (uses the built-in default of 500ms) + #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] + pub blobs_available_interval_ms: u64, } #[derive(Deserialize, Serialize, Debug, Clone)] diff --git a/nativelink-config/src/schedulers.rs b/nativelink-config/src/schedulers.rs index 36b267c47..8ce90fcdd 100644 --- a/nativelink-config/src/schedulers.rs +++ b/nativelink-config/src/schedulers.rs @@ -160,6 +160,28 @@ pub struct SimpleSpec { deserialize_with = "convert_duration_with_shellexpand_and_negative" )] pub worker_match_logging_interval_s: i64, + + /// Maximum number of actions that can be matched to workers for a single + /// client (identified by `instance_name`) in one matching cycle. When + /// multiple clients are competing for workers, this prevents one client + /// from monopolizing all available workers by round-robin interleaving + /// actions from different clients. + /// + /// Set to 0 to disable fair scheduling (unlimited matches per client + /// per cycle). Default: 0 (disabled). + #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] + pub max_matches_per_client_per_cycle: usize, + + /// Name of the CAS store used for resolving input trees during + /// locality-aware scheduling. When set, the scheduler resolves the + /// full input tree for each action and scores workers by how many + /// input bytes they already have cached. + /// + /// This should reference a CAS store in the `stores` section. + /// If not set, locality-aware tree scoring is disabled (only the + /// action affinity tier is used). + #[serde(default)] + pub cas_store: Option<String>, } #[derive(Deserialize, Serialize, Debug)] diff --git a/nativelink-config/src/stores.rs b/nativelink-config/src/stores.rs index 59ecb7afa..8e38cfa9d 100644 --- a/nativelink-config/src/stores.rs +++ b/nativelink-config/src/stores.rs @@ -578,7 +578,7 @@ pub struct RefSpec { pub name: String, } -#[derive(Serialize, Deserialize, Debug, Default, Clone)] +#[derive(Serialize, Deserialize, Debug, Clone)] #[serde(deny_unknown_fields)] pub struct FilesystemSpec { /// Path on the system where to store the actual content. This is where @@ -599,7 +599,7 @@ /// Buffer size to use when reading files. Generally this should be left /// to the default value except for testing. - /// Default: 32k. + /// Default: 256k. #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")] pub read_buffer_size: u32, @@ -624,6 +624,41 @@ /// Default: 0 #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] pub max_concurrent_writes: usize, + + /// If true, use sync_data() instead of sync_all() when flushing writes + /// to disk. sync_data() only syncs the file data without metadata + /// (timestamps, permissions), which is faster.
For content-addressed + /// storage where the content is verified by hash, metadata sync is + /// unnecessary and this significantly reduces write latency. + /// Default: true + #[serde(default = "default_sync_data_only")] + pub sync_data_only: bool, + + /// If true, skip writes when a blob with the same key already exists + /// in the store. This is safe for content-addressed storage (CAS) where + /// identical keys guarantee identical content. Do NOT enable this for + /// stores where the same key can hold different content (e.g. action + /// cache). + /// When a duplicate write is skipped, the existing entry's access time + /// is updated in the LRU to prevent premature eviction. + /// Default: false + #[serde(default)] + pub content_is_immutable: bool, +} + +impl Default for FilesystemSpec { + fn default() -> Self { + Self { + content_path: String::new(), + temp_path: String::new(), + read_buffer_size: 0, + eviction_policy: None, + block_size: 0, + max_concurrent_writes: 0, + sync_data_only: true, + content_is_immutable: false, + } + } } // NetApp ONTAP S3 Spec @@ -1095,6 +1130,32 @@ pub struct GrpcEndpoint { /// If not set or 0, defaults to 20 seconds. #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] pub http2_keepalive_timeout_s: u64, + + /// Whether to set TCP_NODELAY on the connection socket. + /// Disables Nagle's algorithm, reducing latency for small writes. + /// Default: true + #[serde(default = "default_tcp_nodelay")] + pub tcp_nodelay: bool, +} + +fn default_sync_data_only() -> bool { + true +} + +fn default_tcp_nodelay() -> bool { + true +} + +fn default_batch_update_threshold_bytes() -> u64 { + 1_048_576 +} + +fn default_batch_coalesce_delay_ms() -> u64 { + 10 +} + +const fn default_connections_per_endpoint() -> usize { + 32 } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -1121,8 +1182,8 @@ pub struct GrpcSpec { pub max_concurrent_requests: usize, /// The number of connections to make to each specified endpoint to balance - /// the load over multiple TCP connections. Default 1. - #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")] + /// the load over multiple TCP connections. Default 32. + #[serde(default = "default_connections_per_endpoint", deserialize_with = "convert_numeric_with_shellexpand")] pub connections_per_endpoint: usize, /// Maximum time (seconds) allowed for a single RPC request (e.g. a @@ -1132,6 +1193,35 @@ /// Default: 120 (seconds) #[serde(default, deserialize_with = "convert_duration_with_shellexpand")] pub rpc_timeout_s: u64, + + /// Maximum blob size (in bytes) for using BatchUpdateBlobs instead of + /// ByteStream.Write. Blobs at or below this size skip per-blob streaming + /// overhead (UUID generation, resource_name, streaming setup). Only + /// applies to CAS stores, not AC. + /// + /// Set to 0 to disable (all uploads use ByteStream.Write). + /// + /// Default: 1048576 (1 MiB) + #[serde( + default = "default_batch_update_threshold_bytes", + deserialize_with = "convert_numeric_with_shellexpand" + )] + pub batch_update_threshold_bytes: u64, + + /// Time window (in milliseconds) to coalesce multiple small blob uploads + /// into a single BatchUpdateBlobs RPC. Requires + /// `batch_update_threshold_bytes > 0`. + /// + /// When > 0, incoming small uploads are buffered for up to this duration + /// before being sent as one batch. When 0, each small upload is sent + /// immediately as a single-element BatchUpdateBlobs RPC.
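The coalescing window described above is essentially a bounded buffer with a flush timer. A simplified sketch of the idea, not NativeLink's implementation (`send_batch` stands in for the real BatchUpdateBlobs call, and this version restarts the window on every received blob):

```rust
use std::time::Duration;
use tokio::sync::mpsc::Receiver;
use tokio::time::timeout;

// Buffer small uploads for up to `delay`, then flush them as one
// BatchUpdateBlobs-style call; flush early once `max_batch` is reached.
async fn coalesce_uploads(mut rx: Receiver<Vec<u8>>, delay: Duration, max_batch: usize) {
    let mut pending: Vec<Vec<u8>> = Vec::new();
    loop {
        match timeout(delay, rx.recv()).await {
            // A new small blob arrived within the window.
            Ok(Some(blob)) => {
                pending.push(blob);
                if pending.len() >= max_batch {
                    send_batch(std::mem::take(&mut pending)).await;
                }
            }
            // The window elapsed with nothing new: flush what we have.
            Err(_) => {
                if !pending.is_empty() {
                    send_batch(std::mem::take(&mut pending)).await;
                }
            }
            // Channel closed: flush any remainder and stop.
            Ok(None) => {
                if !pending.is_empty() {
                    send_batch(std::mem::take(&mut pending)).await;
                }
                break;
            }
        }
    }
}

// Stand-in for one BatchUpdateBlobs RPC carrying all buffered blobs.
async fn send_batch(batch: Vec<Vec<u8>>) {
    let _ = batch;
}
```

One constraint the real store must respect: the combined batch has to fit under the receiving server's `max_decoding_message_size` (4 MiB by default per the listener config earlier in this diff), which is presumably why the per-blob threshold defaults well below that.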
+ /// + /// Default: 10 (milliseconds) + #[serde( + default = "default_batch_coalesce_delay_ms", + deserialize_with = "convert_numeric_with_shellexpand" + )] + pub batch_coalesce_delay_ms: u64, } /// The possible error codes that might occur on an upstream request. diff --git a/nativelink-error/Cargo.toml b/nativelink-error/Cargo.toml index 13581368b..3b8b2a976 100644 --- a/nativelink-error/Cargo.toml +++ b/nativelink-error/Cargo.toml @@ -14,8 +14,8 @@ version = "1.0.0-rc2" nativelink-metric = { path = "../nativelink-metric" } nativelink-proto = { path = "../nativelink-proto" } -prost = { version = "0.13.5", default-features = false } -prost-types = { version = "0.13.5", default-features = false } +prost = { version = "0.14.3", default-features = false } +prost-types = { version = "0.14.3", default-features = false } redis = { version = "1.0.0", default-features = false } rustls-pki-types = { version = "1.13.1", default-features = false } serde = { version = "1.0.219", default-features = false } @@ -26,8 +26,8 @@ tokio = { version = "1.44.1", features = [ "rt-multi-thread", "signal", ], default-features = false } -tonic = { version = "0.13.0", features = [ - "tls-ring", +tonic = { version = "0.14.5", features = [ + "tls-aws-lc", "transport", ], default-features = false } url = { version = "2.5.7", default-features = false } diff --git a/nativelink-error/src/lib.rs b/nativelink-error/src/lib.rs index 04df9e64a..a6bcddbbd 100644 --- a/nativelink-error/src/lib.rs +++ b/nativelink-error/src/lib.rs @@ -56,6 +56,8 @@ pub struct Error { #[serde(with = "CodeDef")] pub code: Code, pub messages: Vec<String>, + #[serde(skip)] + pub details: Vec<prost_types::Any>, } impl MetricsComponent for Error { @@ -71,7 +73,11 @@ impl Error { #[must_use] pub const fn new_with_messages(code: Code, messages: Vec<String>) -> Self { - Self { code, messages } + Self { + code, + messages, + details: Vec::new(), + } } #[must_use] @@ -131,7 +137,7 @@ impl From<Error> for nativelink_proto::google::rpc::Status { Self { code: val.code as i32, message: val.message_string(), - details: vec![], + details: val.details, } } } @@ -141,6 +147,7 @@ impl From<nativelink_proto::google::rpc::Status> for Error { Self { code: val.code.into(), messages: vec![val.message], + details: val.details, } } } @@ -156,6 +163,10 @@ impl core::fmt::Display for Error { builder.field("messages", &self.messages); } + if !self.details.is_empty() { + builder.field("details", &self.details); + } + builder.finish() } } @@ -252,6 +263,7 @@ impl From<std::io::Error> for Error { Self { code: err.kind().into_code(), messages: vec![err.to_string()], + details: Vec::new(), } } } @@ -405,6 +417,7 @@ impl<T> ResultExt<T> for Option<T> { let mut error = Error { code: Code::Internal, messages: vec![], + details: Vec::new(), }; let (code, message) = tip_fn(&error); error.code = code; @@ -486,3 +499,69 @@ pub enum CodeDef { // NOTE: Additional codes must be added to stores.rs in ErrorCodes and also // in both match statements in retry.rs.
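The practical payoff of threading `details` through `Error` is that typed `google.rpc.Status` payloads, such as the `PreconditionFailure` the tests below exercise, survive the trip back to clients. A sketch of attaching a detail, using only the fields this diff introduces (encoding a real `PreconditionFailure` message is elided; any prost message can be packed the same way):

```rust
use nativelink_error::{Code, Error};
use prost::Message;

// Pack any prost message into a google.protobuf.Any, the type carried
// by `Error::details` and `google.rpc.Status::details`.
fn pack_detail<M: Message>(type_url: &str, msg: &M) -> prost_types::Any {
    prost_types::Any {
        type_url: type_url.to_string(),
        value: msg.encode_to_vec(),
    }
}

// Build a FailedPrecondition error that tells the client which blob is
// missing; `detail` would be an encoded google.rpc.PreconditionFailure.
fn missing_blob_error(detail: prost_types::Any) -> Error {
    let mut err =
        Error::new_with_messages(Code::FailedPrecondition, vec!["missing blob".into()]);
    err.details.push(detail);
    err
}
```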
} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn error_to_rpc_status_preserves_details() { + let detail = prost_types::Any { + type_url: "type.googleapis.com/google.rpc.PreconditionFailure".into(), + value: vec![1, 2, 3], // Dummy bytes + }; + let err = Error { + code: Code::FailedPrecondition, + messages: vec!["missing blob".into()], + details: vec![detail.clone()], + }; + let status: nativelink_proto::google::rpc::Status = err.into(); + assert_eq!(status.code, Code::FailedPrecondition as i32); + assert_eq!(status.details.len(), 1); + assert_eq!(status.details[0].type_url, detail.type_url); + assert_eq!(status.details[0].value, detail.value); + } + + #[test] + fn rpc_status_to_error_preserves_details() { + let detail = prost_types::Any { + type_url: "type.googleapis.com/google.rpc.PreconditionFailure".into(), + value: vec![4, 5, 6], + }; + let status = nativelink_proto::google::rpc::Status { + code: Code::FailedPrecondition as i32, + message: "test".into(), + details: vec![detail.clone()], + }; + let err: Error = status.into(); + assert_eq!(err.code, Code::FailedPrecondition); + assert_eq!(err.details.len(), 1); + assert_eq!(err.details[0].type_url, detail.type_url); + assert_eq!(err.details[0].value, detail.value); + } + + #[test] + fn error_details_roundtrip_through_rpc_status() { + let detail = prost_types::Any { + type_url: "type.googleapis.com/google.rpc.PreconditionFailure".into(), + value: vec![10, 20, 30], + }; + let original = Error { + code: Code::FailedPrecondition, + messages: vec!["missing".into()], + details: vec![detail], + }; + let status: nativelink_proto::google::rpc::Status = original.clone().into(); + let roundtripped: Error = status.into(); + assert_eq!(roundtripped.code, original.code); + assert_eq!(roundtripped.details.len(), original.details.len()); + assert_eq!(roundtripped.details[0].type_url, original.details[0].type_url); + assert_eq!(roundtripped.details[0].value, original.details[0].value); + } + + #[test] + fn make_err_macro_has_empty_details() { + let err = make_err!(Code::Internal, "something failed"); + assert!(err.details.is_empty()); + } +} diff --git a/nativelink-metric/src/lib.rs b/nativelink-metric/src/lib.rs index 5661f14b0..b885262dd 100644 --- a/nativelink-metric/src/lib.rs +++ b/nativelink-metric/src/lib.rs @@ -458,6 +458,18 @@ impl<T: MetricsComponent> MetricsComponent for async_lock::Mutex<T> { } } +impl<T: MetricsComponent> MetricsComponent for async_lock::RwLock<T> { + fn publish( + &self, + kind: MetricKind, + field_metadata: MetricFieldData, + ) -> Result<MetricPublishKnownKindData, Error> { + // It is safe to block in the publishing thread.
+ let lock = self.read_blocking(); + lock.publish(kind, field_metadata) + } +} + impl MetricsComponent for parking_lot::Mutex { fn publish( &self, diff --git a/nativelink-proto/Cargo.toml b/nativelink-proto/Cargo.toml index fb9a08ad3..4174b44f3 100644 --- a/nativelink-proto/Cargo.toml +++ b/nativelink-proto/Cargo.toml @@ -12,20 +12,19 @@ path = "genproto/lib.rs" derive_more = { version = "2.0.1", default-features = false, features = [ "debug", ] } -prost = { version = "0.13.5", default-features = false } -prost-types = { version = "0.13.5", default-features = false } -tonic = { version = "0.13.0", features = [ +prost = { version = "0.14.3", default-features = false } +prost-types = { version = "0.14.3", default-features = false } +tonic = { version = "0.14.5", features = [ "codegen", - "prost", - "tls-ring", + "tls-aws-lc", "transport", ], default-features = false } +tonic-prost = { version = "0.14.5", default-features = false } [dev-dependencies] -prost-build = { version = "0.13.5", default-features = false } -tonic-build = { version = "0.13.0", features = [ - "prost", -], default-features = false } +prost-build = { version = "0.14.3", default-features = false } +tonic-build = { version = "0.14.5", default-features = false } +tonic-prost-build = { version = "0.14.5", default-features = false } [package.metadata.cargo-machete] # Used by gen_protos_tool.rs diff --git a/nativelink-proto/com/github/trace_machina/nativelink/remote_execution/worker_api.proto b/nativelink-proto/com/github/trace_machina/nativelink/remote_execution/worker_api.proto index d736d1624..d472505b2 100644 --- a/nativelink-proto/com/github/trace_machina/nativelink/remote_execution/worker_api.proto +++ b/nativelink-proto/com/github/trace_machina/nativelink/remote_execution/worker_api.proto @@ -44,6 +44,9 @@ service WorkerApi { /// Request object for keep alive requests. message KeepAliveRequest { reserved 1; // NextId. + /// CPU load percentage: load_avg_1m / num_cpus * 100. + /// 0 means unknown (old workers that don't report load). + uint32 cpu_load_pct = 2; } /// Request object for going away requests. @@ -75,7 +78,86 @@ message ConnectWorkerRequest { /// The default (0) means unlimited. uint64 max_inflight_tasks = 3; - reserved 4; // NextId. + /// This worker's CAS gRPC endpoint for peer blob serving. + /// If set, other workers can fetch blobs directly from this worker. + /// Example: "grpc://192.168.191.5:50081" + string cas_endpoint = 5; + + reserved 4; + reserved 6; +} + +/// Per-digest info including LRU access time for cache eviction heuristics. +message BlobDigestInfo { + /// The digest of the blob. + build.bazel.remote.execution.v2.Digest digest = 1; + /// The last time this blob was accessed in the worker's local cache. + /// Seconds since UNIX epoch. The scheduler can use this to estimate + /// how close a blob is to eviction (lower = more likely to be evicted). + int64 last_access_timestamp = 2; +} + +/// Notification that blobs are available on a worker for peer serving. +message BlobsAvailableNotification { + /// The worker's CAS endpoint where these blobs can be fetched. + string worker_cas_endpoint = 1; + /// The digests of newly available blobs (kept for backward compat / simple notifications). + repeated build.bazel.remote.execution.v2.Digest digests = 2; + /// If true, this is a full snapshot of all blobs in the worker's cache. + /// The server should replace its entire view for this endpoint with the + /// contents of this message (digest_infos + digests). 
If false, this is + /// an incremental update (new blobs only). + bool is_full_snapshot = 3; + /// Digests that have been evicted from the worker since the last update. + /// Only meaningful when is_full_snapshot == false. + repeated build.bazel.remote.execution.v2.Digest evicted_digests = 4; + /// Per-digest info with LRU timestamps. When present, the server should + /// prefer this over the plain `digests` field. + repeated BlobDigestInfo digest_infos = 5; + /// CPU load percentage: load_avg_1m / num_cpus * 100. + /// 0 means unknown (old workers that don't report load). + uint32 cpu_load_pct = 6; + /// Digests of input root directories that are cached in this worker's + /// directory cache. The scheduler can give routing preference to workers + /// that already have the action's input_root_digest cached. + /// Also used for the full subtree snapshot (when is_full_subtree_snapshot=true, + /// this contains ALL directory digests including subtrees). + repeated build.bazel.remote.execution.v2.Digest cached_directory_digests = 7; + + /// Delta-encoded subtree updates since last notification. + /// When a cache entry is added, send ALL directory digests in its merkle tree. + /// When a cache entry is evicted, send ALL directory digests that were removed + /// (only those no longer present in ANY cached entry's merkle tree). + repeated build.bazel.remote.execution.v2.Digest added_subtree_digests = 8; + repeated build.bazel.remote.execution.v2.Digest removed_subtree_digests = 9; + + /// True on the first notification after (re)connect — scheduler should + /// replace its cached_subtree_digests state rather than applying a delta. + /// In this case, cached_directory_digests (field 7) contains the full set + /// of all subtree digests. + bool is_full_subtree_snapshot = 10; +} + +/// Notification that blobs have been evicted from a worker. +message BlobsEvictedNotification { + /// The worker's CAS endpoint from which these blobs were evicted. + string worker_cas_endpoint = 1; + /// The digests of evicted blobs. + repeated build.bazel.remote.execution.v2.Digest digests = 2; +} + +/// Request to touch (update access time) blobs on a worker to prevent eviction. +message TouchBlobsRequest { + /// The digests of blobs to touch. + repeated build.bazel.remote.execution.v2.Digest digests = 1; +} + +/// A hint that a specific digest is available on one or more peer workers. +message PeerHint { + /// The digest available on peers. + build.bazel.remote.execution.v2.Digest digest = 1; + /// gRPC endpoints of workers that have this blob. + repeated string peer_endpoints = 2; } /// The result of an ExecutionRequest. @@ -106,6 +188,9 @@ message ExecuteResult { message ExecuteComplete { /// The operation ID that was executed. string operation_id = 1; + /// CPU load percentage: load_avg_1m / num_cpus * 100. + /// 0 means unknown (old workers that don't report load). + uint32 cpu_load_pct = 2; } /// Result sent back from the server when a node connects. @@ -146,8 +231,12 @@ message UpdateForWorker { /// Instructs the worker to kill a specific running operation. KillOperationRequest kill_operation_request = 5; + + /// Instructs the worker to touch (update access time) on blobs + /// to prevent premature eviction. + TouchBlobsRequest touch_blobs = 7; } - reserved 6; // NextId. + reserved 6; // Previously NextId, now reserved. } /// Communication from the worker to the scheduler. @@ -182,8 +271,14 @@ message UpdateForScheduler { /// Notify that the execution has completed, but result is uploading. 
ExecuteComplete execute_complete = 5; + + /// Notifies the scheduler that new blobs are available on this worker. + BlobsAvailableNotification blobs_available = 7; + + /// Notifies the scheduler that blobs have been evicted from this worker. + BlobsEvictedNotification blobs_evicted = 8; } - reserved 6; // NextId. + reserved 6; // Previously NextId, now reserved. } message StartExecute { @@ -204,7 +299,11 @@ message StartExecute { /// The ID of the worker that is executing the action. string worker_id = 6; - reserved 7; // NextId. + /// Hints about input blobs available on peer workers. + /// Workers should try these peers first before falling back to server CAS. + repeated PeerHint peer_hints = 8; + + reserved 9; // NextId. } /// This is a special message used to save actions into the CAS that can be used diff --git a/nativelink-proto/genproto/build.bazel.remote.asset.v1.pb.rs b/nativelink-proto/genproto/build.bazel.remote.asset.v1.pb.rs index c2a863a12..b88f92115 100644 --- a/nativelink-proto/genproto/build.bazel.remote.asset.v1.pb.rs +++ b/nativelink-proto/genproto/build.bazel.remote.asset.v1.pb.rs @@ -531,7 +531,7 @@ pub mod fetch_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.asset.v1.Fetch/FetchBlob", ); @@ -557,7 +557,7 @@ pub mod fetch_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.asset.v1.Fetch/FetchDirectory", ); @@ -709,7 +709,7 @@ pub mod push_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.asset.v1.Push/PushBlob", ); @@ -733,7 +733,7 @@ pub mod push_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.asset.v1.Push/PushDirectory", ); @@ -943,7 +943,7 @@ pub mod fetch_server { let inner = self.inner.clone(); let fut = async move { let method = FetchBlobSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -988,7 +988,7 @@ pub mod fetch_server { let inner = self.inner.clone(); let fut = async move { let method = FetchDirectorySvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -1216,7 +1216,7 @@ pub mod push_server { let inner = self.inner.clone(); let fut = async move { let method = PushBlobSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -1261,7 +1261,7 @@ pub mod push_server { let inner = self.inner.clone(); let fut = async move { let method = PushDirectorySvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = 
tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, diff --git a/nativelink-proto/genproto/build.bazel.remote.execution.v2.pb.rs b/nativelink-proto/genproto/build.bazel.remote.execution.v2.pb.rs index f6e831311..c033f959e 100644 --- a/nativelink-proto/genproto/build.bazel.remote.execution.v2.pb.rs +++ b/nativelink-proto/genproto/build.bazel.remote.execution.v2.pb.rs @@ -2052,7 +2052,7 @@ pub mod execution_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.Execution/Execute", ); @@ -2099,7 +2099,7 @@ pub mod execution_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.Execution/WaitExecution", ); @@ -2235,7 +2235,7 @@ pub mod action_cache_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.ActionCache/GetActionResult", ); @@ -2280,7 +2280,7 @@ pub mod action_cache_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.ActionCache/UpdateActionResult", ); @@ -2545,7 +2545,7 @@ pub mod content_addressable_storage_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.ContentAddressableStorage/FindMissingBlobs", ); @@ -2597,7 +2597,7 @@ pub mod content_addressable_storage_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.ContentAddressableStorage/BatchUpdateBlobs", ); @@ -2646,7 +2646,7 @@ pub mod content_addressable_storage_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.ContentAddressableStorage/BatchReadBlobs", ); @@ -2698,7 +2698,7 @@ pub mod content_addressable_storage_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.ContentAddressableStorage/GetTree", ); @@ -2825,7 +2825,7 @@ pub mod capabilities_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/build.bazel.remote.execution.v2.Capabilities/GetCapabilities", ); @@ -3086,7 +3086,7 @@ pub mod execution_server { let inner = self.inner.clone(); let fut 
= async move { let method = ExecuteSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3132,7 +3132,7 @@ pub mod execution_server { let inner = self.inner.clone(); let fut = async move { let method = WaitExecutionSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3362,7 +3362,7 @@ pub mod action_cache_server { let inner = self.inner.clone(); let fut = async move { let method = GetActionResultSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3408,7 +3408,7 @@ pub mod action_cache_server { let inner = self.inner.clone(); let fut = async move { let method = UpdateActionResultSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3837,7 +3837,7 @@ pub mod content_addressable_storage_server { let inner = self.inner.clone(); let fut = async move { let method = FindMissingBlobsSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3886,7 +3886,7 @@ pub mod content_addressable_storage_server { let inner = self.inner.clone(); let fut = async move { let method = BatchUpdateBlobsSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3935,7 +3935,7 @@ pub mod content_addressable_storage_server { let inner = self.inner.clone(); let fut = async move { let method = BatchReadBlobsSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -3982,7 +3982,7 @@ pub mod content_addressable_storage_server { let inner = self.inner.clone(); let fut = async move { let method = GetTreeSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -4180,7 +4180,7 @@ pub mod capabilities_server { let inner = self.inner.clone(); let fut = async move { let method = GetCapabilitiesSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, diff --git a/nativelink-proto/genproto/com.github.trace_machina.nativelink.remote_execution.pb.rs b/nativelink-proto/genproto/com.github.trace_machina.nativelink.remote_execution.pb.rs index c4a53f73f..6e60964f4 100644 --- a/nativelink-proto/genproto/com.github.trace_machina.nativelink.remote_execution.pb.rs +++ b/nativelink-proto/genproto/com.github.trace_machina.nativelink.remote_execution.pb.rs @@ -15,7 +15,12 @@ // This file is 
@generated by prost-build. /// / Request object for keep alive requests. #[derive(Clone, Copy, PartialEq, ::prost::Message)] -pub struct KeepAliveRequest {} +pub struct KeepAliveRequest { + /// / CPU load percentage: load_avg_1m / num_cpus * 100. + /// / 0 means unknown (old workers that don't report load). + #[prost(uint32, tag = "2")] + pub cpu_load_pct: u32, +} /// / Request object for going away requests. #[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct GoingAwayRequest {} @@ -46,6 +51,117 @@ pub struct ConnectWorkerRequest { /// / The default (0) means unlimited. #[prost(uint64, tag = "3")] pub max_inflight_tasks: u64, + /// / This worker's CAS gRPC endpoint for peer blob serving. + /// / If set, other workers can fetch blobs directly from this worker. + /// / Example: "grpc://192.168.191.5:50081" + #[prost(string, tag = "5")] + pub cas_endpoint: ::prost::alloc::string::String, +} +/// / Per-digest info including LRU access time for cache eviction heuristics. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BlobDigestInfo { + /// / The digest of the blob. + #[prost(message, optional, tag = "1")] + pub digest: ::core::option::Option< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + /// / The last time this blob was accessed in the worker's local cache. + /// / Seconds since UNIX epoch. The scheduler can use this to estimate + /// / how close a blob is to eviction (lower = more likely to be evicted). + #[prost(int64, tag = "2")] + pub last_access_timestamp: i64, +} +/// / Notification that blobs are available on a worker for peer serving. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BlobsAvailableNotification { + /// / The worker's CAS endpoint where these blobs can be fetched. + #[prost(string, tag = "1")] + pub worker_cas_endpoint: ::prost::alloc::string::String, + /// / The digests of newly available blobs (kept for backward compat / simple notifications). + #[prost(message, repeated, tag = "2")] + pub digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + /// / If true, this is a full snapshot of all blobs in the worker's cache. + /// / The server should replace its entire view for this endpoint with the + /// / contents of this message (digest_infos + digests). If false, this is + /// / an incremental update (new blobs only). + #[prost(bool, tag = "3")] + pub is_full_snapshot: bool, + /// / Digests that have been evicted from the worker since the last update. + /// / Only meaningful when is_full_snapshot == false. + #[prost(message, repeated, tag = "4")] + pub evicted_digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + /// / Per-digest info with LRU timestamps. When present, the server should + /// / prefer this over the plain `digests` field. + #[prost(message, repeated, tag = "5")] + pub digest_infos: ::prost::alloc::vec::Vec, + /// / CPU load percentage: load_avg_1m / num_cpus * 100. + /// / 0 means unknown (old workers that don't report load). + #[prost(uint32, tag = "6")] + pub cpu_load_pct: u32, + /// / Digests of input root directories that are cached in this worker's + /// / directory cache. The scheduler can give routing preference to workers + /// / that already have the action's input_root_digest cached. + /// / Also used for the full subtree snapshot (when is_full_subtree_snapshot=true, + /// / this contains ALL directory digests including subtrees). 
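Taken together, `is_full_snapshot`, `evicted_digests`, and the subtree delta fields define a small replace-or-patch protocol for the scheduler's per-endpoint view of worker caches. A sketch of the bookkeeping those semantics imply on the scheduler side (digests simplified to strings; these are not NativeLink's actual types):

```rust
use std::collections::{HashMap, HashSet};

// Per-endpoint view of which blobs each worker currently holds.
#[derive(Default)]
struct PeerBlobView {
    by_endpoint: HashMap<String, HashSet<String>>,
}

impl PeerBlobView {
    // Apply one BlobsAvailableNotification-shaped update: a full
    // snapshot replaces the endpoint's whole set, while a delta adds
    // new digests and drops evicted ones.
    fn apply(
        &mut self,
        endpoint: &str,
        is_full_snapshot: bool,
        digests: Vec<String>,
        evicted_digests: Vec<String>,
    ) {
        let view = self.by_endpoint.entry(endpoint.to_string()).or_default();
        if is_full_snapshot {
            *view = digests.into_iter().collect();
        } else {
            view.extend(digests);
            for digest in &evicted_digests {
                view.remove(digest);
            }
        }
    }
}
```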
+ #[prost(message, repeated, tag = "7")] + pub cached_directory_digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + /// / Delta-encoded subtree updates since last notification. + /// / When a cache entry is added, send ALL directory digests in its merkle tree. + /// / When a cache entry is evicted, send ALL directory digests that were removed + /// / (only those no longer present in ANY cached entry's merkle tree). + #[prost(message, repeated, tag = "8")] + pub added_subtree_digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + #[prost(message, repeated, tag = "9")] + pub removed_subtree_digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + /// / True on the first notification after (re)connect — scheduler should + /// / replace its cached_subtree_digests state rather than applying a delta. + /// / In this case, cached_directory_digests (field 7) contains the full set + /// / of all subtree digests. + #[prost(bool, tag = "10")] + pub is_full_subtree_snapshot: bool, +} +/// / Notification that blobs have been evicted from a worker. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct BlobsEvictedNotification { + /// / The worker's CAS endpoint from which these blobs were evicted. + #[prost(string, tag = "1")] + pub worker_cas_endpoint: ::prost::alloc::string::String, + /// / The digests of evicted blobs. + #[prost(message, repeated, tag = "2")] + pub digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, +} +/// / Request to touch (update access time) blobs on a worker to prevent eviction. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct TouchBlobsRequest { + /// / The digests of blobs to touch. + #[prost(message, repeated, tag = "1")] + pub digests: ::prost::alloc::vec::Vec< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, +} +/// / A hint that a specific digest is available on one or more peer workers. +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct PeerHint { + /// / The digest available on peers. + #[prost(message, optional, tag = "1")] + pub digest: ::core::option::Option< + super::super::super::super::super::build::bazel::remote::execution::v2::Digest, + >, + /// / gRPC endpoints of workers that have this blob. + #[prost(string, repeated, tag = "2")] + pub peer_endpoints: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } /// / The result of an ExecutionRequest. #[derive(Clone, PartialEq, ::prost::Message)] @@ -85,6 +201,10 @@ pub struct ExecuteComplete { /// / The operation ID that was executed. #[prost(string, tag = "1")] pub operation_id: ::prost::alloc::string::String, + /// / CPU load percentage: load_avg_1m / num_cpus * 100. + /// / 0 means unknown (old workers that don't report load). + #[prost(uint32, tag = "2")] + pub cpu_load_pct: u32, } /// / Result sent back from the server when a node connects. #[derive(Clone, PartialEq, ::prost::Message)] @@ -103,7 +223,7 @@ pub struct KillOperationRequest { /// / Communication from the scheduler to the worker. 
#[derive(Clone, PartialEq, ::prost::Message)] pub struct UpdateForWorker { - #[prost(oneof = "update_for_worker::Update", tags = "1, 2, 3, 4, 5")] + #[prost(oneof = "update_for_worker::Update", tags = "1, 2, 3, 4, 5, 7")] pub update: ::core::option::Option, } /// Nested message and enum types in `UpdateForWorker`. @@ -132,12 +252,16 @@ pub mod update_for_worker { /// / Instructs the worker to kill a specific running operation. #[prost(message, tag = "5")] KillOperationRequest(super::KillOperationRequest), + /// / Instructs the worker to touch (update access time) on blobs + /// / to prevent premature eviction. + #[prost(message, tag = "7")] + TouchBlobs(super::TouchBlobsRequest), } } /// / Communication from the worker to the scheduler. #[derive(Clone, PartialEq, ::prost::Message)] pub struct UpdateForScheduler { - #[prost(oneof = "update_for_scheduler::Update", tags = "1, 2, 3, 4, 5")] + #[prost(oneof = "update_for_scheduler::Update", tags = "1, 2, 3, 4, 5, 7, 8")] pub update: ::core::option::Option, } /// Nested message and enum types in `UpdateForScheduler`. @@ -174,6 +298,12 @@ pub mod update_for_scheduler { /// / Notify that the execution has completed, but result is uploading. #[prost(message, tag = "5")] ExecuteComplete(super::ExecuteComplete), + /// / Notifies the scheduler that new blobs are available on this worker. + #[prost(message, tag = "7")] + BlobsAvailable(super::BlobsAvailableNotification), + /// / Notifies the scheduler that blobs have been evicted from this worker. + #[prost(message, tag = "8")] + BlobsEvicted(super::BlobsEvictedNotification), } } #[derive(Clone, PartialEq, ::prost::Message)] @@ -199,6 +329,10 @@ pub struct StartExecute { /// / The ID of the worker that is executing the action. #[prost(string, tag = "6")] pub worker_id: ::prost::alloc::string::String, + /// / Hints about input blobs available on peer workers. + /// / Workers should try these peers first before falling back to server CAS. + #[prost(message, repeated, tag = "8")] + pub peer_hints: ::prost::alloc::vec::Vec, } /// / This is a special message used to save actions into the CAS that can be used /// / by programs like bb_browswer to inspect the history of a build. 
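All of the message changes above lean on proto3's compatibility rules: new fields get fresh tags, retired tags stay `reserved`, and decoders silently skip tags they do not know. A minimal demonstration with prost, using stand-in structs for the old and new `KeepAliveRequest` shapes (not the generated types themselves):

```rust
use prost::Message;

// New schema: carries the load figure at tag 2.
#[derive(Clone, PartialEq, Message)]
struct KeepAliveNew {
    #[prost(uint32, tag = "2")]
    cpu_load_pct: u32,
}

// Old schema: no fields at all, as before this change.
#[derive(Clone, PartialEq, Message)]
struct KeepAliveOld {}

fn main() {
    let encoded = KeepAliveNew { cpu_load_pct: 42 }.encode_to_vec();
    // An old reader simply skips the unknown tag...
    let _old = KeepAliveOld::decode(encoded.as_slice()).unwrap();
    // ...while a new reader sees the value, and sees 0 ("unknown")
    // when decoding bytes from an old sender.
    let new = KeepAliveNew::decode(encoded.as_slice()).unwrap();
    assert_eq!(new.cpu_load_pct, 42);
    let from_old = KeepAliveNew::decode(b"".as_slice()).unwrap();
    assert_eq!(from_old.cpu_load_pct, 0);
}
```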
@@ -328,7 +462,7 @@ pub mod worker_api_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/com.github.trace_machina.nativelink.remote_execution.WorkerApi/ConnectWorker", ); @@ -496,7 +630,7 @@ pub mod worker_api_server { let inner = self.inner.clone(); let fut = async move { let method = ConnectWorkerSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, diff --git a/nativelink-proto/genproto/google.bytestream.pb.rs b/nativelink-proto/genproto/google.bytestream.pb.rs index d0229a041..fe14f6bb4 100644 --- a/nativelink-proto/genproto/google.bytestream.pb.rs +++ b/nativelink-proto/genproto/google.bytestream.pb.rs @@ -232,7 +232,7 @@ pub mod byte_stream_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.bytestream.ByteStream/Read", ); @@ -275,7 +275,7 @@ pub mod byte_stream_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.bytestream.ByteStream/Write", ); @@ -313,7 +313,7 @@ pub mod byte_stream_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.bytestream.ByteStream/QueryWriteStatus", ); @@ -530,7 +530,7 @@ pub mod byte_stream_server { let inner = self.inner.clone(); let fut = async move { let method = ReadSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -577,7 +577,7 @@ pub mod byte_stream_server { let inner = self.inner.clone(); let fut = async move { let method = WriteSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -622,7 +622,7 @@ pub mod byte_stream_server { let inner = self.inner.clone(); let fut = async move { let method = QueryWriteStatusSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, diff --git a/nativelink-proto/genproto/google.devtools.build.v1.pb.rs b/nativelink-proto/genproto/google.devtools.build.v1.pb.rs index 94d70d8f6..a0f46a41a 100644 --- a/nativelink-proto/genproto/google.devtools.build.v1.pb.rs +++ b/nativelink-proto/genproto/google.devtools.build.v1.pb.rs @@ -633,7 +633,7 @@ pub mod publish_build_event_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.devtools.build.v1.PublishBuildEvent/PublishLifecycleEvent", ); @@ -668,7 +668,7 @@ pub mod publish_build_event_client { 
format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.devtools.build.v1.PublishBuildEvent/PublishBuildToolEventStream", ); @@ -857,7 +857,7 @@ pub mod publish_build_event_server { let inner = self.inner.clone(); let fut = async move { let method = PublishLifecycleEventSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -912,7 +912,7 @@ pub mod publish_build_event_server { let inner = self.inner.clone(); let fut = async move { let method = PublishBuildToolEventStreamSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, diff --git a/nativelink-proto/genproto/google.longrunning.pb.rs b/nativelink-proto/genproto/google.longrunning.pb.rs index fec578107..aafbbb9b2 100644 --- a/nativelink-proto/genproto/google.longrunning.pb.rs +++ b/nativelink-proto/genproto/google.longrunning.pb.rs @@ -267,7 +267,7 @@ pub mod operations_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.longrunning.Operations/ListOperations", ); @@ -293,7 +293,7 @@ pub mod operations_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.longrunning.Operations/GetOperation", ); @@ -320,7 +320,7 @@ pub mod operations_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.longrunning.Operations/DeleteOperation", ); @@ -353,7 +353,7 @@ pub mod operations_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.longrunning.Operations/CancelOperation", ); @@ -385,7 +385,7 @@ pub mod operations_client { format!("Service was not ready: {}", e.into()), ) })?; - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let path = http::uri::PathAndQuery::from_static( "/google.longrunning.Operations/WaitOperation", ); @@ -586,7 +586,7 @@ pub mod operations_server { let inner = self.inner.clone(); let fut = async move { let method = ListOperationsSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -631,7 +631,7 @@ pub mod operations_server { let inner = self.inner.clone(); let fut = async move { let method = GetOperationSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -676,7 +676,7 @@ pub mod operations_server { let inner 
= self.inner.clone(); let fut = async move { let method = DeleteOperationSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -721,7 +721,7 @@ pub mod operations_server { let inner = self.inner.clone(); let fut = async move { let method = CancelOperationSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, @@ -766,7 +766,7 @@ pub mod operations_server { let inner = self.inner.clone(); let fut = async move { let method = WaitOperationSvc(inner); - let codec = tonic::codec::ProstCodec::default(); + let codec = tonic_prost::ProstCodec::default(); let mut grpc = tonic::server::Grpc::new(codec) .apply_compression_config( accept_compression_encodings, diff --git a/nativelink-scheduler/Cargo.toml b/nativelink-scheduler/Cargo.toml index 5f98f9fd8..920e28972 100644 --- a/nativelink-scheduler/Cargo.toml +++ b/nativelink-scheduler/Cargo.toml @@ -20,13 +20,13 @@ bytes = { version = "1.10.1", default-features = false } futures = { version = "0.3.31", default-features = false } lru = { version = "0.16.0", default-features = false } mock_instant = { version = "0.5.3", default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } -opentelemetry-semantic-conventions = { version = "0.29.0", default-features = false, features = [ +opentelemetry = { version = "0.31.0", default-features = false } +opentelemetry-semantic-conventions = { version = "0.31.0", default-features = false, features = [ "default", "semconv_experimental", ] } parking_lot = { version = "0.12.3", default-features = false } -prost = { version = "0.13.5", default-features = false } +prost = { version = "0.14.3", default-features = false } redis = { version = "1.0.0", default-features = false } scopeguard = { version = "1.2.0", default-features = false } serde = { version = "1.0.219", features = ["rc"], default-features = false } @@ -41,8 +41,8 @@ tokio = { version = "1.44.1", features = [ tokio-stream = { version = "0.1.17", features = [ "fs", ], default-features = false } -tonic = { version = "0.13.0", features = [ - "tls-ring", +tonic = { version = "0.14.5", features = [ + "tls-aws-lc", "transport", ], default-features = false } tracing = { version = "0.1.41", default-features = false } diff --git a/nativelink-scheduler/src/api_worker_scheduler.rs b/nativelink-scheduler/src/api_worker_scheduler.rs index edfe56c67..435a8c404 100644 --- a/nativelink-scheduler/src/api_worker_scheduler.rs +++ b/nativelink-scheduler/src/api_worker_scheduler.rs @@ -12,13 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use core::num::NonZeroUsize;
 use core::ops::{Deref, DerefMut};
 use core::sync::atomic::{AtomicU64, Ordering};
 use core::time::Duration;
+use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
-use std::time::{Instant, UNIX_EPOCH};
+use std::time::{Instant, SystemTime, UNIX_EPOCH};

-use async_lock::Mutex;
+use async_lock::RwLock;
 use lru::LruCache;
 use nativelink_config::schedulers::WorkerAllocationStrategy;
 use nativelink_error::{Code, Error, ResultExt, error_if, make_err, make_input_err};
@@ -26,13 +28,22 @@ use nativelink_metric::{
     MetricFieldData, MetricKind, MetricPublishKnownKindData, MetricsComponent,
     RootMetricsComponent, group,
 };
+use nativelink_proto::build::bazel::remote::execution::v2::Directory;
+use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{
+    PeerHint, StartExecute, UpdateForWorker, update_for_worker,
+};
+use nativelink_util::blob_locality_map::SharedBlobLocalityMap;
 use nativelink_util::action_messages::{OperationId, WorkerId};
+use nativelink_util::common::DigestInfo;
 use nativelink_util::operation_state_manager::{UpdateOperationType, WorkerStateManager};
 use nativelink_util::platform_properties::PlatformProperties;
 use nativelink_util::shutdown_guard::ShutdownGuard;
+use nativelink_util::store_trait::{Store, StoreKey, StoreLike};
+use prost::Message;
 use tokio::sync::Notify;
+use tokio::sync::mpsc::UnboundedSender;
 use tonic::async_trait;
-use tracing::{error, info, trace, warn};
+use tracing::{debug, error, info, trace, warn};

 /// Metrics for tracking scheduler performance.
 #[derive(Debug, Default)]
@@ -60,7 +71,10 @@ pub struct SchedulerMetrics {
 }

 use crate::platform_property_manager::PlatformPropertyManager;
-use crate::worker::{ActionInfoWithProps, Worker, WorkerTimestamp, WorkerUpdate};
+use crate::worker::{
+    ActionInfoWithProps, PendingActionInfoData, Worker, WorkerTimestamp, WorkerUpdate,
+    reduce_platform_properties,
+};
 use crate::worker_capability_index::WorkerCapabilityIndex;
 use crate::worker_registry::SharedWorkerRegistry;
 use crate::worker_scheduler::WorkerScheduler;
@@ -123,6 +137,10 @@ struct ApiWorkerSchedulerImpl {
     /// Used to accelerate `find_worker_for_action` by filtering candidates
     /// based on properties before doing linear scan.
     capability_index: WorkerCapabilityIndex,
+
+    /// Reverse map: CAS endpoint → WorkerId.
+    /// Updated when workers are added/removed.
+    endpoint_to_worker: HashMap<String, WorkerId>,
 }

 impl core::fmt::Debug for ApiWorkerSchedulerImpl {
@@ -136,6 +154,7 @@ impl core::fmt::Debug for ApiWorkerSchedulerImpl {
             &self.capability_index.worker_count(),
         )
         .field("worker_registry", &self.worker_registry)
+        .field("endpoint_to_worker_len", &self.endpoint_to_worker.len())
         .finish_non_exhaustive()
     }
 }
@@ -168,6 +187,14 @@ impl ApiWorkerSchedulerImpl {
         );
         worker.last_update_timestamp = timestamp;

+        // If the worker was in quarantine, clear it now that it has checked in.
+        if worker.quarantined_at.take().is_some() {
+            info!(
+                ?worker_id,
+                "Worker exited quarantine after sending keepalive"
+            );
+        }
+
         trace!(
             ?worker_id,
             running_operations = worker.running_action_infos.len(),
@@ -182,6 +209,13 @@ impl ApiWorkerSchedulerImpl {
     fn add_worker(&mut self, worker: Worker) -> Result<(), Error> {
         let worker_id = worker.id.clone();
         let platform_properties = worker.platform_properties.clone();
+
+        // Update endpoint → worker reverse map for locality scoring.
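+        // (Illustrative assumption: each worker advertises a unique,
+        // non-empty `cas_endpoint`. If two workers ever shared an endpoint,
+        // this insert would silently overwrite the earlier mapping and
+        // locality scores would credit only the later worker.)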
+ if !worker.cas_endpoint.is_empty() { + self.endpoint_to_worker + .insert(worker.cas_endpoint.clone(), worker_id.clone()); + } + self.workers.put(worker_id.clone(), worker); // Add to capability index for fast matching @@ -214,6 +248,14 @@ impl ApiWorkerSchedulerImpl { self.capability_index.remove_worker(worker_id); let result = self.workers.pop(worker_id); + + // Remove from endpoint → worker reverse map. + if let Some(ref worker) = result { + if !worker.cas_endpoint.is_empty() { + self.endpoint_to_worker.remove(&worker.cas_endpoint); + } + } + self.worker_change_notify.notify_one(); result } @@ -234,7 +276,7 @@ impl ApiWorkerSchedulerImpl { } fn inner_find_worker_for_action( - &self, + &mut self, platform_properties: &PlatformProperties, full_worker_logging: bool, ) -> Option { @@ -247,18 +289,45 @@ impl ApiWorkerSchedulerImpl { if candidates.is_empty() { if full_worker_logging { - info!("No workers in capability index match required properties"); + debug!("No workers in capability index match required properties"); } return None; } + // Clear is_paused for candidate workers that now have capacity, + // but only if they were paused due to a capacity check (not explicit + // worker backpressure like ResourceExhausted). Workers that reported + // ResourceExhausted should remain paused until they complete an action. + for wid in &candidates { + if let Some(worker) = self.workers.0.peek_mut(wid) { + if worker.is_paused && !worker.is_draining && !worker.paused_due_to_backpressure { + let has_capacity = worker.max_inflight_tasks == 0 + || u64::try_from(worker.running_action_infos.len()).unwrap_or(u64::MAX) + < worker.max_inflight_tasks; + if has_capacity { + worker.is_paused = false; + } + } + } + } + // Check function for availability AND dynamic Minimum property verification. // The index only does presence checks for Minimum properties since their // values change dynamically as jobs are assigned to workers. let worker_matches = |(worker_id, w): &(&WorkerId, &Worker)| -> bool { + // Quarantined workers must not receive new actions. + if w.quarantined_at.is_some() { + if full_worker_logging { + debug!( + "Worker {worker_id} is quarantined, skipping for new work" + ); + } + return false; + } + if !w.can_accept_work() { if full_worker_logging { - info!( + debug!( "Worker {worker_id} cannot accept work: is_paused={}, is_draining={}, inflight={}/{}", w.is_paused, w.is_draining, @@ -279,28 +348,348 @@ impl ApiWorkerSchedulerImpl { // Now check constraints on filtered candidates. // Iterate in LRU order based on allocation strategy. + // Note: iter() does not promote entries in the LRU. We find the worker + // first via iter(), then promote it via get_mut() below to avoid + // multiple consecutive actions all matching the same "least recently used" worker. let workers_iter = self.workers.iter(); - let worker_id = match self.allocation_strategy { - // Use rfind to get the least recently used that satisfies the properties. + // Collect viable candidates with their load info for load-aware selection. + let viable: Vec<_> = match self.allocation_strategy { WorkerAllocationStrategy::LeastRecentlyUsed => workers_iter .rev() .filter(|(worker_id, _)| candidates.contains(worker_id)) - .find(&worker_matches) - .map(|(_, w)| w.id.clone()), - - // Use find to get the most recently used that satisfies the properties. 
+                .filter(|pair| worker_matches(pair))
+                .map(|(_, w)| (w.id.clone(), w.cpu_load_pct))
+                .collect(),
             WorkerAllocationStrategy::MostRecentlyUsed => workers_iter
                 .filter(|(worker_id, _)| candidates.contains(worker_id))
-                .find(&worker_matches)
-                .map(|(_, w)| w.id.clone()),
+                .filter(|pair| worker_matches(pair))
+                .map(|(_, w)| (w.id.clone(), w.cpu_load_pct))
+                .collect(),
+        };
+
+        // Pick the lightest-loaded worker among viable candidates.
+        // Workers with cpu_load_pct == 0 (unknown) are sorted last among
+        // workers that have reported load. Falls back to LRU/MRU order
+        // (first in the vec) when no workers have reported load.
+        let worker_id = if viable.iter().any(|(_, load)| *load > 0) {
+            // At least one worker has reported load — pick lightest.
+            viable
+                .iter()
+                .min_by_key(|(_, load)| if *load == 0 { u32::MAX } else { *load })
+                .map(|(id, _)| id.clone())
+        } else {
+            // No load data — use first viable (LRU/MRU order).
+            viable.first().map(|(id, _)| id.clone())
         };
+
+        // Log load-aware selection decision.
+        if let Some(ref wid) = worker_id {
+            let viable_loads: Vec<_> = viable
+                .iter()
+                .map(|(id, load)| {
+                    let short_id = id.0.chars().take(12).collect::<String>();
+                    (short_id, *load)
+                })
+                .collect();
+            let winner_load = viable
+                .iter()
+                .find(|(id, _)| id == wid)
+                .map(|(_, l)| *l)
+                .unwrap_or(0);
+            debug!(
+                candidates = viable.len(),
+                worker_id = %wid,
+                winner_load_pct = winner_load,
+                ?viable_loads,
+                "Load-aware worker selection"
+            );
+        }
+
+        // Promote the found worker in the LRU so the next find_worker_for_action
+        // call won't pick the same worker again (prevents work bunching).
+        if let Some(ref wid) = worker_id {
+            self.workers.get_mut(wid);
+        }
+
         if full_worker_logging && worker_id.is_none() {
-            warn!("No workers matched!");
+            debug!("No workers matched!");
         }
         worker_id
     }

+    /// Atomically finds a suitable worker AND reserves it for the given
+    /// operation by mutating the worker's state (reducing platform properties,
+    /// inserting into `running_action_infos`). Returns the worker ID, the
+    /// channel sender, and pre-built protobuf message so the caller can
+    /// send the notification after releasing the lock.
+    ///
+    /// Uses locality-aware scheduling:
+    /// - Primary: score candidates by total bytes of cached input blobs
+    ///   using pre-computed endpoint scores (computed outside the lock).
+    /// - Fallback: existing LRU/MRU strategy.
+    ///
+    /// This prevents two concurrent match operations from selecting the
+    /// same worker, which is the key enabler for `MATCH_CONCURRENCY > 1`.
+    ///
+    /// `endpoint_scores` and `peer_hints` are pre-computed outside the write
+    /// lock to avoid holding it during O(files) iterations over the locality
+    /// map.
+    fn inner_find_and_reserve_worker(
+        &mut self,
+        platform_properties: &PlatformProperties,
+        operation_id: &OperationId,
+        action_info: &ActionInfoWithProps,
+        full_worker_logging: bool,
+        endpoint_scores: Option<&HashMap<String, (u64, SystemTime)>>,
+        peer_hints: Vec<PeerHint>,
+        resolved_tree: Option<&ResolvedTree>,
+    ) -> Option<(WorkerId, UnboundedSender<UpdateForWorker>, UpdateForWorker)> {
+        let input_root_digest = action_info.inner.input_root_digest;
+
+        // Build the set of capability-matching candidates that can accept work.
+        let candidates = self
+            .capability_index
+            .find_matching_workers(platform_properties, full_worker_logging);
+
+        if candidates.is_empty() {
+            if full_worker_logging {
+                debug!("No workers in capability index match required properties");
+            }
+            return None;
+        }
+
+        // Helper: check if a specific worker is a valid candidate.
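+        // Viability here means: capability match, still present in the
+        // worker map, not quarantined, can_accept_work(), and the dynamic
+        // platform-property check. All three scoring tiers below share this
+        // predicate, so none of them can pick a worker that the fallback
+        // LRU/MRU path would reject.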
+        let worker_is_viable = |worker_id: &WorkerId| -> bool {
+            if !candidates.contains(worker_id) {
+                return false;
+            }
+            let Some(w) = self.workers.0.peek(worker_id) else {
+                return false;
+            };
+            if w.quarantined_at.is_some() || !w.can_accept_work() {
+                return false;
+            }
+            platform_properties.is_satisfied_by(&w.platform_properties, false)
+        };
+
+        // ── Tier 1: Exact root match ──
+        // If a viable worker has the action's input_root_digest in its directory
+        // cache (either as a root or as a subtree of a previously cached tree),
+        // it can hardlink the entire input tree in milliseconds instead of
+        // reconstructing it from CAS.
+        let dir_cache_winner: Option<WorkerId> = {
+            let mut best: Option<(WorkerId, u32)> = None; // (id, cpu_load)
+            for wid in &candidates {
+                if let Some(w) = self.workers.0.peek(wid) {
+                    let has_root_match = w.cached_directory_digests.contains(&input_root_digest);
+                    let has_subtree_match = w.cached_subtree_digests.contains(&input_root_digest);
+                    if (has_root_match || has_subtree_match)
+                        && worker_is_viable(wid)
+                    {
+                        let load = w.cpu_load_pct;
+                        let dominated = best.as_ref().is_some_and(|(_, best_load)| {
+                            let effective_best = if *best_load == 0 { u32::MAX } else { *best_load };
+                            let effective_this = if load == 0 { u32::MAX } else { load };
+                            effective_this >= effective_best
+                        });
+                        if !dominated {
+                            best = Some((wid.clone(), load));
+                        }
+                    }
+                }
+            }
+            if let Some((ref wid, load)) = best {
+                debug!(
+                    ?wid,
+                    cpu_load_pct = load,
+                    %input_root_digest,
+                    "Directory cache hit -- worker has input_root_digest cached (root or subtree), giving scheduling priority"
+                );
+            }
+            best.map(|(wid, _)| wid)
+        };
+
+        // ── Tier 1.5: Partial subtree coverage scoring ──
+        // When no worker has the exact root cached, score workers by the total
+        // file bytes under their cached subtrees. A worker caching a subtree with
+        // 10GB of files scores higher than one caching a subtree with 100 bytes.
+        // We sum the subtree_bytes for each matching directory, taking only the
+        // top-level match (avoid double-counting nested matches).
+        let subtree_coverage_winner: Option<WorkerId> = if dir_cache_winner.is_some() {
+            None // exact match found, skip coverage scoring
+        } else if let Some(tree) = resolved_tree {
+            let total_bytes: u64 = tree.subtree_bytes.get(&input_root_digest).copied().unwrap_or(0);
+            if tree.dir_digests.len() <= 1 || total_bytes == 0 {
+                None // only root (or empty), no subtrees to match
+            } else {
+                let mut best: Option<(WorkerId, u64, u32)> = None; // (id, cached_bytes, cpu_load)
+                for wid in &candidates {
+                    if let Some(w) = self.workers.0.peek(wid) {
+                        if !worker_is_viable(wid) {
+                            continue;
+                        }
+                        // Sum the subtree_bytes for each of the action's directory
+                        // digests that this worker has cached.
+                        let cached_bytes: u64 = tree.dir_digests.iter()
+                            .filter(|d| w.cached_subtree_digests.contains(d))
+                            .map(|d| tree.subtree_bytes.get(d).copied().unwrap_or(0))
+                            .sum();
+                        if cached_bytes == 0 {
+                            continue;
+                        }
+                        let load = w.cpu_load_pct;
+                        let dominated = best.as_ref().is_some_and(|(_, best_bytes, best_load)| {
+                            if cached_bytes != *best_bytes {
+                                return cached_bytes < *best_bytes;
+                            }
+                            // Same coverage — prefer lower CPU load.
+ let effective_best = if *best_load == 0 { u32::MAX } else { *best_load }; + let effective_this = if load == 0 { u32::MAX } else { load }; + effective_this >= effective_best + }); + if !dominated { + best = Some((wid.clone(), cached_bytes, load)); + } + } + } + if let Some((ref wid, cached_bytes, load)) = best { + let pct = if total_bytes > 0 { cached_bytes * 100 / total_bytes } else { 0 }; + debug!( + ?wid, + cached_bytes, + total_bytes, + cpu_load_pct = load, + coverage_pct = pct, + %input_root_digest, + "Subtree coverage winner -- worker has {}% of input tree bytes cached in subtrees", + pct, + ); + } + best.map(|(wid, _, _)| wid) + } + } else { + None + }; + + // ── Locality scoring ── + // Convert pre-computed endpoint scores to worker scores, filtering + // to the candidate set. This is O(endpoints) not O(files). + let locality_winner = if let Some(ep_scores) = endpoint_scores { + let scores = endpoint_scores_to_worker_scores( + ep_scores, + &self.endpoint_to_worker, + &candidates, + ); + if !scores.is_empty() { + // Sort workers by score descending, then by timestamp + // descending as a tiebreaker. Workers within 10% of the + // top score are considered tied and the most recently + // refreshed one wins. + let mut sorted: Vec<_> = scores.into_iter().collect(); + // Look up cpu_load_pct for tiebreaking within 10% score range. + let load_for_worker = |wid: &WorkerId| -> u32 { + self.workers.0.peek(wid) + .map(|w| w.cpu_load_pct) + .unwrap_or(0) + }; + sorted.sort_by(|a, b| { + let (score_a, ts_a) = a.1; + let (score_b, ts_b) = b.1; + let max_score = score_a.max(score_b); + // Within 10% of each other? Use CPU load, then timestamp. + let threshold = max_score / 10; // 10% of the larger score + if score_a.abs_diff(score_b) <= threshold { + // Scores are similar — prefer lower CPU load. + let load_a = load_for_worker(&a.0); + let load_b = load_for_worker(&b.0); + if load_a != load_b && (load_a > 0 || load_b > 0) { + // Sort unknown (0) after known loads. + let effective_a = if load_a == 0 { u32::MAX } else { load_a }; + let effective_b = if load_b == 0 { u32::MAX } else { load_b }; + effective_a.cmp(&effective_b) + } else { + // Same load or both unknown — prefer more recent timestamp. + ts_b.cmp(&ts_a) + } + } else { + // Scores differ significantly, prefer higher score. + score_b.cmp(&score_a) + } + }); + + let best = sorted.first().map(|(_, (s, _))| *s).unwrap_or(0); + if best > 0 { + sorted.into_iter() + .find(|(wid, (score, _))| *score > 0 && worker_is_viable(wid)) + .map(|(wid, (score, _))| { + debug!( + ?wid, + score, + %input_root_digest, + "Locality scoring -- worker has {} cached input bytes", + score + ); + wid + }) + } else { + None + } + } else { + None + } + } else { + None + }; + + let worker_id = if let Some(wid) = dir_cache_winner { + // Exact root match trumps all other scoring. + self.workers.get_mut(&wid); + wid + } else if let Some(wid) = subtree_coverage_winner { + // Partial subtree coverage beats blob-level locality. + self.workers.get_mut(&wid); + wid + } else if let Some(wid) = locality_winner { + // Blob-level locality scoring. + self.workers.get_mut(&wid); + wid + } else { + // ── Fallback: existing LRU/MRU strategy ── + let wid = self.inner_find_worker_for_action(platform_properties, full_worker_logging)?; + wid + }; + + // Atomically reserve the worker by mutating its state under the same lock. 
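+        // (Reserving under the same write lock is what makes
+        // MATCH_CONCURRENCY > 1 safe: a concurrent matcher observes the
+        // reduced platform properties and the new running_action_infos
+        // entry, so it cannot double-book this worker.)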
+ let (tx, msg) = self.prepare_worker_run_action( + &worker_id, + operation_id, + action_info, + peer_hints, + )?; + + Some((worker_id, tx, msg)) + } + + /// Undoes a reservation made by `inner_find_and_reserve_worker`. + /// This removes the operation from the worker's `running_action_infos` + /// and restores the reduced platform properties. + fn inner_unreserve_worker( + &mut self, + worker_id: &WorkerId, + operation_id: &OperationId, + ) { + if let Some(worker) = self.workers.get_mut(worker_id) { + if let Some(pending) = worker.running_action_infos.remove(operation_id) { + if !worker.restored_platform_properties.remove(operation_id) { + worker.restore_platform_properties( + &pending.action_info.platform_properties, + ); + } + } + } + } + async fn update_action( &mut self, worker_id: &WorkerId, @@ -367,6 +756,7 @@ impl ApiWorkerSchedulerImpl { if (due_to_backpressure || !worker.can_accept_work()) && worker.has_actions() { worker.is_paused = true; + worker.paused_due_to_backpressure = due_to_backpressure; } complete_action_res }; @@ -376,61 +766,63 @@ impl ApiWorkerSchedulerImpl { complete_action_res } - /// Notifies the specified worker to run the given action and handles errors by evicting - /// the worker if the notification fails. - async fn worker_notify_run_action( + /// Prepares a worker to run an action by mutating its state (reducing platform + /// properties, recording the running action), then returns the cloned `tx` sender + /// and pre-built message so the caller can send the notification *after* releasing + /// the write lock. + /// + /// `peer_hints` are pre-computed outside the write lock from the resolved + /// input tree. When no resolved tree is available the hints will be empty + /// -- the old fallback that generated a single hint for `input_root_digest` + /// never worked because workers register individual file digests, not + /// directory digests. + /// + /// Returns `None` if the worker was not found. + fn prepare_worker_run_action( &mut self, - worker_id: WorkerId, - operation_id: OperationId, - action_info: ActionInfoWithProps, - ) -> Result<(), Error> { - if let Some(worker) = self.workers.get_mut(&worker_id) { - let notify_worker_result = worker - .notify_update(WorkerUpdate::RunAction((operation_id, action_info.clone()))) - .await; - - if let Err(notify_worker_result) = notify_worker_result { - warn!( - ?worker_id, - ?action_info, - ?notify_worker_result, - "Worker command failed, removing worker", - ); - - // A slightly nasty way of figuring out that the worker disconnected - // from send_msg_to_worker without introducing complexity to the - // code path from here to there. - let is_disconnect = notify_worker_result.code == Code::Internal - && notify_worker_result.messages.len() == 1 - && notify_worker_result.messages[0] == "Worker Disconnected"; - - let err = make_err!( - Code::Internal, - "Worker command failed, removing worker {worker_id} -- {notify_worker_result:?}", - ); - - return Result::<(), _>::Err(err.clone()).merge( - self.immediate_evict_worker(&worker_id, err, is_disconnect) - .await, - ); - } - Ok(()) - } else { - warn!( + worker_id: &WorkerId, + operation_id: &OperationId, + action_info: &ActionInfoWithProps, + peer_hints: Vec, + ) -> Option<(UnboundedSender, UpdateForWorker)> { + let worker = self.workers.get_mut(worker_id)?; + // Clone the tx so we can send outside the lock. 
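+        // (The actual send happens in phase 2, after the caller drops the
+        // write lock, so a slow or disconnected worker cannot stall other
+        // scheduler operations while the lock is held.)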
+        let tx = worker.tx.clone();
+
+        if !peer_hints.is_empty() {
+            debug!(
                 ?worker_id,
-                %operation_id,
-                ?action_info,
-                "Worker not found in worker map in worker_notify_run_action"
+                hints = peer_hints.len(),
+                "Generated peer hints for StartExecute"
             );
-            // Ensure the operation is put back to queued state.
-            self.worker_state_manager
-                .update_operation(
-                    &operation_id,
-                    &worker_id,
-                    UpdateOperationType::UpdateWithDisconnect,
-                )
-                .await
         }
+
+        // Build the protobuf message while we still have access to worker state.
+        let start_execute = StartExecute {
+            execute_request: Some(action_info.inner.as_ref().into()),
+            operation_id: operation_id.to_string(),
+            queued_timestamp: Some(action_info.inner.insert_timestamp.into()),
+            platform: Some((&action_info.platform_properties).into()),
+            worker_id: worker.id.clone().into(),
+            peer_hints,
+        };
+        let msg = UpdateForWorker {
+            update: Some(update_for_worker::Update::StartAction(start_execute)),
+        };
+
+        // Perform the state mutation that run_action would do:
+        // reduce platform properties and record the running action.
+        reduce_platform_properties(
+            &mut worker.platform_properties,
+            &action_info.platform_properties,
+        );
+        worker.running_action_infos.insert(
+            operation_id.clone(),
+            PendingActionInfoData {
+                action_info: action_info.clone(),
+            },
+        );
+        Some((tx, msg))
     }

     /// Evicts the worker from the pool and puts items back into the queue if anything was being executed on it.
@@ -467,7 +859,7 @@
 #[derive(Debug, MetricsComponent)]
 pub struct ApiWorkerScheduler {
     #[metric]
-    inner: Mutex<ApiWorkerSchedulerImpl>,
+    inner: RwLock<ApiWorkerSchedulerImpl>,

     #[metric(group = "platform_property_manager")]
     platform_property_manager: Arc<PlatformPropertyManager>,
@@ -480,8 +872,23 @@ pub struct ApiWorkerScheduler {

     /// Performance metrics for observability.
     metrics: Arc<SchedulerMetrics>,
+
+    /// Blob locality map for peer-to-peer blob sharing.
+    /// Used to generate peer hints in StartExecute messages.
+    locality_map: Option<SharedBlobLocalityMap>,
+
+    /// CAS store for resolving input trees (reading Directory protos).
+    /// When set, enables tier-2 locality scoring.
+    cas_store: Option<Store>,
+
+    /// Cached resolved input trees: input_root_digest → ResolvedTree.
+    /// Held under a tokio::Mutex briefly for get/put, not during I/O.
+    tree_cache: Arc<tokio::sync::Mutex<LruCache<DigestInfo, Arc<ResolvedTree>>>>,
 }

+/// Capacity for the resolved input tree LRU cache.
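+/// (1024 entries is a bound on memory, not a tuned value: retries and
+/// actions sharing an input root hit the cache and skip the CAS reads
+/// entirely, while cold roots simply evict the least recently used entry.)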
+const TREE_CACHE_CAPACITY: usize = 1024; + impl ApiWorkerScheduler { pub fn new( worker_state_manager: Arc, @@ -490,9 +897,31 @@ impl ApiWorkerScheduler { worker_change_notify: Arc, worker_timeout_s: u64, worker_registry: SharedWorkerRegistry, + ) -> Arc { + Self::new_with_locality_map( + worker_state_manager, + platform_property_manager, + allocation_strategy, + worker_change_notify, + worker_timeout_s, + worker_registry, + None, + None, + ) + } + + pub fn new_with_locality_map( + worker_state_manager: Arc, + platform_property_manager: Arc, + allocation_strategy: WorkerAllocationStrategy, + worker_change_notify: Arc, + worker_timeout_s: u64, + worker_registry: SharedWorkerRegistry, + locality_map: Option, + cas_store: Option, ) -> Arc { Arc::new(Self { - inner: Mutex::new(ApiWorkerSchedulerImpl { + inner: RwLock::new(ApiWorkerSchedulerImpl { workers: Workers(LruCache::unbounded()), worker_state_manager: worker_state_manager.clone(), allocation_strategy, @@ -500,11 +929,17 @@ impl ApiWorkerScheduler { worker_registry: worker_registry.clone(), shutting_down: false, capability_index: WorkerCapabilityIndex::new(), + endpoint_to_worker: HashMap::new(), }), platform_property_manager, worker_timeout_s, worker_registry, metrics: Arc::new(SchedulerMetrics::default()), + locality_map, + cas_store, + tree_cache: Arc::new(tokio::sync::Mutex::new(LruCache::new( + NonZeroUsize::new(TREE_CACHE_CAPACITY).unwrap(), + ))), }) } @@ -522,10 +957,94 @@ impl ApiWorkerScheduler { self.metrics .actions_dispatched .fetch_add(1, Ordering::Relaxed); - let mut inner = self.inner.lock().await; - inner - .worker_notify_run_action(worker_id, operation_id, action_info) - .await + + // Phase 1: Acquire write lock, mutate worker state, extract tx + message, + // then drop the lock BEFORE sending on the channel. + let prepare_result = { + let mut inner = self.inner.write().await; + let result = + inner.prepare_worker_run_action(&worker_id, &operation_id, &action_info, Vec::new()); + if result.is_none() { + // Worker not found - handle under the lock since we need worker_state_manager. + warn!( + ?worker_id, + %operation_id, + ?action_info, + "Worker not found in worker map in worker_notify_run_action" + ); + return inner + .worker_state_manager + .update_operation( + &operation_id, + &worker_id, + UpdateOperationType::UpdateWithDisconnect, + ) + .await; + } + result + // inner (write lock) is dropped here + }; + + // Phase 2: Send notification outside the lock to avoid blocking other + // scheduler operations if the channel has backpressure. + if let Some((tx, msg)) = prepare_result { + if let Err(_send_err) = tx.send(msg) { + // Worker disconnected. Re-acquire lock to evict. + warn!( + ?worker_id, + ?action_info, + "Worker command failed (disconnected), removing worker", + ); + let err = make_err!( + Code::Internal, + "Worker command failed, removing worker {worker_id} -- Worker Disconnected", + ); + let mut inner = self.inner.write().await; + return Result::<(), _>::Err(err.clone()).merge( + inner + .immediate_evict_worker(&worker_id, err, true) + .await, + ); + } + } + + Ok(()) + } + + /// Sends the start-execution notification for a worker that was already + /// reserved by `find_and_reserve_worker`. The worker's state has already + /// been mutated (platform properties reduced, action recorded in + /// `running_action_infos`), so this method only sends the pre-built + /// message over the channel and handles disconnection errors. 
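+    /// (On send failure the worker is evicted just like the non-reserved
+    /// path; the caller should not need to unreserve, since eviction
+    /// requeues everything left in `running_action_infos`.)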
+ pub async fn send_reserved_worker_notification( + &self, + worker_id: &WorkerId, + tx: UnboundedSender, + msg: UpdateForWorker, + ) -> Result<(), Error> { + self.metrics + .actions_dispatched + .fetch_add(1, Ordering::Relaxed); + + if let Err(_send_err) = tx.send(msg) { + // Worker disconnected. Re-acquire lock to evict. + warn!( + ?worker_id, + "Worker command failed (disconnected) after reservation, removing worker", + ); + let err = make_err!( + Code::Internal, + "Worker command failed, removing worker {worker_id} -- Worker Disconnected", + ); + let mut inner = self.inner.write().await; + return Result::<(), _>::Err(err.clone()).merge( + inner + .immediate_evict_worker(worker_id, err, true) + .await, + ); + } + + Ok(()) } /// Returns the scheduler metrics for observability. @@ -548,7 +1067,7 @@ impl ApiWorkerScheduler { .find_worker_calls .fetch_add(1, Ordering::Relaxed); - let inner = self.inner.lock().await; + let mut inner = self.inner.write().await; let worker_count = inner.workers.len() as u64; let result = inner.inner_find_worker_for_action(platform_properties, full_worker_logging); @@ -574,10 +1093,111 @@ impl ApiWorkerScheduler { result } + /// Atomically finds a suitable worker AND reserves it for the given + /// operation. This combines the find and reservation into a single lock + /// acquisition, preventing two concurrent match operations from selecting + /// the same worker. + /// + /// Returns `(worker_id, tx, msg)` where `tx` and `msg` can be used to + /// send the start-execution notification to the worker outside the lock. + /// Returns `None` if no suitable worker was found. + /// + /// If the caller later decides not to use this reservation (e.g., because + /// `assign_operation` fails), it MUST call `unreserve_worker` to undo + /// the reservation. + pub async fn find_and_reserve_worker( + &self, + platform_properties: &PlatformProperties, + operation_id: &OperationId, + action_info: &ActionInfoWithProps, + full_worker_logging: bool, + ) -> Option<(WorkerId, UnboundedSender, UpdateForWorker)> { + let start = Instant::now(); + self.metrics + .find_worker_calls + .fetch_add(1, Ordering::Relaxed); + + // ── Phase 1: async tree resolution (BEFORE write lock) ── + let resolved_tree = self + .resolve_input_tree(action_info.inner.input_root_digest) + .await; + + // ── Phase 2: pre-compute locality scores and peer hints (BEFORE write lock) ── + // These are O(files × endpoints_per_blob) operations that previously + // ran inside the write lock, blocking all scheduler operations for + // 2-5ms on large actions (50K+ inputs). + let (endpoint_scores, peer_hints) = match (&resolved_tree, &self.locality_map) { + (Some(tree), Some(loc_map)) => { + let (scores, hints) = score_and_generate_hints(&tree.file_digests, loc_map); + (Some(scores), hints) + } + _ => (None, Vec::new()), + }; + + // ── Phase 3: acquire write lock, do selection + reservation ── + // Inside the lock we only do O(workers) work: candidate filtering, + // endpoint→WorkerId mapping, and state mutation. 
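+        // (For a 50K-input action the O(files) scoring above now runs with
+        // no lock held; the locked region below stays O(workers), so other
+        // scheduler operations are no longer blocked for milliseconds.)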
+ let mut inner = self.inner.write().await; + let worker_count = inner.workers.len() as u64; + let result = inner.inner_find_and_reserve_worker( + platform_properties, + operation_id, + action_info, + full_worker_logging, + endpoint_scores.as_ref(), + peer_hints, + resolved_tree.as_deref(), + ); + + // Track workers iterated (worst case is all workers) + self.metrics + .workers_iterated + .fetch_add(worker_count, Ordering::Relaxed); + + if result.is_some() { + self.metrics + .find_worker_hits + .fetch_add(1, Ordering::Relaxed); + } else { + self.metrics + .find_worker_misses + .fetch_add(1, Ordering::Relaxed); + } + + #[allow(clippy::cast_possible_truncation)] + self.metrics + .find_worker_time_ns + .fetch_add(start.elapsed().as_nanos() as u64, Ordering::Relaxed); + result + } + + /// Undoes a reservation made by `find_and_reserve_worker`. This must + /// be called if the match is abandoned after reservation (e.g., if + /// `assign_operation` returns an error). + pub async fn unreserve_worker( + &self, + worker_id: &WorkerId, + operation_id: &OperationId, + ) { + let mut inner = self.inner.write().await; + inner.inner_unreserve_worker(worker_id, operation_id); + } + + /// Returns true if any registered worker could match the given platform + /// properties (static check only — does not consider dynamic resource + /// availability like current cpu_count). + pub async fn has_matching_workers(&self, platform_properties: &PlatformProperties) -> bool { + let inner = self.inner.read().await; + !inner + .capability_index + .find_matching_workers(platform_properties, false) + .is_empty() + } + /// Checks to see if the worker exists in the worker pool. Should only be used in unit tests. #[must_use] pub async fn contains_worker_for_test(&self, worker_id: &WorkerId) -> bool { - let inner = self.inner.lock().await; + let inner = self.inner.read().await; inner.workers.contains(worker_id) } @@ -586,12 +1206,296 @@ impl ApiWorkerScheduler { &self, worker_id: &WorkerId, ) -> Result<(), Error> { - let mut inner = self.inner.lock().await; + let mut inner = self.inner.write().await; let worker = inner.workers.get_mut(worker_id).ok_or_else(|| { make_input_err!("WorkerId '{}' does not exist in workers map", worker_id) })?; worker.keep_alive() } + + /// Resolves the full input tree for the given `input_root_digest` by + /// reading Directory protos from the CAS store and collecting all file + /// digests and sizes. Results are cached in `tree_cache`. + /// + /// Returns `None` if no CAS store is configured or on any error (errors + /// are logged but do not fail scheduling — we just skip locality scoring). + /// + /// Runs *outside* the scheduler write lock, so multiple actions can + /// resolve concurrently. The `tokio::Mutex` on `tree_cache` is held + /// only briefly for get/put, not during store I/O. + async fn resolve_input_tree( + &self, + input_root_digest: DigestInfo, + ) -> Option> { + let cas_store = self.cas_store.as_ref()?; + + // Check cache first (brief lock). + { + let mut cache = self.tree_cache.lock().await; + if let Some(cached) = cache.get(&input_root_digest) { + debug!( + %input_root_digest, + file_count = cached.file_digests.len(), + dir_count = cached.dir_digests.len(), + "Tree resolution cache hit" + ); + return Some(cached.clone()); + } + } + + // Cache miss — resolve the tree by reading Directory protos from CAS. 
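+        // (Two concurrent misses on the same root may both resolve the
+        // tree and race on the final put; that duplicated work is the
+        // accepted cost of never holding the cache lock across store I/O.)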
+        let result = resolve_tree_from_cas(cas_store, input_root_digest).await;
+        match result {
+            Ok(resolved) => {
+                debug!(
+                    %input_root_digest,
+                    file_count = resolved.file_digests.len(),
+                    dir_count = resolved.dir_digests.len(),
+                    "Resolved input tree from CAS (cache miss)"
+                );
+                let arc = Arc::new(resolved);
+                // Store in cache (brief lock).
+                {
+                    let mut cache = self.tree_cache.lock().await;
+                    cache.put(input_root_digest, arc.clone());
+                }
+                Some(arc)
+            }
+            Err(err) => {
+                warn!(
+                    %input_root_digest,
+                    ?err,
+                    "Failed to resolve input tree for locality scoring, skipping"
+                );
+                None
+            }
+        }
+    }
+}
+
+/// Resolved input tree containing file digests, directory digests,
+/// and per-subtree file byte totals for coverage scoring.
+struct ResolvedTree {
+    /// (file_digest, file_size) pairs, deduplicated.
+    file_digests: Vec<(DigestInfo, u64)>,
+    /// All directory digests in the tree (including root), deduplicated.
+    dir_digests: HashSet<DigestInfo>,
+    /// Total file bytes under each directory subtree (recursive).
+    /// Used to weight subtree coverage scoring — a subtree with 10GB
+    /// of files is worth more than one with 100 bytes.
+    subtree_bytes: HashMap<DigestInfo, u64>,
+}
+
+/// Resolves a directory tree from the CAS store by recursively reading
+/// Directory protos and collecting file digests (for locality scoring),
+/// directory digests (for subtree coverage scoring), and per-subtree
+/// file byte totals (for weighted coverage scoring). Deduplicates both
+/// file and directory digests.
+async fn resolve_tree_from_cas(
+    cas_store: &Store,
+    root_digest: DigestInfo,
+) -> Result<ResolvedTree, Error> {
+    use futures::stream::FuturesUnordered;
+    use futures::StreamExt;
+
+    let mut file_digests: Vec<(DigestInfo, u64)> = Vec::new();
+    let mut seen_files: HashSet<DigestInfo> = HashSet::new();
+    let mut dirs_to_visit: Vec<DigestInfo> = vec![root_digest];
+    let mut seen_dirs: HashSet<DigestInfo> = HashSet::new();
+    seen_dirs.insert(root_digest);
+
+    // Track tree structure for bottom-up subtree size computation.
+    let mut dir_direct_bytes: HashMap<DigestInfo, u64> = HashMap::new();
+    let mut dir_children: HashMap<DigestInfo, Vec<DigestInfo>> = HashMap::new();
+    // BFS order — used for bottom-up traversal (reverse of BFS = leaves first).
+    let mut bfs_order: Vec<DigestInfo> = vec![root_digest];
+
+    while !dirs_to_visit.is_empty() {
+        let fetches: FuturesUnordered<_> = dirs_to_visit
+            .drain(..)
+            .map(|dir_digest| {
+                let cas_store = cas_store.clone();
+                async move {
+                    let key: StoreKey<'_> = dir_digest.into();
+                    let bytes = cas_store
+                        .get_part_unchunked(key, 0, None)
+                        .await
+                        .err_tip(|| {
+                            format!(
+                                "Reading directory {dir_digest} from CAS for tree resolution"
+                            )
+                        })?;
+                    let directory = Directory::decode(bytes).map_err(|e| {
+                        make_err!(Code::Internal, "Failed to decode Directory proto: {e}")
+                    })?;
+                    Ok::<_, Error>((dir_digest, directory))
+                }
+            })
+            .collect();
+
+        let results: Vec<Result<(DigestInfo, Directory), Error>> = fetches.collect().await;
+        for result in results {
+            let (parent_digest, directory) = result?;
+
+            // Sum direct file bytes for this directory.
+            let mut direct_bytes: u64 = 0;
+            for file_node in &directory.files {
+                if let Some(ref digest) = file_node.digest {
+                    if let Ok(digest_info) = DigestInfo::try_from(digest) {
+                        let size = digest_info.size_bytes();
+                        direct_bytes += size;
+                        if seen_files.insert(digest_info) {
+                            file_digests.push((digest_info, size));
+                        }
+                    }
+                }
+            }
+            dir_direct_bytes.insert(parent_digest, direct_bytes);
+
+            // Queue subdirectories for visiting (dedup via seen_dirs).
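+            // (Note: `children` intentionally keeps digests that were
+            // already seen -- subtree_bytes must credit a shared subtree to
+            // every parent that references it, while `seen_dirs` still
+            // guarantees each directory is fetched and decoded only once.)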
+            let mut children = Vec::new();
+            for dir_node in &directory.directories {
+                if let Some(ref digest) = dir_node.digest {
+                    if let Ok(digest_info) = DigestInfo::try_from(digest) {
+                        children.push(digest_info);
+                        if seen_dirs.insert(digest_info) {
+                            dirs_to_visit.push(digest_info);
+                            bfs_order.push(digest_info);
+                        }
+                    }
+                }
+            }
+            dir_children.insert(parent_digest, children);
+        }
+    }
+
+    // Bottom-up pass: compute total file bytes under each subtree.
+    // Reverse BFS order gives us leaves-first, so children are always
+    // computed before parents.
+    let mut subtree_bytes: HashMap<DigestInfo, u64> = HashMap::new();
+    for &dir_digest in bfs_order.iter().rev() {
+        let direct = dir_direct_bytes.get(&dir_digest).copied().unwrap_or(0);
+        let children_total: u64 = dir_children
+            .get(&dir_digest)
+            .map(|children| {
+                children.iter()
+                    .map(|c| subtree_bytes.get(c).copied().unwrap_or(0))
+                    .sum()
+            })
+            .unwrap_or(0);
+        subtree_bytes.insert(dir_digest, direct + children_total);
+    }
+
+    Ok(ResolvedTree {
+        file_digests,
+        dir_digests: seen_dirs,
+        subtree_bytes,
+    })
+}
+
+/// Scores endpoints by the total bytes of input blobs they have cached
+/// AND generates peer hints in a single pass over the file digests,
+/// acquiring the locality map read lock only once.
+///
+/// Returns:
+/// - `HashMap<String, (u64, SystemTime)>`: endpoint scores (total cached
+///   bytes, most recent blob timestamp)
+/// - `Vec<PeerHint>`: peer hints sorted by file size descending, truncated
+///   to MAX_PEER_HINTS
+///
+/// This is called OUTSIDE the scheduler write lock, so it does not need
+/// access to `endpoint_to_worker` or the candidate set. The caller maps
+/// endpoints to WorkerIds and filters to candidates inside the lock.
+fn score_and_generate_hints(
+    file_digests: &[(DigestInfo, u64)],
+    locality_map: &SharedBlobLocalityMap,
+) -> (HashMap<String, (u64, SystemTime)>, Vec<PeerHint>) {
+    /// Maximum number of peer hints to include in a StartExecute message
+    /// to avoid oversized messages.
+    const MAX_PEER_HINTS: usize = 16384;
+
+    let map = locality_map.read();
+    let blobs = map.blobs_map();
+    let mut scores: HashMap<String, (u64, SystemTime)> = HashMap::new();
+    let mut hint_candidates: Vec<(DigestInfo, u64, Vec<String>)> = Vec::new();
+
+    for &(digest, size) in file_digests {
+        if let Some(endpoints) = blobs.get(&digest) {
+            // Accumulate endpoint scores.
+            for (endpoint, ts) in endpoints {
+                let entry = scores
+                    .entry(endpoint.to_string())
+                    .or_insert((0, UNIX_EPOCH));
+                entry.0 += size;
+                if *ts > entry.1 {
+                    entry.1 = *ts;
+                }
+            }
+            // Collect hint candidate if this digest has peer locations.
+            if !endpoints.is_empty() {
+                let peer_eps: Vec<String> =
+                    endpoints.keys().map(|e| e.to_string()).collect();
+                hint_candidates.push((digest, size, peer_eps));
+            }
+        }
+    }
+
+    // Sort by size descending to prioritize large files.
+    hint_candidates.sort_by(|a, b| b.1.cmp(&a.1));
+    hint_candidates.truncate(MAX_PEER_HINTS);
+
+    let peer_hints: Vec<PeerHint> = hint_candidates
+        .into_iter()
+        .map(|(digest, _size, peer_endpoints)| PeerHint {
+            digest: Some(digest.into()),
+            peer_endpoints,
+        })
+        .collect();
+
+    (scores, peer_hints)
+}
+
+/// Converts endpoint scores to worker scores using the endpoint-to-worker
+/// mapping, filtering to the given candidate set.
+///
+/// Returns `HashMap<WorkerId, (u64, SystemTime)>` where the tuple is
+/// (total cached bytes, most recent blob timestamp across all endpoints
+/// belonging to this worker).
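+/// A worker that exposes several CAS endpoints therefore accumulates the
+/// sum of its endpoints' byte scores and the freshest of their timestamps.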
+fn endpoint_scores_to_worker_scores(
+    endpoint_scores: &HashMap<String, (u64, SystemTime)>,
+    endpoint_to_worker: &HashMap<String, WorkerId>,
+    candidates: &HashSet<WorkerId>,
+) -> HashMap<WorkerId, (u64, SystemTime)> {
+    let mut worker_scores: HashMap<WorkerId, (u64, SystemTime)> = HashMap::new();
+    for (endpoint, &(score, ts)) in endpoint_scores {
+        if let Some(worker_id) = endpoint_to_worker.get(endpoint) {
+            if candidates.contains(worker_id) {
+                let entry = worker_scores
+                    .entry(worker_id.clone())
+                    .or_insert((0, UNIX_EPOCH));
+                entry.0 += score;
+                if ts > entry.1 {
+                    entry.1 = ts;
+                }
+            }
+        }
+    }
+    worker_scores
+}
+
+/// Backward-compatible wrapper used by existing tests. Scores candidate
+/// workers by the total bytes of input blobs they have cached.
+/// Returns only the byte score (drops the timestamp) for simpler assertions.
+#[cfg(test)]
+fn score_workers(
+    candidates: &HashSet<WorkerId>,
+    file_digests: &[(DigestInfo, u64)],
+    locality_map: &SharedBlobLocalityMap,
+    endpoint_to_worker: &HashMap<String, WorkerId>,
+) -> HashMap<WorkerId, u64> {
+    let (endpoint_scores, _hints) = score_and_generate_hints(file_digests, locality_map);
+    let full_scores = endpoint_scores_to_worker_scores(&endpoint_scores, endpoint_to_worker, candidates);
+    full_scores.into_iter().map(|(wid, (score, _))| (wid, score)).collect()
+}

 #[async_trait]
@@ -603,7 +1507,7 @@ impl WorkerScheduler for ApiWorkerScheduler {
     async fn add_worker(&self, worker: Worker) -> Result<(), Error> {
         let worker_id = worker.id.clone();
         let worker_timestamp = worker.last_update_timestamp;
-        let mut inner = self.inner.lock().await;
+        let mut inner = self.inner.write().await;
         if inner.shutting_down {
             warn!("Rejected worker add during shutdown: {}", worker_id);
             return Err(make_err!(
@@ -632,7 +1536,7 @@
         operation_id: &OperationId,
         update: UpdateOperationType,
     ) -> Result<(), Error> {
-        let mut inner = self.inner.lock().await;
+        let mut inner = self.inner.write().await;
         inner.update_action(worker_id, operation_id, update).await
     }

@@ -642,7 +1546,7 @@
         timestamp: WorkerTimestamp,
     ) -> Result<(), Error> {
         {
-            let mut inner = self.inner.lock().await;
+            let mut inner = self.inner.write().await;
             inner
                 .refresh_lifetime(worker_id, timestamp)
                 .err_tip(|| "Error refreshing lifetime in worker_keep_alive_received()")?;
@@ -657,7 +1561,7 @@
     async fn remove_worker(&self, worker_id: &WorkerId) -> Result<(), Error> {
         self.worker_registry.remove_worker(worker_id).await;

-        let mut inner = self.inner.lock().await;
+        let mut inner = self.inner.write().await;
         inner
             .immediate_evict_worker(
                 worker_id,
@@ -668,7 +1572,7 @@
     }

     async fn shutdown(&self, shutdown_guard: ShutdownGuard) {
-        let mut inner = self.inner.lock().await;
+        let mut inner = self.inner.write().await;
         inner.shutting_down = true; // should reject further worker registration
         while let Some(worker_id) = inner
             .workers
@@ -692,54 +1596,100 @@
     async fn remove_timedout_workers(&self, now_timestamp: WorkerTimestamp) -> Result<(), Error> {
         // Check worker liveness using both the local timestamp (from LRU)
         // and the worker registry. A worker is alive if either source says it's alive.
+        //
+        // Quarantine phase: workers that miss keepalive for > worker_timeout but
+        // < 2*worker_timeout are quarantined (stop receiving new work) rather than
+        // immediately evicted. Workers that miss keepalive for >= 2*worker_timeout
+        // are fully evicted.
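+        //
+        // Worked example: with worker_timeout_s = 30 and now = T, a worker
+        // last seen at T-31 enters quarantine (keeps its running actions
+        // but gets no new ones, assuming the registry also reports it dead),
+        // while a worker last seen at T-61 is evicted. A keepalive at any
+        // point before eviction clears the quarantine.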
         let timeout = Duration::from_secs(self.worker_timeout_s);
         let now = UNIX_EPOCH + Duration::from_secs(now_timestamp);
         let timeout_threshold = now_timestamp.saturating_sub(self.worker_timeout_s);
+        let evict_threshold = now_timestamp.saturating_sub(self.worker_timeout_s * 2);

-        let workers_to_check: Vec<(WorkerId, bool)> = {
-            let inner = self.inner.lock().await;
+        // Collect (worker_id, past_evict_threshold, already_quarantined) for
+        // workers that have not responded within the base timeout window.
+        let workers_to_check: Vec<(WorkerId, bool, bool)> = {
+            let inner = self.inner.read().await;
             inner
                 .workers
                 .iter()
-                .map(|(worker_id, worker)| {
+                .filter_map(|(worker_id, worker)| {
                     let local_alive = worker.last_update_timestamp > timeout_threshold;
-                    (worker_id.clone(), local_alive)
+                    if local_alive {
+                        None
+                    } else {
+                        let already_quarantined = worker.quarantined_at.is_some();
+                        // Check if past the eviction threshold (2x timeout)
+                        let past_evict_threshold =
+                            worker.last_update_timestamp <= evict_threshold;
+                        Some((worker_id.clone(), past_evict_threshold, already_quarantined))
+                    }
                 })
                 .collect()
         };

-        let mut worker_ids_to_remove = Vec::new();
-        for (worker_id, local_alive) in workers_to_check {
-            if local_alive {
-                continue;
-            }
+        if workers_to_check.is_empty() {
+            return Ok(());
+        }

+        // For each candidate, consult the registry to determine actual liveness.
+        let mut workers_to_quarantine = Vec::new();
+        let mut worker_ids_to_remove = Vec::new();
+        for (worker_id, past_evict_threshold, already_quarantined) in workers_to_check {
             let registry_alive = self
                 .worker_registry
                 .is_worker_alive(&worker_id, timeout, now)
                 .await;

-            if !registry_alive {
+            if registry_alive {
+                // Registry says alive — no action needed.
+                continue;
+            }
+
+            if past_evict_threshold {
+                // Has been unresponsive for >= 2x the timeout — evict.
                 trace!(
                     ?worker_id,
-                    local_alive,
-                    registry_alive,
-                    timeout_threshold,
-                    "Worker timed out - neither local nor registry shows alive"
+                    past_evict_threshold,
+                    "Worker exceeded double-timeout, evicting from pool"
                 );
                 worker_ids_to_remove.push(worker_id);
+            } else if !already_quarantined {
+                // Has been unresponsive for > timeout but < 2x timeout — quarantine.
+                trace!(
+                    ?worker_id,
+                    "Worker missed keepalive, entering quarantine (stops receiving work)"
+                );
+                workers_to_quarantine.push(worker_id);
             }
+            // If already_quarantined && !past_evict_threshold: still waiting, no action.
         }

-        if worker_ids_to_remove.is_empty() {
+        if workers_to_quarantine.is_empty() && worker_ids_to_remove.is_empty() {
             return Ok(());
         }

-        let mut inner = self.inner.lock().await;
-        let mut result = Ok(());
+        let mut inner = self.inner.write().await;
+        // Apply quarantine to workers that just crossed the first timeout.
+        let quarantine_time = SystemTime::now();
+        for worker_id in &workers_to_quarantine {
+            if let Some(worker) = inner.workers.peek_mut(worker_id) {
+                warn!(
+                    ?worker_id,
+                    "Worker missed keepalive, quarantining (will not receive new work)"
+                );
+                worker.quarantined_at = Some(quarantine_time);
+            }
+        }
+        // Notify the matching engine so it skips quarantined workers on next cycle.
+ if !workers_to_quarantine.is_empty() { + inner.worker_change_notify.notify_one(); + } + + let mut result = Ok(()); for worker_id in &worker_ids_to_remove { - warn!(?worker_id, "Worker timed out, removing from pool"); + warn!(?worker_id, "Worker timed out (2x timeout), removing from pool"); result = result.merge( inner .immediate_evict_worker( @@ -758,9 +1708,560 @@ impl WorkerScheduler for ApiWorkerScheduler { } async fn set_drain_worker(&self, worker_id: &WorkerId, is_draining: bool) -> Result<(), Error> { - let mut inner = self.inner.lock().await; + let mut inner = self.inner.write().await; inner.set_drain_worker(worker_id, is_draining).await } + + async fn update_worker_load(&self, worker_id: &WorkerId, cpu_load_pct: u32) -> Result<(), Error> { + // Use peek_mut to avoid promoting the worker in the LRU cache — + // load updates should not affect scheduling order. + let mut inner = self.inner.write().await; + let worker = inner.workers.0.peek_mut(worker_id).ok_or_else(|| { + make_input_err!( + "Worker not found in worker map in update_worker_load() {}", + worker_id + ) + })?; + worker.cpu_load_pct = cpu_load_pct; + debug!(%worker_id, cpu_load_pct, "Worker load updated"); + Ok(()) + } + + async fn update_cached_directories( + &self, + worker_id: &WorkerId, + digests: HashSet, + ) -> Result<(), Error> { + let mut inner = self.inner.write().await; + let worker = inner.workers.0.peek_mut(worker_id).ok_or_else(|| { + make_input_err!( + "Worker not found in worker map in update_cached_directories() {}", + worker_id + ) + })?; + let count = digests.len(); + worker.cached_directory_digests = digests; + debug!(%worker_id, count, "Worker cached directory digests updated"); + Ok(()) + } + + async fn update_cached_subtrees( + &self, + worker_id: &WorkerId, + is_full_snapshot: bool, + full_set: Vec, + added: Vec, + removed: Vec, + ) -> Result<(), Error> { + let mut inner = self.inner.write().await; + let worker = inner.workers.0.peek_mut(worker_id).ok_or_else(|| { + make_input_err!( + "Worker not found in worker map in update_cached_subtrees() {}", + worker_id + ) + })?; + if is_full_snapshot { + let count = full_set.len(); + worker.cached_subtree_digests = full_set.into_iter().collect(); + debug!(%worker_id, count, "Worker cached subtree digests replaced (full snapshot)"); + } else { + let added_count = added.len(); + let removed_count = removed.len(); + for digest in added { + worker.cached_subtree_digests.insert(digest); + } + for digest in &removed { + worker.cached_subtree_digests.remove(digest); + } + let total = worker.cached_subtree_digests.len(); + debug!( + %worker_id, + added_count, + removed_count, + total, + "Worker cached subtree digests updated (delta)" + ); + } + Ok(()) + } } impl RootMetricsComponent for ApiWorkerScheduler {} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + use bytes::Bytes; + use nativelink_config::stores::MemorySpec; + use nativelink_proto::build::bazel::remote::execution::v2::{ + Digest as ProtoDigest, DirectoryNode, FileNode, + }; + use nativelink_store::memory_store::MemoryStore; + use nativelink_util::blob_locality_map::new_shared_blob_locality_map; + use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc}; + + /// Helper: encode a Directory proto and compute its DigestInfo (SHA256). 
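+    /// (This mirrors how the Remote Execution API addresses directories:
+    /// the digest is the SHA-256 of the serialized proto and the size is
+    /// the serialized byte length, so the helper produces CAS-compatible
+    /// keys for the in-memory store used by these tests.)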
+ fn encode_directory(dir: &Directory) -> (Vec, DigestInfo) { + let dir_bytes = dir.encode_to_vec(); + let mut hasher = DigestHasherFunc::Sha256.hasher(); + hasher.update(&dir_bytes); + let digest_info = hasher.finalize_digest(); + (dir_bytes, digest_info) + } + + /// Helper: create a FileNode with a deterministic fake digest. + fn make_file_node(name: &str, hash_byte: u8, size: i64) -> FileNode { + FileNode { + name: name.to_string(), + digest: Some(ProtoDigest { + hash: format!("{:02x}", hash_byte).repeat(32), // 64-char hex + size_bytes: size, + ..Default::default() + }), + ..Default::default() + } + } + + #[test] + fn test_score_workers_basic() { + let locality_map = new_shared_blob_locality_map(); + let d1 = DigestInfo::new([1u8; 32], 1000); + let d2 = DigestInfo::new([2u8; 32], 2000); + let d3 = DigestInfo::new([3u8; 32], 3000); + + // worker-a has d1 and d2 (3000 bytes total) + // worker-b has d2 and d3 (5000 bytes total) + { + let mut map = locality_map.write(); + map.register_blobs("grpc://worker-a:50081", &[d1, d2]); + map.register_blobs("grpc://worker-b:50081", &[d2, d3]); + } + + let worker_a = WorkerId::from("worker-a-id".to_string()); + let worker_b = WorkerId::from("worker-b-id".to_string()); + + let mut endpoint_to_worker = HashMap::new(); + endpoint_to_worker.insert("grpc://worker-a:50081".to_string(), worker_a.clone()); + endpoint_to_worker.insert("grpc://worker-b:50081".to_string(), worker_b.clone()); + + let mut candidates = HashSet::new(); + candidates.insert(worker_a.clone()); + candidates.insert(worker_b.clone()); + + let file_digests = vec![(d1, 1000), (d2, 2000), (d3, 3000)]; + + let scores = score_workers(&candidates, &file_digests, &locality_map, &endpoint_to_worker); + + assert_eq!(scores.get(&worker_a), Some(&3000)); // d1(1000) + d2(2000) + assert_eq!(scores.get(&worker_b), Some(&5000)); // d2(2000) + d3(3000) + } + + #[test] + fn test_score_workers_non_candidate_excluded() { + let locality_map = new_shared_blob_locality_map(); + let d1 = DigestInfo::new([1u8; 32], 1000); + + { + let mut map = locality_map.write(); + map.register_blobs("grpc://worker-a:50081", &[d1]); + } + + let worker_a = WorkerId::from("worker-a-id".to_string()); + let mut endpoint_to_worker = HashMap::new(); + endpoint_to_worker.insert("grpc://worker-a:50081".to_string(), worker_a.clone()); + + // worker_a is NOT in candidates + let candidates = HashSet::new(); + let file_digests = vec![(d1, 1000)]; + + let scores = score_workers(&candidates, &file_digests, &locality_map, &endpoint_to_worker); + assert!(scores.is_empty()); + } + + #[test] + fn test_score_workers_empty_locality_map() { + let locality_map = new_shared_blob_locality_map(); + let d1 = DigestInfo::new([1u8; 32], 1000); + + let worker_a = WorkerId::from("worker-a-id".to_string()); + let mut candidates = HashSet::new(); + candidates.insert(worker_a.clone()); + + let endpoint_to_worker = HashMap::new(); + let file_digests = vec![(d1, 1000)]; + + let scores = score_workers(&candidates, &file_digests, &locality_map, &endpoint_to_worker); + assert!(scores.is_empty()); + } + + // --------------------------------------------------------------- + // resolve_tree_from_cas tests + // --------------------------------------------------------------- + + #[tokio::test] + async fn test_resolve_tree_single_directory() { + // A single directory with 3 files, no subdirectories. 
+ let dir = Directory { + files: vec![ + make_file_node("file1.txt", 0xaa, 1000), + make_file_node("file2.txt", 0xbb, 2000), + make_file_node("file3.txt", 0xcc, 3000), + ], + directories: vec![], + ..Default::default() + }; + + let (dir_bytes, dir_digest) = encode_directory(&dir); + let store = Store::new(MemoryStore::new(&MemorySpec::default())); + let key: StoreKey<'_> = dir_digest.into(); + store + .update_oneshot(key, Bytes::from(dir_bytes)) + .await + .expect("store update_oneshot failed"); + + let result = resolve_tree_from_cas(&store, dir_digest) + .await + .expect("resolve_tree_from_cas failed"); + + assert_eq!(result.file_digests.len(), 3, "Expected 3 file digests"); + assert_eq!(result.dir_digests.len(), 1, "Expected 1 directory digest (root)"); + assert!(result.dir_digests.contains(&dir_digest)); + + // Root subtree contains all files: 1000+2000+3000 = 6000 + assert_eq!(result.subtree_bytes.get(&dir_digest), Some(&6000)); + + // Verify all three sizes are present (order may vary). + let mut sizes: Vec = result.file_digests.iter().map(|&(_, s)| s).collect(); + sizes.sort(); + assert_eq!(sizes, vec![1000, 2000, 3000]); + } + + #[tokio::test] + async fn test_resolve_tree_nested_directories() { + // Subdirectory with 2 files. + let sub_dir = Directory { + files: vec![ + make_file_node("sub_file1.txt", 0x11, 500), + make_file_node("sub_file2.txt", 0x22, 700), + ], + directories: vec![], + ..Default::default() + }; + let (sub_dir_bytes, sub_dir_digest) = encode_directory(&sub_dir); + + // Root directory with 1 file and a reference to the subdirectory. + let root_dir = Directory { + files: vec![make_file_node("root_file.txt", 0x33, 1200)], + directories: vec![DirectoryNode { + name: "subdir".to_string(), + digest: Some(sub_dir_digest.into()), + }], + ..Default::default() + }; + let (root_dir_bytes, root_dir_digest) = encode_directory(&root_dir); + + let store = Store::new(MemoryStore::new(&MemorySpec::default())); + let root_key: StoreKey<'_> = root_dir_digest.into(); + store + .update_oneshot(root_key, Bytes::from(root_dir_bytes)) + .await + .expect("store root dir"); + let sub_key: StoreKey<'_> = sub_dir_digest.into(); + store + .update_oneshot(sub_key, Bytes::from(sub_dir_bytes)) + .await + .expect("store sub dir"); + + let result = resolve_tree_from_cas(&store, root_dir_digest) + .await + .expect("resolve_tree_from_cas failed"); + + assert_eq!(result.file_digests.len(), 3, "Expected 3 files (1 root + 2 subdir)"); + assert_eq!(result.dir_digests.len(), 2, "Expected 2 directory digests (root + subdir)"); + assert!(result.dir_digests.contains(&root_dir_digest)); + assert!(result.dir_digests.contains(&sub_dir_digest)); + + // subdir has 500+700=1200 bytes of files + assert_eq!(result.subtree_bytes.get(&sub_dir_digest), Some(&1200)); + // root has 1200 (own file) + 1200 (subdir subtree) = 2400 + assert_eq!(result.subtree_bytes.get(&root_dir_digest), Some(&2400)); + + let mut sizes: Vec = result.file_digests.iter().map(|&(_, s)| s).collect(); + sizes.sort(); + assert_eq!(sizes, vec![500, 700, 1200]); + } + + #[tokio::test] + async fn test_resolve_tree_deduplicates_files() { + // Two directories both referencing the same file digest. 
+        let shared_file = make_file_node("shared.txt", 0xdd, 999);
+
+        let sub_dir = Directory {
+            files: vec![shared_file.clone()],
+            directories: vec![],
+            ..Default::default()
+        };
+        let (sub_dir_bytes, sub_dir_digest) = encode_directory(&sub_dir);
+
+        let root_dir = Directory {
+            files: vec![
+                // Same digest as the file in sub_dir (same hash_byte 0xdd, same size).
+                make_file_node("also_shared.txt", 0xdd, 999),
+            ],
+            directories: vec![DirectoryNode {
+                name: "subdir".to_string(),
+                digest: Some(sub_dir_digest.into()),
+            }],
+            ..Default::default()
+        };
+        let (root_dir_bytes, root_dir_digest) = encode_directory(&root_dir);
+
+        let store = Store::new(MemoryStore::new(&MemorySpec::default()));
+        let root_key: StoreKey<'_> = root_dir_digest.into();
+        store
+            .update_oneshot(root_key, Bytes::from(root_dir_bytes))
+            .await
+            .expect("store root dir");
+        let sub_key: StoreKey<'_> = sub_dir_digest.into();
+        store
+            .update_oneshot(sub_key, Bytes::from(sub_dir_bytes))
+            .await
+            .expect("store sub dir");
+
+        let result = resolve_tree_from_cas(&store, root_dir_digest)
+            .await
+            .expect("resolve_tree_from_cas failed");
+
+        // The same digest should appear only once.
+        assert_eq!(
+            result.file_digests.len(),
+            1,
+            "Duplicate file digest should be deduplicated"
+        );
+        assert_eq!(result.file_digests[0].1, 999);
+        assert_eq!(result.dir_digests.len(), 2, "Expected root + subdir");
+        assert!(result.dir_digests.contains(&root_dir_digest));
+        assert!(result.dir_digests.contains(&sub_dir_digest));
+
+        // Both dirs have the same file (999 bytes) — subtree_bytes counts
+        // each occurrence (not deduplicated, since it's per-directory).
+        assert_eq!(result.subtree_bytes.get(&sub_dir_digest), Some(&999));
+        assert_eq!(result.subtree_bytes.get(&root_dir_digest), Some(&1998)); // 999 + 999
+    }
+
+    #[tokio::test]
+    async fn test_resolve_tree_circular_directory() {
+        // A true hash cycle (A->B->A) is impossible with content-addressed
+        // hashes: the digest of A depends on B's digest and vice versa.
+        // Instead, we test the seen_dirs guard with a diamond structure:
+        //   root -> {dir_left, dir_right}, both -> dir_shared
+        // Without the seen_dirs set, dir_shared would be visited twice.
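+        //
+        //             root
+        //            /    \
+        //       dir_left  dir_right
+        //            \    /
+        //          dir_shared
+        //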
+        let dir_shared = Directory {
+            files: vec![make_file_node("shared.txt", 0x11, 100)],
+            directories: vec![],
+            ..Default::default()
+        };
+        let (shared_bytes, shared_digest) = encode_directory(&dir_shared);
+
+        let dir_left = Directory {
+            files: vec![make_file_node("left.txt", 0x22, 200)],
+            directories: vec![DirectoryNode {
+                name: "shared".to_string(),
+                digest: Some(shared_digest.into()),
+            }],
+            ..Default::default()
+        };
+        let (left_bytes, left_digest) = encode_directory(&dir_left);
+
+        let dir_right = Directory {
+            files: vec![make_file_node("right.txt", 0x33, 300)],
+            directories: vec![DirectoryNode {
+                name: "shared".to_string(),
+                digest: Some(shared_digest.into()),
+            }],
+            ..Default::default()
+        };
+        let (right_bytes, right_digest) = encode_directory(&dir_right);
+
+        let root = Directory {
+            files: vec![],
+            directories: vec![
+                DirectoryNode {
+                    name: "left".to_string(),
+                    digest: Some(left_digest.into()),
+                },
+                DirectoryNode {
+                    name: "right".to_string(),
+                    digest: Some(right_digest.into()),
+                },
+            ],
+            ..Default::default()
+        };
+        let (root_bytes, root_digest) = encode_directory(&root);
+
+        let store = Store::new(MemoryStore::new(&MemorySpec::default()));
+        for (bytes, digest) in [
+            (root_bytes, root_digest),
+            (left_bytes, left_digest),
+            (right_bytes, right_digest),
+            (shared_bytes, shared_digest),
+        ] {
+            let key: StoreKey<'_> = digest.into();
+            store
+                .update_oneshot(key, Bytes::from(bytes))
+                .await
+                .expect("store update");
+        }
+
+        let result = resolve_tree_from_cas(&store, root_digest)
+            .await
+            .expect("resolve_tree_from_cas failed");
+
+        // dir_shared is referenced by both dir_left and dir_right, but
+        // seen_dirs ensures it's only visited once. Files: shared(0x11),
+        // left(0x22), right(0x33) — all unique digests, so 3 total.
+        assert_eq!(
+            result.file_digests.len(),
+            3,
+            "Diamond structure: shared dir visited once, 3 unique files"
+        );
+        // 4 directories: root, left, right, shared
+        assert_eq!(result.dir_digests.len(), 4, "Expected 4 directory digests");
+        assert!(result.dir_digests.contains(&root_digest));
+        assert!(result.dir_digests.contains(&left_digest));
+        assert!(result.dir_digests.contains(&right_digest));
+        assert!(result.dir_digests.contains(&shared_digest));
+
+        // shared: 100 bytes (its own file)
+        assert_eq!(result.subtree_bytes.get(&shared_digest), Some(&100));
+        // left: 200 (own) + 100 (shared) = 300
+        assert_eq!(result.subtree_bytes.get(&left_digest), Some(&300));
+        // right: 300 (own) + 100 (shared) = 400
+        assert_eq!(result.subtree_bytes.get(&right_digest), Some(&400));
+        // root: 0 (no own files) + 300 (left) + 400 (right) = 700
+        assert_eq!(result.subtree_bytes.get(&root_digest), Some(&700));
+
+        let mut sizes: Vec<u64> = result.file_digests.iter().map(|&(_, s)| s).collect();
+        sizes.sort();
+        assert_eq!(sizes, vec![100, 200, 300]);
+    }
+
+    #[tokio::test]
+    async fn test_resolve_tree_missing_directory() {
+        // Attempt to resolve a digest that doesn't exist in the store.
+        let store = Store::new(MemoryStore::new(&MemorySpec::default()));
+
+        let missing_digest = DigestInfo::new([0xff; 32], 42);
+        let result = resolve_tree_from_cas(&store, missing_digest).await;
+
+        assert!(
+            result.is_err(),
+            "Should return an error for a missing directory"
+        );
+    }
+
+    #[test]
+    fn test_score_workers_empty_file_list() {
+        let locality_map = new_shared_blob_locality_map();
+
+        // Even with data in the locality map, empty file_digests => empty scores.
+        {
+            let mut map = locality_map.write();
+            let d1 = DigestInfo::new([1u8; 32], 1000);
+            map.register_blobs("grpc://worker-a:50081", &[d1]);
+        }
+
+        let worker_a = WorkerId::from("worker-a-id".to_string());
+        let mut endpoint_to_worker = HashMap::new();
+        endpoint_to_worker.insert("grpc://worker-a:50081".to_string(), worker_a.clone());
+
+        let mut candidates = HashSet::new();
+        candidates.insert(worker_a);
+
+        let file_digests: Vec<(DigestInfo, u64)> = vec![];
+
+        let scores = score_workers(&candidates, &file_digests, &locality_map, &endpoint_to_worker);
+        assert!(
+            scores.is_empty(),
+            "Expected empty scores for empty file_digests, got {scores:?}"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_resolve_input_tree_cache_hit_returns_same_arc() {
+        use nativelink_config::schedulers::WorkerAllocationStrategy;
+        use nativelink_metric::MetricsComponent;
+        use nativelink_util::operation_state_manager::{UpdateOperationType, WorkerStateManager};
+
+        use crate::platform_property_manager::PlatformPropertyManager;
+        use crate::worker_registry::WorkerRegistry;
+
+        // Minimal mock WorkerStateManager for constructing ApiWorkerScheduler.
+        #[derive(Debug)]
+        struct NoopWorkerStateManager;
+
+        impl MetricsComponent for NoopWorkerStateManager {
+            fn publish(
+                &self,
+                _kind: MetricKind,
+                _field_metadata: MetricFieldData,
+            ) -> Result<MetricPublishKnownKindData, nativelink_metric::Error> {
+                Ok(MetricPublishKnownKindData::Component)
+            }
+        }
+
+        #[tonic::async_trait]
+        impl WorkerStateManager for NoopWorkerStateManager {
+            async fn update_operation(
+                &self,
+                _operation_id: &OperationId,
+                _worker_id: &WorkerId,
+                _update: UpdateOperationType,
+            ) -> Result<(), Error> {
+                Ok(())
+            }
+        }
+
+        // Create a store with a single-directory tree (one file).
+        let store = Store::new(MemoryStore::new(&MemorySpec::default()));
+
+        let dir = Directory {
+            files: vec![make_file_node("test.txt", 0xaa, 1000)],
+            directories: vec![],
+            ..Default::default()
+        };
+        let (dir_bytes, dir_digest) = encode_directory(&dir);
+        let key: StoreKey<'_> = dir_digest.into();
+        store
+            .update_oneshot(key, Bytes::from(dir_bytes))
+            .await
+            .expect("store update");
+
+        // Build scheduler with CAS store.
+        let scheduler = ApiWorkerScheduler::new_with_locality_map(
+            Arc::new(NoopWorkerStateManager),
+            Arc::new(PlatformPropertyManager::new(HashMap::new())),
+            WorkerAllocationStrategy::default(),
+            Arc::new(Notify::new()),
+            100,
+            Arc::new(WorkerRegistry::new()),
+            None,
+            Some(store),
+        );
+
+        // First call: cache miss, resolves from CAS.
+        let result1 = scheduler.resolve_input_tree(dir_digest).await;
+        assert!(result1.is_some(), "Expected Some from first resolve");
+
+        // Second call: cache hit, should return the same Arc.
+        let result2 = scheduler.resolve_input_tree(dir_digest).await;
+        assert!(result2.is_some(), "Expected Some from second resolve");
+
+        let arc1 = result1.unwrap();
+        let arc2 = result2.unwrap();
+        assert!(
+            Arc::ptr_eq(&arc1, &arc2),
+            "Expected resolve_input_tree to return the same Arc on cache hit (pointer equality)"
+        );
+    }
+}
diff --git a/nativelink-scheduler/src/awaited_action_db/awaited_action.rs b/nativelink-scheduler/src/awaited_action_db/awaited_action.rs
index 337c354e0..ab8abc14d 100644
--- a/nativelink-scheduler/src/awaited_action_db/awaited_action.rs
+++ b/nativelink-scheduler/src/awaited_action_db/awaited_action.rs
@@ -163,6 +163,12 @@ impl AwaitedAction {
         self.sort_key
     }
 
+    /// Boost this action to maximum priority so it is scheduled next.
+    /// Used for retrying infrastructure failures (e.g. OOM/SIGKILL).
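+    /// Sketch of the intended effect (assuming the key compares by priority
+    /// first, then enqueue time):
+    ///
+    ///   AwaitedActionSortKey::new(i32::MAX, 0)   // boosted: sorts ahead of
+    ///   AwaitedActionSortKey::new(priority, ts)  // any client-set priority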
+    pub(crate) fn boost_priority(&mut self) {
+        self.sort_key = AwaitedActionSortKey::new(i32::MAX, 0);
+    }
+
     pub const fn state(&self) -> &Arc<ActionState> {
         &self.state
     }
diff --git a/nativelink-scheduler/src/default_scheduler_factory.rs b/nativelink-scheduler/src/default_scheduler_factory.rs
index 58e27605b..fc9fd3c7e 100644
--- a/nativelink-scheduler/src/default_scheduler_factory.rs
+++ b/nativelink-scheduler/src/default_scheduler_factory.rs
@@ -23,6 +23,7 @@ use nativelink_error::{Error, ResultExt, make_input_err};
 use nativelink_proto::com::github::trace_machina::nativelink::events::OriginEvent;
 use nativelink_store::redis_store::RedisStore;
 use nativelink_store::store_manager::StoreManager;
+use nativelink_util::blob_locality_map::SharedBlobLocalityMap;
 use nativelink_util::instant_wrapper::InstantWrapper;
 use nativelink_util::operation_state_manager::ClientStateManager;
 use redis::aio::{ConnectionManager, PubSub};
@@ -49,18 +50,20 @@ pub fn scheduler_factory(
     spec: &SchedulerSpec,
     store_manager: &StoreManager,
     maybe_origin_event_tx: Option<&mpsc::Sender<OriginEvent>>,
+    locality_map: Option<SharedBlobLocalityMap>,
 ) -> Result<SchedulerFactoryResults, Error> {
-    inner_scheduler_factory(spec, store_manager, maybe_origin_event_tx)
+    inner_scheduler_factory(spec, store_manager, maybe_origin_event_tx, locality_map)
 }
 
 fn inner_scheduler_factory(
     spec: &SchedulerSpec,
     store_manager: &StoreManager,
     maybe_origin_event_tx: Option<&mpsc::Sender<OriginEvent>>,
+    locality_map: Option<SharedBlobLocalityMap>,
 ) -> Result<SchedulerFactoryResults, Error> {
     let scheduler: SchedulerFactoryResults = match spec {
         SchedulerSpec::Simple(spec) => {
-            simple_scheduler_factory(spec, store_manager, SystemTime::now, maybe_origin_event_tx)?
+            simple_scheduler_factory(spec, store_manager, SystemTime::now, maybe_origin_event_tx, locality_map)?
         }
         SchedulerSpec::Grpc(spec) => (Some(Arc::new(GrpcScheduler::new(spec)?)), None),
         SchedulerSpec::CacheLookup(spec) => {
@@ -68,7 +71,7 @@ fn inner_scheduler_factory(
                 .get_store(&spec.ac_store)
                 .err_tip(|| format!("'ac_store': '{}' does not exist", spec.ac_store))?;
             let (action_scheduler, worker_scheduler) =
-                inner_scheduler_factory(&spec.scheduler, store_manager, maybe_origin_event_tx)
+                inner_scheduler_factory(&spec.scheduler, store_manager, maybe_origin_event_tx, locality_map.clone())
                     .err_tip(|| "In nested CacheLookupScheduler construction")?;
             let cache_lookup_scheduler = Arc::new(CacheLookupScheduler::new(
                 ac_store,
@@ -78,7 +81,7 @@ fn inner_scheduler_factory(
         }
         SchedulerSpec::PropertyModifier(spec) => {
             let (action_scheduler, worker_scheduler) =
-                inner_scheduler_factory(&spec.scheduler, store_manager, maybe_origin_event_tx)
+                inner_scheduler_factory(&spec.scheduler, store_manager, maybe_origin_event_tx, locality_map.clone())
                     .err_tip(|| "In nested PropertyModifierScheduler construction")?;
             let property_modifier_scheduler = Arc::new(PropertyModifierScheduler::new(
                 spec,
@@ -96,7 +99,19 @@ fn simple_scheduler_factory(
     store_manager: &StoreManager,
     now_fn: fn() -> SystemTime,
     maybe_origin_event_tx: Option<&mpsc::Sender<OriginEvent>>,
+    locality_map: Option<SharedBlobLocalityMap>,
 ) -> Result<SchedulerFactoryResults, Error> {
+    // Resolve the CAS store for locality-aware scheduling if configured.
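+    // Illustrative only: the JSON scheduler spec is assumed to carry the
+    // matching field, along the lines of
+    //   "simple": { "cas_store": "CAS_MAIN_STORE", ... }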
+    let cas_store = if let Some(ref cas_store_name) = spec.cas_store {
+        Some(
+            store_manager
+                .get_store(cas_store_name)
+                .err_tip(|| format!("'cas_store': '{cas_store_name}' does not exist"))?,
+        )
+    } else {
+        None
+    };
+
     match spec
         .experimental_backend
         .as_ref()
@@ -109,11 +124,13 @@ fn simple_scheduler_factory(
                 &task_change_notify,
                 SystemTime::now,
             );
-            let (action_scheduler, worker_scheduler) = SimpleScheduler::new(
+            let (action_scheduler, worker_scheduler) = SimpleScheduler::new_with_cas_store(
                 spec,
                 awaited_action_db,
                 task_change_notify,
                 maybe_origin_event_tx.cloned(),
+                cas_store,
+                locality_map,
             );
             Ok((Some(action_scheduler), Some(worker_scheduler)))
         }
@@ -143,11 +160,13 @@ fn simple_scheduler_factory(
                 Default::default,
             )
             .err_tip(|| "In state_manager_factory::redis_state_manager")?;
-            let (action_scheduler, worker_scheduler) = SimpleScheduler::new(
+            let (action_scheduler, worker_scheduler) = SimpleScheduler::new_with_cas_store(
                 spec,
                 awaited_action_db,
                 task_change_notify,
                 maybe_origin_event_tx.cloned(),
+                cas_store,
+                locality_map,
             );
             Ok((Some(action_scheduler), Some(worker_scheduler)))
         }
diff --git a/nativelink-scheduler/src/memory_awaited_action_db.rs b/nativelink-scheduler/src/memory_awaited_action_db.rs
index 6154bd17e..ac62b7dce 100644
--- a/nativelink-scheduler/src/memory_awaited_action_db.rs
+++ b/nativelink-scheduler/src/memory_awaited_action_db.rs
@@ -286,7 +286,7 @@ impl SortedAwaitedActions {
             operation_id: new_awaited_action.operation_id().clone(),
         });
 
-        let Some(sorted_awaited_action) = maybe_sorted_awaited_action else {
+        let Some(mut sorted_awaited_action) = maybe_sorted_awaited_action else {
             return Err(make_err!(
                 Code::Internal,
                 "sorted_action_info_hash_keys and action_info_hash_key_to_awaited_action are out of sync - {} - {:?}",
@@ -295,6 +295,13 @@ impl SortedAwaitedActions {
             ));
         };
 
+        // Update sort_key to match the new awaited action. Without this,
+        // boost_priority() (used during SIGKILL retry) changes the sort_key
+        // on the AwaitedAction stored in the watch channel, but the BTree
+        // entry retains the old sort_key, causing all subsequent lookups to
+        // fail with "out of sync".
+        sorted_awaited_action.sort_key = new_awaited_action.sort_key();
+
         self.insert_sort_map_for_stage(&new_awaited_action.state().stage, &sorted_awaited_action)
             .err_tip(|| "In AwaitedActionDb::update_awaited_action")?;
         Ok(())
@@ -417,14 +424,19 @@ impl<I: InstantWrapper, NowFn: Fn() -> I + Clone + Send + Sync> AwaitedActionDbImpl<I, NowFn> {
         debug!(%operation_id, "Clearing operation from state manager");
         let awaited_action = tx.borrow().clone();
         // Cleanup action_info_hash_key_to_awaited_action if it was marked cached.
+        // Only remove the entry if it still points to THIS operation.
+        // A newer operation may have claimed this key slot if the
+        // action completed and was re-requested before this cleanup ran.
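+        // Timeline of the race this guards against (illustrative):
+        //   1. op-A (cacheable) completes; its cleanup is scheduled.
+        //   2. The same action is requested again; op-B claims the key slot.
+        //   3. op-A's cleanup runs; blindly removing the key would orphan op-B.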
         match &awaited_action.action_info().unique_qualifier {
             ActionUniqueQualifier::Cacheable(action_key) => {
-                let maybe_awaited_action = self
+                let dominated_by_self = self
                     .action_info_hash_key_to_awaited_action
-                    .remove(action_key);
-                if !awaited_action.state().stage.is_finished()
-                    && maybe_awaited_action.is_none()
-                {
+                    .get(action_key)
+                    .map_or(false, |mapped_op_id| *mapped_op_id == operation_id);
+                if dominated_by_self {
+                    self.action_info_hash_key_to_awaited_action
+                        .remove(action_key);
+                } else if !awaited_action.state().stage.is_finished() {
                     error!(
                         %operation_id,
                         ?awaited_action,
@@ -552,18 +564,22 @@ impl<I: InstantWrapper, NowFn: Fn() -> I + Clone + Send + Sync> AwaitedActionDbImpl<I, NowFn> {
         }
         match &new_awaited_action.action_info().unique_qualifier {
             ActionUniqueQualifier::Cacheable(action_key) => {
-                let maybe_awaited_action =
-                    action_info_hash_key_to_awaited_action.remove(action_key);
-                match maybe_awaited_action {
-                    Some(removed_operation_id) => {
-                        if &removed_operation_id != new_awaited_action.operation_id() {
-                            error!(
-                                ?removed_operation_id,
-                                ?new_awaited_action,
-                                ?action_key,
-                                "action_info_hash_key_to_awaited_action and operation_id_to_awaited_action are out of sync",
-                            );
-                        }
+                // Only remove the entry if it belongs to this operation.
+                // A newer operation may have claimed this key slot if the
+                // original was cleaned up and re-requested.
+                match action_info_hash_key_to_awaited_action.get(action_key) {
+                    Some(mapped_operation_id)
+                        if mapped_operation_id == new_awaited_action.operation_id() =>
+                    {
+                        action_info_hash_key_to_awaited_action.remove(action_key);
+                    }
+                    Some(mapped_operation_id) => {
+                        error!(
+                            ?mapped_operation_id,
+                            ?new_awaited_action,
+                            ?action_key,
+                            "action_info_hash_key_to_awaited_action points to a different operation_id",
+                        );
+                    }
                     None => {
                         error!(
@@ -702,6 +718,20 @@ impl<I: InstantWrapper, NowFn: Fn() -> I + Clone + Send + Sync> AwaitedActionDbImpl<I, NowFn> {
             }
         }
 
+        // Log orphaned completed actions (no active WaitExecution subscriber).
+        // These are typically from Bazel dynamic execution where the local leg
+        // won and the client dropped the remote stream.
+        if matches!(
+            new_awaited_action.state().stage,
+            ActionStage::Completed(_) | ActionStage::CompletedFromCache(_)
+        ) && tx.receiver_count() == 0
+        {
+            debug!(
+                operation_id = ?new_awaited_action.operation_id(),
+                "Completed action has no subscribers (likely orphaned dynamic execution)",
+            );
+        }
+
         // Notify all listeners of the new state and ignore if no one is listening.
         // Note: Do not use `.send()` as it will not update the state if all listeners
         // are dropped.
diff --git a/nativelink-scheduler/src/simple_scheduler.rs b/nativelink-scheduler/src/simple_scheduler.rs
index d977fceea..dfdb27059 100644
--- a/nativelink-scheduler/src/simple_scheduler.rs
+++ b/nativelink-scheduler/src/simple_scheduler.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-use std::collections::{BTreeSet, HashMap};
+use std::collections::{BTreeSet, HashMap, HashSet};
 use std::sync::Arc;
 use std::time::{Instant, SystemTime};
 
@@ -23,6 +23,7 @@ use nativelink_error::{Code, Error, ResultExt};
 use nativelink_metric::{MetricsComponent, RootMetricsComponent};
 use nativelink_proto::com::github::trace_machina::nativelink::events::OriginEvent;
 use nativelink_util::action_messages::{ActionInfo, ActionState, OperationId, WorkerId};
+use nativelink_util::common::DigestInfo;
 use nativelink_util::instant_wrapper::InstantWrapper;
 use nativelink_util::known_platform_property_provider::KnownPlatformPropertyProvider;
 use nativelink_util::operation_state_manager::{
@@ -30,6 +31,7 @@ use nativelink_util::operation_state_manager::{
     OperationFilter, OperationStageFlags, OrderDirection, UpdateOperationType,
 };
 use nativelink_util::origin_event::OriginMetadata;
+use nativelink_util::platform_properties::PlatformProperties;
 use nativelink_util::shutdown_guard::ShutdownGuard;
 use nativelink_util::spawn;
 use nativelink_util::task::JoinHandleDropGuard;
@@ -39,7 +41,7 @@ use opentelemetry::context::{Context, FutureExt as OtelFutureExt};
 use opentelemetry_semantic_conventions::attribute::ENDUSER_ID;
 use tokio::sync::{Notify, mpsc};
 use tokio::time::Duration;
-use tracing::{debug, error, info, info_span, warn};
+use tracing::{debug, error, info_span, warn};
 
 use crate::api_worker_scheduler::ApiWorkerScheduler;
 use crate::awaited_action_db::{AwaitedActionDb, CLIENT_KEEPALIVE_DURATION};
@@ -51,7 +53,9 @@ use crate::worker_scheduler::WorkerScheduler;
 
 /// Default timeout for workers in seconds.
 /// If this changes, remember to change the documentation in the config.
-const DEFAULT_WORKER_TIMEOUT_S: u64 = 5;
+/// A 5-second timeout causes unnecessary worker churn on any brief network
+/// hiccup or GC pause, so we use a more generous default.
+const DEFAULT_WORKER_TIMEOUT_S: u64 = 30;
 
 /// Mark operations as completed with error if no client has updated them
 /// within this duration.
@@ -146,6 +150,11 @@ pub struct SimpleScheduler {
     /// e.g. "worker busy", "can't find any worker"
     /// Set to None to disable. This is quite noisy, so we limit it
     worker_match_logging_interval: Option<Duration>,
+
+    /// Maximum number of actions that can be matched per client
+    /// (identified by `instance_name`) in one matching cycle.
+    /// 0 means unlimited (fair scheduling disabled).
+    max_matches_per_client_per_cycle: usize,
 }
 
 impl core::fmt::Debug for SimpleScheduler {
@@ -216,98 +225,31 @@ impl SimpleScheduler {
     // can create a map of capabilities of each worker and then try and match
     // the actions to the worker using the map lookup (ie. map reduce).
     async fn do_try_match(&self, full_worker_logging: bool) -> Result<(), Error> {
-        async fn match_action_to_worker(
-            action_state_result: &dyn ActionStateResult,
-            workers: &ApiWorkerScheduler,
-            matching_engine_state_manager: &dyn MatchingEngineStateManager,
-            platform_property_manager: &PlatformPropertyManager,
-            full_worker_logging: bool,
-        ) -> Result<(), Error> {
-            let (action_info, maybe_origin_metadata) =
-                action_state_result
-                    .as_action_info()
-                    .await
-                    .err_tip(|| "Failed to get action_info from as_action_info_result stream")?;
-
-            // TODO(palfrey) We should not compute this every time and instead store
-            // it with the ActionInfo when we receive it.
-            let platform_properties = platform_property_manager
-                .make_platform_properties(action_info.platform_properties.clone())
-                .err_tip(
-                    || "Failed to make platform properties in SimpleScheduler::do_try_match",
-                )?;
-
-            let action_info = ActionInfoWithProps {
-                inner: action_info,
-                platform_properties,
-            };
-
-            // Try to find a worker for the action.
-            let worker_id = {
-                match workers
-                    .find_worker_for_action(&action_info.platform_properties, full_worker_logging)
-                    .await
-                {
-                    Some(worker_id) => worker_id,
-                    // If we could not find a worker for the action,
-                    // we have nothing to do.
-                    None => return Ok(()),
-                }
-            };
-
-            let attach_operation_fut = async move {
-                // Extract the operation_id from the action_state.
-                let operation_id = {
-                    let (action_state, _origin_metadata) = action_state_result
-                        .as_state()
-                        .await
-                        .err_tip(|| "Failed to get action_info from as_state_result stream")?;
-                    action_state.client_operation_id.clone()
-                };
-
-                // Tell the matching engine that the operation is being assigned to a worker.
-                let assign_result = matching_engine_state_manager
-                    .assign_operation(&operation_id, Ok(&worker_id))
-                    .await
-                    .err_tip(|| "Failed to assign operation in do_try_match");
-                if let Err(err) = assign_result {
-                    if err.code == Code::Aborted {
-                        // If the operation was aborted, it means that the operation was
-                        // cancelled due to another operation being assigned to the worker.
-                        return Ok(());
-                    }
-                    // Any other error is a real error.
-                    return Err(err);
-                }
-
-                debug!(%worker_id, %operation_id, ?action_info, "Notifying worker of operation");
-                workers
-                    .worker_notify_run_action(worker_id, operation_id, action_info)
-                    .await
-                    .err_tip(|| {
-                        "Failed to run worker_notify_run_action in SimpleScheduler::do_try_match"
-                    })
-            };
-            tokio::pin!(attach_operation_fut);
-
-            let origin_metadata = maybe_origin_metadata.unwrap_or_default();
-
-            let ctx = Context::current_with_baggage(vec![KeyValue::new(
-                ENDUSER_ID,
-                origin_metadata.identity,
-            )]);
-
-            info_span!("do_try_match")
-                .in_scope(|| attach_operation_fut)
-                .with_context(ctx)
-                .await
-        }
-
-        let mut result = Ok(());
+        /// Maximum number of actions to process concurrently during matching.
+        /// find_and_reserve_worker atomically finds AND reserves the worker
+        /// (reducing platform properties and inserting into running_action_infos)
+        /// under a single lock acquisition, so concurrent matches cannot
+        /// select the same worker.
+        const MATCH_CONCURRENCY: usize = 8;
+
+        // Cache for computed platform properties, keyed by sorted key-value
+        // pairs. This avoids recomputing the same PlatformProperties for
+        // actions that share identical platform requirements (the common case).
+        let props_cache: std::sync::Mutex<
+            HashMap<Vec<(String, String)>, Arc<PlatformProperties>>,
+        > = std::sync::Mutex::new(HashMap::new());
+
+        // Per-client match counter for fair scheduling. When
+        // max_matches_per_client_per_cycle > 0, limits how many actions
+        // from the same instance_name can be matched in one cycle,
+        // preventing a single client from monopolizing all workers.
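+        // Worked example (illustrative): with max_matches_per_client_per_cycle
+        // set to 4, clients A (100 actions queued) and B (2 queued) get at most
+        // 4 and 2 matches respectively in one cycle, instead of A's backlog
+        // consuming every free worker before B is considered.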
+        let per_client_matches: std::sync::Mutex<HashMap<String, usize>> =
+            std::sync::Mutex::new(HashMap::new());
+        let max_per_client = self.max_matches_per_client_per_cycle;
 
         let start = Instant::now();
 
-        let mut stream = self
+        let stream = self
            .get_queued_operations()
            .await
            .err_tip(|| "Failed to get queued operations in do_try_match")?;
@@ -320,17 +262,49 @@ impl SimpleScheduler {
             );
         }
 
-        while let Some(action_state_result) = stream.next().await {
-            result = result.merge(
-                match_action_to_worker(
-                    action_state_result.as_ref(),
+        // Collect all queued actions so we own them, then process up to
+        // MATCH_CONCURRENCY concurrently using FuturesUnordered. Each action
+        // independently finds a worker and assigns itself; conflicts are
+        // resolved by the existing error handling (Aborted codes, None from
+        // find_worker, etc.).
+        let queued_actions: Vec<Box<dyn ActionStateResult>> = stream.collect().await;
+
+        let mut futures_set = futures::stream::FuturesUnordered::<
+            std::pin::Pin<Box<dyn Future<Output = Result<(), Error>> + Send + '_>>,
+        >::new();
+        let mut action_iter = queued_actions.into_iter();
+        let mut result = Ok(());
+
+        // Seed the initial batch.
+        for action_state_result in action_iter.by_ref().take(MATCH_CONCURRENCY) {
+            futures_set.push(Box::pin(Self::match_action_to_worker_cached(
+                action_state_result,
+                self.worker_scheduler.as_ref(),
+                self.matching_engine_state_manager.as_ref(),
+                self.platform_property_manager.as_ref(),
+                &props_cache,
+                &per_client_matches,
+                max_per_client,
+                full_worker_logging,
+            )));
+        }
+
+        // Process futures as they complete, adding new ones to maintain concurrency.
+        while let Some(match_result) = futures_set.next().await {
+            result = result.merge(match_result);
+
+            if let Some(action_state_result) = action_iter.next() {
+                futures_set.push(Box::pin(Self::match_action_to_worker_cached(
+                    action_state_result,
                     self.worker_scheduler.as_ref(),
                     self.matching_engine_state_manager.as_ref(),
                     self.platform_property_manager.as_ref(),
+                    &props_cache,
+                    &per_client_matches,
+                    max_per_client,
                     full_worker_logging,
-                )
-                .await,
-            );
+                )));
+            }
         }
 
         let total_elapsed = start.elapsed();
@@ -344,6 +318,165 @@ impl SimpleScheduler {
 
         result
     }
+
+    /// Matches a single action to a worker, using a shared cache for computed
+    /// platform properties to avoid redundant recomputation across actions
+    /// with identical platform requirements.
+    ///
+    /// When `max_per_client > 0`, enforces fair scheduling by limiting how
+    /// many actions from the same `instance_name` can be matched per cycle.
+    /// Actions that exceed the limit are skipped (left in queue for next cycle).
+    async fn match_action_to_worker_cached(
+        action_state_result: Box<dyn ActionStateResult>,
+        workers: &ApiWorkerScheduler,
+        matching_engine_state_manager: &dyn MatchingEngineStateManager,
+        platform_property_manager: &PlatformPropertyManager,
+        props_cache: &std::sync::Mutex<
+            HashMap<Vec<(String, String)>, Arc<PlatformProperties>>,
+        >,
+        per_client_matches: &std::sync::Mutex<HashMap<String, usize>>,
+        max_per_client: usize,
+        full_worker_logging: bool,
+    ) -> Result<(), Error> {
+        let (action_info, maybe_origin_metadata) = action_state_result
+            .as_action_info()
+            .await
+            .err_tip(|| "Failed to get action_info from as_action_info_result stream")?;
+
+        // Fair scheduling: atomically check and optimistically increment the
+        // per-client counter. If the client has hit its limit, skip the action.
+        // If the match later fails, we decrement to undo the reservation.
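+        // Claim/undo protocol in brief (sketch of the code below):
+        //   claim: count < max ? { count += 1; proceed } : skip (stay queued)
+        //   undo:  no worker found, or assignment fails => count -= 1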
+        let client_name = action_info.instance_name().clone();
+        let claimed_slot = if max_per_client > 0 {
+            let mut map = per_client_matches.lock().unwrap_or_else(|e| e.into_inner());
+            let count = map.entry(client_name.clone()).or_insert(0);
+            if *count >= max_per_client {
+                // Skip — action stays queued for next cycle.
+                return Ok(());
+            }
+            *count += 1;
+            true
+        } else {
+            false
+        };
+
+        // Helper to undo the optimistic increment on failure paths.
+        let undo_claim = |per_client_matches: &std::sync::Mutex<HashMap<String, usize>>,
+                          client_name: &str| {
+            let mut map = per_client_matches.lock().unwrap_or_else(|e| e.into_inner());
+            if let Some(count) = map.get_mut(client_name) {
+                *count = count.saturating_sub(1);
+            }
+        };
+
+        // Build a deterministic cache key from the raw platform
+        // properties (sorted key-value pairs).
+        let mut cache_key: Vec<(String, String)> =
+            action_info.platform_properties.clone().into_iter().collect();
+        cache_key.sort();
+
+        // Look up or compute and cache the platform properties.
+        let platform_properties = {
+            let mut cache = props_cache.lock().unwrap_or_else(|e| e.into_inner());
+            if let Some(cached) = cache.get(&cache_key) {
+                cached.clone()
+            } else {
+                let computed = platform_property_manager
+                    .make_platform_properties(action_info.platform_properties.clone())
+                    .err_tip(|| {
+                        "Failed to make platform properties in SimpleScheduler::do_try_match"
+                    })?;
+                let arc = Arc::new(computed);
+                cache.insert(cache_key, arc.clone());
+                arc
+            }
+        };
+
+        let action_info_with_props = ActionInfoWithProps {
+            inner: action_info,
+            platform_properties: (*platform_properties).clone(),
+        };
+
+        // Extract the operation_id from the action_state BEFORE finding a
+        // worker, so we can pass it to find_and_reserve_worker for atomic
+        // reservation.
+        let operation_id = {
+            let (action_state, _origin_metadata) = action_state_result
+                .as_state()
+                .await
+                .err_tip(|| "Failed to get action_info from as_state_result stream")?;
+            action_state.client_operation_id.clone()
+        };
+
+        // Atomically find a worker AND reserve it for this operation.
+        // The worker's platform properties are reduced and the action is
+        // recorded in running_action_infos under a single lock acquisition,
+        // preventing concurrent matches from selecting the same worker.
+        let (worker_id, tx, msg) = match workers
+            .find_and_reserve_worker(
+                &action_info_with_props.platform_properties,
+                &operation_id,
+                &action_info_with_props,
+                full_worker_logging,
+            )
+            .await
+        {
+            Some(result) => result,
+            // No worker found — undo the optimistic increment.
+            None => {
+                if claimed_slot {
+                    undo_claim(per_client_matches, &client_name);
+                }
+                return Ok(());
+            }
+        };
+
+        // Tell the matching engine that the operation is being assigned to a worker.
+        let assign_result = matching_engine_state_manager
+            .assign_operation(&operation_id, Ok(&worker_id))
+            .await
+            .err_tip(|| "Failed to assign operation in do_try_match");
+        if let Err(err) = assign_result {
+            // Undo the worker reservation since the assignment failed.
+            workers.unreserve_worker(&worker_id, &operation_id).await;
+            if claimed_slot {
+                undo_claim(per_client_matches, &client_name);
+            }
+            if err.code == Code::Aborted {
+                // The operation was cancelled due to another operation
+                // being assigned to the worker.
+                return Ok(());
+            }
+            // Any other error is a real error.
+            return Err(err);
+        }
+
+        let origin_metadata = maybe_origin_metadata.unwrap_or_default();
+        let ctx = Context::current_with_baggage(vec![KeyValue::new(
+            ENDUSER_ID,
+            origin_metadata.identity,
+        )]);
+
+        let notify_fut = async {
+            debug!(
+                %worker_id,
+                %operation_id,
+                ?action_info_with_props,
+                "Notifying worker of operation"
+            );
+            workers
+                .send_reserved_worker_notification(&worker_id, tx, msg)
+                .await
+                .err_tip(|| {
+                    "Failed to send_reserved_worker_notification in SimpleScheduler::do_try_match"
+                })
+        };
+
+        info_span!("do_try_match")
+            .in_scope(|| notify_fut)
+            .with_context(ctx)
+            .await
+    }
 }
 
 impl SimpleScheduler {
@@ -352,24 +485,41 @@ impl SimpleScheduler {
         awaited_action_db: A,
         task_change_notify: Arc<Notify>,
         maybe_origin_event_tx: Option<mpsc::Sender<OriginEvent>>,
+    ) -> (Arc<Self>, Arc<ApiWorkerScheduler>) {
+        Self::new_with_cas_store(
+            spec,
+            awaited_action_db,
+            task_change_notify,
+            maybe_origin_event_tx,
+            None,
+            None,
+        )
+    }
+
+    pub fn new_with_cas_store(
+        spec: &SimpleSpec,
+        awaited_action_db: A,
+        task_change_notify: Arc<Notify>,
+        maybe_origin_event_tx: Option<mpsc::Sender<OriginEvent>>,
+        cas_store: Option<Store>,
+        locality_map: Option<SharedBlobLocalityMap>,
     ) -> (Arc<Self>, Arc<ApiWorkerScheduler>) {
         Self::new_with_callback(
             spec,
             awaited_action_db,
             || {
-                // The cost of running `do_try_match()` is very high, but constant
-                // in relation to the number of changes that have happened. This
-                // means that grabbing this lock to process `do_try_match()` should
-                // always yield to any other tasks that might want the lock. The
-                // easiest and most fair way to do this is to sleep for a small
-                // amount of time. Using something like tokio::task::yield_now()
-                // does not yield as aggressively as we'd like if new futures are
-                // scheduled within a future.
-                tokio::time::sleep(Duration::from_millis(1))
+                // Yield to allow other tasks to make progress between match
+                // cycles. A full 1ms sleep is too aggressive and caps matching
+                // to ~1000 cycles/sec. sleep(ZERO) defers to the next timer
+                // tick, preventing busy-spinning when no other tasks are
+                // runnable (unlike yield_now which returns immediately).
                tokio::time::sleep(Duration::ZERO)
             },
             task_change_notify,
             SystemTime::now,
             maybe_origin_event_tx,
+            cas_store,
+            locality_map,
         )
     }
 
@@ -386,6 +536,8 @@ impl SimpleScheduler {
         task_change_notify: Arc<Notify>,
         now_fn: NowFn,
         maybe_origin_event_tx: Option<mpsc::Sender<OriginEvent>>,
+        cas_store: Option<Store>,
+        locality_map: Option<SharedBlobLocalityMap>,
     ) -> (Arc<Self>, Arc<ApiWorkerScheduler>) {
         let platform_property_manager = Arc::new(PlatformPropertyManager::new(
             spec.supported_platform_properties
@@ -433,13 +585,15 @@ impl SimpleScheduler {
             Some(worker_registry.clone()),
         );
 
-        let worker_scheduler = ApiWorkerScheduler::new(
+        let worker_scheduler = ApiWorkerScheduler::new_with_locality_map(
             state_manager.clone(),
             platform_property_manager.clone(),
             spec.allocation_strategy,
             worker_change_notify.clone(),
             worker_timeout_s,
             worker_registry,
+            locality_map,
+            cas_store,
         );
 
         let worker_scheduler_clone = worker_scheduler.clone();
@@ -450,6 +604,8 @@ impl SimpleScheduler {
         spawn!("simple_scheduler_task_worker_matching", async move {
             let mut last_match_successful = true;
             let mut worker_match_logging_last: Option<Instant> = None;
+            let mut last_stall_check: Option<Instant> = None;
+            let mut consecutive_match_errors: u32 = 0;
             // Break out of the loop only when the inner is dropped.
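+            // Each iteration below (sketch): (1) wait for a task or worker
+            // change notification, (2) run do_try_match(), (3) every 30s scan
+            // Queued operations for >60s stalls and classify them as capacity
+            // pressure vs. a likely matching deadlock.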
             loop {
                 let task_change_fut = task_change_notify.notified();
@@ -542,11 +698,129 @@
                         for item in value {
                             items.push(item.to_string());
                         }
-                        info!(?items, "Oldest actions in state");
+                        debug!(?items, "Oldest actions in state");
                     }
                     worker_match_logging_last.replace(now);
                 }
+
+                // Stall detection: every 30s, check for actions stuck
+                // in Queued state for >60s. Only fires as an error when
+                // no actions are executing (true deadlock). If workers are
+                // busy executing, queued stalls are just capacity limits.
+                let should_check_stalls = match last_stall_check {
+                    None => true,
+                    Some(when) => now.duration_since(when) >= Duration::from_secs(30),
+                };
+                if should_check_stalls {
+                    last_stall_check = Some(now);
+                    let stall_threshold = Duration::from_secs(60);
+                    match scheduler
+                        .matching_engine_state_manager
+                        .filter_operations(OperationFilter {
+                            stages: OperationStageFlags::Queued,
+                            order_by_priority_direction: Some(OrderDirection::Desc),
+                            ..Default::default()
+                        })
+                        .await
+                    {
+                        Ok(queued_stream) => {
+                            let queued_actions: Vec<_> = queued_stream.collect().await;
+                            let mut stalled_count: usize = 0;
+                            let mut unmatchable_count: usize = 0;
+                            let prop_manager = scheduler.worker_scheduler.get_platform_property_manager();
+                            for action_state_result in &queued_actions {
+                                if let Ok((state, _)) = action_state_result.as_state().await {
+                                    if let Ok(elapsed) = state.last_transition_timestamp.elapsed() {
+                                        if elapsed > stall_threshold {
+                                            stalled_count += 1;
+                                            // Check if any worker could ever match this action.
+                                            match action_state_result.as_action_info().await {
+                                                Ok((action_info, _)) => {
+                                                    match prop_manager.make_platform_properties(
+                                                        action_info.platform_properties.clone(),
+                                                    ) {
+                                                        Ok(props) => {
+                                                            if !scheduler.worker_scheduler.has_matching_workers(&props).await {
+                                                                error!(
+                                                                    operation_id = %state.client_operation_id,
+                                                                    action_digest = %state.action_digest,
+                                                                    properties = ?action_info.platform_properties,
+                                                                    "Action queued >60s with NO matching workers — \
+                                                                     no registered worker can satisfy its platform requirements"
+                                                                );
+                                                                unmatchable_count += 1;
+                                                            }
+                                                        }
+                                                        Err(e) => {
+                                                            warn!(
+                                                                operation_id = %state.client_operation_id,
+                                                                ?e,
+                                                                "Failed to parse platform properties for stalled action — cannot check matchability"
+                                                            );
+                                                        }
+                                                    }
+                                                }
+                                                Err(e) => {
+                                                    warn!(
+                                                        operation_id = %state.client_operation_id,
+                                                        ?e,
+                                                        "Failed to get action_info for stalled action — cannot check matchability"
+                                                    );
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                            let matchable_stalled = stalled_count - unmatchable_count;
+                            if matchable_stalled > 0 {
+                                // Check if workers are actively executing. If so,
+                                // the queue backlog is just capacity pressure.
+                                let executing_count = match scheduler
+                                    .matching_engine_state_manager
+                                    .filter_operations(OperationFilter {
+                                        stages: OperationStageFlags::Executing,
+                                        ..Default::default()
+                                    })
+                                    .await
+                                {
+                                    Ok(s) => s.count().await,
+                                    Err(e) => {
+                                        // Query failed — assume workers are busy
+                                        // rather than raising a false deadlock alarm.
+                                        warn!(?e, "Failed to query executing actions for stall check");
+                                        usize::MAX
+                                    }
+                                };
+
+                                if executing_count > 0 {
+                                    warn!(
+                                        stalled_count = matchable_stalled,
+                                        total_queued = queued_actions.len(),
+                                        executing_count,
+                                        unmatchable_count,
+                                        "Actions waiting in queue >60s (workers at capacity)"
+                                    );
+                                } else {
+                                    error!(
+                                        stalled_count = matchable_stalled,
+                                        total_queued = queued_actions.len(),
+                                        unmatchable_count,
+                                        "Actions stalled in Queued state >60s with NO executing actions (possible scheduling deadlock)"
+                                    );
+                                }
+                            }
+                        }
+                        Err(e) => {
+                            error!(
+                                ?e,
+                                "Failed to query queued actions for stall check — scheduler state may be corrupted"
+                            );
+                        }
+                    }
+                }
+
                 res
             }
             // If the inner went away it means the scheduler is shutting
             None => return,
         };
         last_match_successful = result.is_ok();
-        if let Err(err) = result {
-            error!(?err, "Error while running do_try_match");
+        if let Err(err) = &result {
+            consecutive_match_errors += 1;
+            if consecutive_match_errors >= 10 {
+                error!(
+                    consecutive_match_errors,
+                    ?err,
+                    "do_try_match failing consecutively — \
+                     possible scheduler data structure corruption. \
+                     A server restart may be required to recover.",
+                );
+            } else {
+                error!(?err, "Error while running do_try_match");
+            }
+        } else {
+            consecutive_match_errors = 0;
         }
 
         on_matching_engine_run().await;
@@ -586,6 +873,7 @@ impl SimpleScheduler {
             maybe_origin_event_tx,
             task_worker_matching_spawn,
             worker_match_logging_interval,
+            max_matches_per_client_per_cycle: spec.max_matches_per_client_per_cycle,
         }
     });
     (action_scheduler, worker_scheduler_clone)
@@ -678,6 +966,35 @@ impl WorkerScheduler for SimpleScheduler {
             .set_drain_worker(worker_id, is_draining)
             .await
     }
+
+    async fn update_worker_load(&self, worker_id: &WorkerId, cpu_load_pct: u32) -> Result<(), Error> {
+        self.worker_scheduler
+            .update_worker_load(worker_id, cpu_load_pct)
+            .await
+    }
+
+    async fn update_cached_directories(
+        &self,
+        worker_id: &WorkerId,
+        digests: HashSet<DigestInfo>,
+    ) -> Result<(), Error> {
+        self.worker_scheduler
+            .update_cached_directories(worker_id, digests)
+            .await
+    }
+
+    async fn update_cached_subtrees(
+        &self,
+        worker_id: &WorkerId,
+        is_full_snapshot: bool,
+        full_set: Vec<DigestInfo>,
+        added: Vec<DigestInfo>,
+        removed: Vec<DigestInfo>,
+    ) -> Result<(), Error> {
+        self.worker_scheduler
+            .update_cached_subtrees(worker_id, is_full_snapshot, full_set, added, removed)
+            .await
+    }
 }
 
 impl RootMetricsComponent for SimpleScheduler {}
diff --git a/nativelink-scheduler/src/simple_scheduler_state_manager.rs b/nativelink-scheduler/src/simple_scheduler_state_manager.rs
index 040290ce3..090ed597a 100644
--- a/nativelink-scheduler/src/simple_scheduler_state_manager.rs
+++ b/nativelink-scheduler/src/simple_scheduler_state_manager.rs
@@ -676,7 +676,7 @@ where
             // No action found. It is ok if the action was not found. It
             // probably means that the action was dropped, but worker was
             // still processing it.
-            warn!(
+            debug!(
                %operation_id,
                "Unable to update action due to it being missing, probably dropped"
            );
@@ -716,21 +716,16 @@ where
 
         // Make sure we don't update an action that is already completed.
         if awaited_action.state().stage.is_finished() {
-            match &update {
-                UpdateOperationType::UpdateWithDisconnect | UpdateOperationType::KeepAlive => {
-                    // No need to error a keep-alive when it's completed, it's just
-                    // unnecessary log noise.
-                    return Ok(());
-                }
-                _ => {
-                    return Err(make_err!(
-                        Code::Internal,
-                        "Action {operation_id} is already completed with state {:?} - maybe_worker_id: {:?}",
-                        awaited_action.state().stage,
-                        maybe_worker_id,
-                    ));
-                }
-            }
+            // This is a benign race: the worker finished after the scheduler
+            // already timed out the operation (e.g. client stopped listening).
+            // No client is waiting for the result, so just log and move on.
+            debug!(
+                %operation_id,
+                ?maybe_worker_id,
+                stage = ?awaited_action.state().stage,
+                "Ignoring late update for already-completed action"
+            );
+            return Ok(());
         }
 
         let stage = match &update {
@@ -756,16 +751,46 @@ where
                     warn!(state = ?awaited_action.state(), "Action already assigned");
                     return Err(make_err!(Code::Aborted, "Action already assigned"));
                 }
-                stage.clone()
+                // Exit code 9 = SIGKILL, typically from the OOM killer.
+                // Treat as a retryable infrastructure error rather than
+                // a permanent action failure.
+                if let ActionStage::Completed(result) = stage {
+                    if result.exit_code == 9 {
+                        awaited_action.attempts += 1;
+                        if awaited_action.attempts <= self.max_job_retries {
+                            warn!(
+                                %operation_id,
+                                attempts = awaited_action.attempts,
+                                max_retries = self.max_job_retries,
+                                "Action killed by SIGKILL (OOM?), re-queuing with max priority"
+                            );
+                            awaited_action.boost_priority();
+                            ActionStage::Queued
+                        } else {
+                            warn!(
+                                %operation_id,
+                                attempts = awaited_action.attempts,
+                                "Action killed by SIGKILL (OOM?) and exceeded max retries"
+                            );
+                            stage.clone()
+                        }
+                    } else {
+                        stage.clone()
+                    }
+                } else {
+                    stage.clone()
+                }
             }
             UpdateOperationType::UpdateWithError(err) => {
                 // Don't count a backpressure failure as an attempt for an action.
                 let due_to_backpressure = err.code == Code::ResourceExhausted;
+                // Missing inputs can only be fixed by the client re-uploading.
+                let missing_inputs = err.code == Code::FailedPrecondition;
                 if !due_to_backpressure {
                     awaited_action.attempts += 1;
                 }
 
-                if awaited_action.attempts > self.max_job_retries {
+                if missing_inputs || awaited_action.attempts > self.max_job_retries {
                     ActionStage::Completed(ActionResult {
                         execution_metadata: ExecutionMetadata {
                             worker: maybe_worker_id.map_or_else(String::default, ToString::to_string),
diff --git a/nativelink-scheduler/src/worker.rs b/nativelink-scheduler/src/worker.rs
index 0d6e68b6a..eb346fcb0 100644
--- a/nativelink-scheduler/src/worker.rs
+++ b/nativelink-scheduler/src/worker.rs
@@ -24,6 +24,7 @@ use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{
     ConnectionResult, StartExecute, UpdateForWorker, update_for_worker,
 };
 use nativelink_util::action_messages::{ActionInfo, OperationId, WorkerId};
+use nativelink_util::common::DigestInfo;
 use nativelink_util::metrics_utils::{AsyncCounterWrapper, CounterWithTime, FuncCounterWrapper};
 use nativelink_util::platform_properties::{PlatformProperties, PlatformPropertyValue};
 use tokio::sync::mpsc::UnboundedSender;
@@ -92,6 +93,12 @@ pub struct Worker {
     #[metric(help = "If the worker is paused.")]
     pub is_paused: bool,
 
+    /// Whether the pause was caused by explicit worker backpressure
+    /// (ResourceExhausted) as opposed to a capacity check. When true,
+    /// the scheduler should not auto-clear is_paused based on capacity
+    /// alone — it should wait for the worker to complete an action.
+    pub paused_due_to_backpressure: bool,
+
     /// Whether the worker is draining.
#[metric(help = "If the worker is draining.")] pub is_draining: bool, @@ -100,6 +107,33 @@ pub struct Worker { #[metric(help = "Maximum inflight tasks for this worker (or 0 for unlimited)")] pub max_inflight_tasks: u64, + /// When this worker entered quarantine (i.e. missed keepalive for + /// > worker_timeout but < 2*worker_timeout). While quarantined the + /// worker will not receive new actions but is not yet evicted. + /// Reset to `None` when a keepalive is received. + pub quarantined_at: Option, + + /// The worker's CAS gRPC endpoint for peer blob serving. + /// Empty if the worker does not support peer serving. + #[metric(help = "The worker's CAS endpoint for peer blob sharing.")] + pub cas_endpoint: String, + + /// CPU load percentage reported by the worker (load_avg_1m / num_cpus * 100). + /// 0 means unknown (worker hasn't reported load yet). + #[metric(help = "CPU load percentage reported by the worker.")] + pub cpu_load_pct: u32, + + /// Digests of input root directories cached in the worker's directory cache. + /// The scheduler gives routing preference to workers that already have the + /// action's input_root_digest cached. + pub cached_directory_digests: HashSet, + + /// All subtree digests (roots + subtrees) from the worker's directory cache. + /// Updated via delta encoding from BlobsAvailableNotification. + /// The scheduler uses this for subtree-aware scheduling: checking whether + /// the action's input_root_digest appears as ANY subtree in any cached entry. + pub cached_subtree_digests: HashSet, + /// Stats about the worker. #[metric] metrics: Arc, @@ -116,7 +150,7 @@ fn send_msg_to_worker( /// Reduces the platform properties available on the worker based on the platform properties provided. /// This is used because we allow more than 1 job to run on a worker at a time, and this is how the /// scheduler knows if more jobs can run on a given worker. 
-fn reduce_platform_properties(
+pub(crate) fn reduce_platform_properties(
     parent_props: &mut PlatformProperties,
     reduction_props: &PlatformProperties,
 ) {
@@ -140,6 +174,17 @@ impl Worker {
         tx: UnboundedSender<UpdateForWorker>,
         timestamp: WorkerTimestamp,
         max_inflight_tasks: u64,
+    ) -> Self {
+        Self::new_with_cas_endpoint(id, platform_properties, tx, timestamp, max_inflight_tasks, String::new())
+    }
+
+    pub fn new_with_cas_endpoint(
+        id: WorkerId,
+        platform_properties: PlatformProperties,
+        tx: UnboundedSender<UpdateForWorker>,
+        timestamp: WorkerTimestamp,
+        max_inflight_tasks: u64,
+        cas_endpoint: String,
     ) -> Self {
         Self {
             id,
@@ -149,8 +194,14 @@ impl Worker {
             restored_platform_properties: HashSet::new(),
             last_update_timestamp: timestamp,
             is_paused: false,
+            paused_due_to_backpressure: false,
             is_draining: false,
             max_inflight_tasks,
+            quarantined_at: None,
+            cas_endpoint,
+            cpu_load_pct: 0,
+            cached_directory_digests: HashSet::new(),
+            cached_subtree_digests: HashSet::new(),
             metrics: Arc::new(Metrics {
                 connected_timestamp: SystemTime::now()
                     .duration_since(UNIX_EPOCH)
@@ -218,6 +269,7 @@ impl Worker {
             queued_timestamp: Some(action_info.inner.insert_timestamp.into()),
             platform: Some((&action_info.platform_properties).into()),
             worker_id,
+            peer_hints: Vec::new(),
         };
         reduce_platform_properties(
             worker_platform_properties,
@@ -256,6 +308,7 @@ impl Worker {
             self.restore_platform_properties(&pending_action_info.action_info.platform_properties);
         }
         self.is_paused = false;
+        self.paused_due_to_backpressure = false;
         self.metrics.actions_completed.inc();
         Ok(())
     }
@@ -264,7 +317,7 @@ impl Worker {
         !self.running_action_infos.is_empty()
     }
 
-    fn restore_platform_properties(&mut self, props: &PlatformProperties) {
+    pub(crate) fn restore_platform_properties(&mut self, props: &PlatformProperties) {
         for (property, prop_value) in &props.properties {
             if let PlatformPropertyValue::Minimum(value) = prop_value {
                 let worker_props = &mut self.platform_properties.properties;
diff --git a/nativelink-scheduler/src/worker_capability_index.rs b/nativelink-scheduler/src/worker_capability_index.rs
index b0e45b76b..b7a15d923 100644
--- a/nativelink-scheduler/src/worker_capability_index.rs
+++ b/nativelink-scheduler/src/worker_capability_index.rs
@@ -31,7 +31,7 @@ use std::collections::{HashMap, HashSet};
 
 use nativelink_util::action_messages::WorkerId;
 use nativelink_util::platform_properties::{PlatformProperties, PlatformPropertyValue};
-use tracing::info;
+use tracing::debug;
 
 /// A property key-value pair used for indexing.
 #[derive(Clone, Hash, Eq, PartialEq, Debug)]
@@ -136,7 +136,7 @@ impl WorkerCapabilityIndex {
     ) -> HashSet<WorkerId> {
         if self.all_workers.is_empty() {
             if full_worker_logging {
-                info!("No workers available to match!");
+                debug!("No workers available to match!");
             }
             return HashSet::new();
         }
@@ -173,7 +173,7 @@ impl WorkerCapabilityIndex {
                         .filter(|pk| &pk.0.name == name)
                         .map(|pk| pk.0.value.clone())
                         .collect();
-                    info!(
+                    debug!(
                         "No candidate workers due to a lack of matching '{name}' = {value:?}. Workers have: {values:?}"
                     );
                 }
@@ -202,7 +202,7 @@ impl WorkerCapabilityIndex {
 
             if internal_candidates.is_empty() {
                 if full_worker_logging {
-                    info!(
+                    debug!(
Job asked for {value:?}" ); } diff --git a/nativelink-scheduler/src/worker_scheduler.rs b/nativelink-scheduler/src/worker_scheduler.rs index fe9bcb0f4..b13289140 100644 --- a/nativelink-scheduler/src/worker_scheduler.rs +++ b/nativelink-scheduler/src/worker_scheduler.rs @@ -12,10 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; + use async_trait::async_trait; use nativelink_error::Error; use nativelink_metric::RootMetricsComponent; use nativelink_util::action_messages::{OperationId, WorkerId}; +use nativelink_util::common::DigestInfo; use nativelink_util::operation_state_manager::UpdateOperationType; use nativelink_util::shutdown_guard::ShutdownGuard; @@ -59,4 +62,31 @@ pub trait WorkerScheduler: Sync + Send + Unpin + RootMetricsComponent + 'static /// Sets if the worker is draining or not. async fn set_drain_worker(&self, worker_id: &WorkerId, is_draining: bool) -> Result<(), Error>; + + /// Updates the CPU load reported by a worker. + /// `cpu_load_pct` is load_avg_1m / num_cpus * 100. 0 means unknown. + async fn update_worker_load(&self, worker_id: &WorkerId, cpu_load_pct: u32) -> Result<(), Error>; + + /// Updates the set of cached directory digests for a worker. + /// The scheduler uses this to give routing preference to workers that + /// already have the action's input_root_digest cached in their directory cache. + async fn update_cached_directories( + &self, + worker_id: &WorkerId, + digests: HashSet, + ) -> Result<(), Error>; + + /// Updates the set of cached subtree digests for a worker using delta encoding. + /// + /// When `is_full_snapshot` is true, `full_set` replaces the entire set. + /// When `is_full_snapshot` is false, `added` digests are inserted and + /// `removed` digests are deleted from the existing set. 
+    async fn update_cached_subtrees(
+        &self,
+        worker_id: &WorkerId,
+        is_full_snapshot: bool,
+        full_set: Vec<DigestInfo>,
+        added: Vec<DigestInfo>,
+        removed: Vec<DigestInfo>,
+    ) -> Result<(), Error>;
 }
diff --git a/nativelink-scheduler/tests/redis_store_awaited_action_db_test.rs b/nativelink-scheduler/tests/redis_store_awaited_action_db_test.rs
index 906d511ac..0e2070c76 100644
--- a/nativelink-scheduler/tests/redis_store_awaited_action_db_test.rs
+++ b/nativelink-scheduler/tests/redis_store_awaited_action_db_test.rs
@@ -270,6 +270,8 @@ async fn test_multiple_clients_subscribe_to_same_action() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
 
     // First client adds the action
@@ -324,6 +326,7 @@ async fn test_multiple_clients_subscribe_to_same_action() -> Result<(), Error> {
             queued_timestamp: Some(SystemTime::UNIX_EPOCH.into()),
             platform: Some(Platform::default()),
             worker_id: worker_id.clone().into(),
+            peer_hints: Vec::new(),
         })),
     };
     let msg_for_worker = rx_from_worker.recv().await.unwrap();
diff --git a/nativelink-scheduler/tests/simple_scheduler_test.rs b/nativelink-scheduler/tests/simple_scheduler_test.rs
index 59364bf28..b2ae67644 100644
--- a/nativelink-scheduler/tests/simple_scheduler_test.rs
+++ b/nativelink-scheduler/tests/simple_scheduler_test.rs
@@ -22,15 +22,17 @@ use std::sync::Arc;
 use std::time::{SystemTime, UNIX_EPOCH};
 
 use async_lock::Mutex;
+use bytes::Bytes;
 use futures::task::Poll;
 use futures::{Stream, StreamExt, poll};
 use mock_instant::thread_local::{MockClock, SystemTime as MockSystemTime};
 use nativelink_config::schedulers::{PropertyType, SimpleSpec};
+use nativelink_config::stores::MemorySpec;
 use nativelink_error::{Code, Error, ResultExt, make_err};
 use nativelink_macro::nativelink_test;
 use nativelink_metric::MetricsComponent;
 use nativelink_proto::build::bazel::remote::execution::v2::{
-    ExecuteRequest, Platform, digest_function,
+    Directory, ExecuteRequest, FileNode, Platform, digest_function,
 };
 use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{
     ConnectionResult, StartExecute, UpdateForWorker, update_for_worker,
@@ -43,10 +45,12 @@ use nativelink_scheduler::default_scheduler_factory::memory_awaited_action_db_fa
 use nativelink_scheduler::simple_scheduler::SimpleScheduler;
 use nativelink_scheduler::worker::Worker;
 use nativelink_scheduler::worker_scheduler::WorkerScheduler;
+use nativelink_store::memory_store::MemoryStore;
 use nativelink_util::action_messages::{
     ActionInfo, ActionResult, ActionStage, ActionState, DirectoryInfo, ExecutionMetadata,
     FileInfo, INTERNAL_ERROR_EXIT_CODE, NameOrPath, OperationId, SymlinkInfo, WorkerId,
 };
+use nativelink_util::blob_locality_map::new_shared_blob_locality_map;
 use nativelink_util::common::DigestInfo;
 use nativelink_util::instant_wrapper::MockInstantWrapped;
 use nativelink_util::operation_state_manager::{
@@ -54,6 +58,8 @@ use nativelink_util::operation_state_manager::{
     UpdateOperationType,
 };
 use nativelink_util::platform_properties::{PlatformProperties, PlatformPropertyValue};
+use nativelink_util::store_trait::{Store, StoreLike};
+use prost::Message;
 use pretty_assertions::assert_eq;
 use tokio::sync::{Notify, mpsc};
 use utils::scheduler_utils::{INSTANCE_NAME, make_base_action_info, update_eq};
@@ -134,6 +140,8 @@ async fn basic_add_action_with_one_worker_test() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
 
     let action_digest = DigestInfo::new([99u8; 32], 512);
@@ -159,6 +167,7 @@ async fn basic_add_action_with_one_worker_test() -> Result<(), Error> {
             queued_timestamp: Some(insert_timestamp.into()),
             platform: Some(Platform::default()),
             worker_id: worker_id.into(),
+            peer_hints: Vec::new(),
         })),
     };
     let msg_for_worker = rx_from_worker.recv().await.unwrap();
@@ -234,6 +243,8 @@ async fn client_does_not_receive_update_timeout() -> Result<(), Error> {
         task_change_notify.clone(),
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest = DigestInfo::new([99u8; 32], 512);
 
@@ -295,6 +306,8 @@ async fn find_executing_action() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest = DigestInfo::new([99u8; 32], 512);
 
@@ -339,6 +352,7 @@ async fn find_executing_action() -> Result<(), Error> {
             queued_timestamp: Some(insert_timestamp.into()),
             platform: Some(Platform::default()),
             worker_id: worker_id.into(),
+            peer_hints: Vec::new(),
         })),
     };
     let msg_for_worker = rx_from_worker.recv().await.unwrap();
@@ -380,6 +394,8 @@ async fn remove_worker_reschedules_multiple_running_job_test() -> Result<(), Err
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest1 = DigestInfo::new([99u8; 32], 512);
     let action_digest2 = DigestInfo::new([88u8; 32], 512);
@@ -418,6 +434,7 @@ async fn remove_worker_reschedules_multiple_running_job_test() -> Result<(), Err
         queued_timestamp: Some(insert_timestamp1.into()),
         platform: Some(Platform::default()),
         worker_id: worker_id1.to_string(),
+        peer_hints: Vec::new(),
     };
 
     let mut expected_start_execute_for_worker2 = StartExecute {
@@ -431,6 +448,7 @@ async fn remove_worker_reschedules_multiple_running_job_test() -> Result<(), Err
         queued_timestamp: Some(insert_timestamp2.into()),
         platform: Some(Platform::default()),
         worker_id: worker_id1.to_string(),
+        peer_hints: Vec::new(),
     };
     let operation_id1 = {
         // Worker1 should now see first execution request.
@@ -574,6 +592,8 @@ async fn set_drain_worker_pauses_and_resumes_worker_test() -> Result<(), Error>
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest = DigestInfo::new([99u8; 32], 512);
 
@@ -664,6 +684,8 @@ async fn worker_should_not_queue_if_properties_dont_match_test() -> Result<(), E
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest = DigestInfo::new([99u8; 32], 512);
     let mut platform_properties = HashMap::new();
@@ -718,6 +740,7 @@ async fn worker_should_not_queue_if_properties_dont_match_test() -> Result<(), E
             queued_timestamp: Some(insert_timestamp.into()),
             platform: Some((&worker2_properties).into()),
             worker_id: worker_id2.to_string(),
+            peer_hints: Vec::new(),
         })),
     };
     let msg_for_worker = rx_from_worker2.recv().await.unwrap();
@@ -761,6 +784,8 @@ async fn cacheable_items_join_same_action_queued_test() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest = DigestInfo::new([99u8; 32], 512);
 
@@ -817,6 +842,7 @@ async fn cacheable_items_join_same_action_queued_test() -> Result<(), Error> {
             queued_timestamp: Some(insert_timestamp1.into()),
             platform: Some(Platform::default()),
             worker_id: worker_id.into(),
+            peer_hints: Vec::new(),
         })),
     };
     let msg_for_worker = rx_from_worker.recv().await.unwrap();
@@ -870,6 +896,8 @@ async fn worker_disconnects_does_not_schedule_for_execution_test() -> Result<(),
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let worker_id = WorkerId("worker_id".to_string());
     let action_digest = DigestInfo::new([99u8; 32], 512);
@@ -1028,6 +1056,8 @@ async fn matching_engine_fails_sends_abort() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     // Initial worker calls do_try_match, so send it no items.
     senders.get_range_of_actions.send(vec![]).unwrap();
@@ -1074,6 +1104,8 @@ async fn matching_engine_fails_sends_abort() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     // senders.tx_get_awaited_action_by_id.send(Ok(None)).unwrap();
     senders.get_range_of_actions.send(vec![]).unwrap();
@@ -1135,6 +1167,8 @@ async fn worker_timesout_reschedules_running_job_test() -> Result<(), Error> {
         task_change_notify,
         MockInstantWrapped::default,
         None,
+        None, // cas_store
+        None, // locality_map
     );
     let action_digest = DigestInfo::new([99u8; 32], 512);
 
@@ -1168,6 +1202,7 @@ async fn worker_timesout_reschedules_running_job_test() -> Result<(), Error> {
         queued_timestamp: Some(insert_timestamp.into()),
         platform: Some(Platform::default()),
         worker_id: worker_id1.to_string(),
+        peer_hints: Vec::new(),
     };
 
     {
@@ -1205,14 +1240,19 @@ async fn worker_timesout_reschedules_running_job_test() -> Result<(), Error> {
         );
     }
 
-    // Keep worker 2 alive.
+    // Keep worker 2 alive at 2x timeout so it survives both phases.
     scheduler
-        .worker_keep_alive_received(&worker_id2, NOW_TIME + WORKER_TIMEOUT_S)
+        .worker_keep_alive_received(&worker_id2, NOW_TIME + 2 * WORKER_TIMEOUT_S)
         .await?;
-    // This should remove worker 1 (the one executing our job).
+    // Phase 1: quarantine worker 1 at 1x timeout (stops receiving new work).
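+    // Two-phase eviction timeline exercised below (per this patch):
+    //   t + 1*timeout : worker quarantined (keeps running jobs, gets no new ones)
+    //   t + 2*timeout : worker evicted (fully removed, running jobs rescheduled)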
scheduler .remove_timedout_workers(NOW_TIME + WORKER_TIMEOUT_S) .await?; + tokio::task::yield_now().await; + // Phase 2: evict worker 1 at 2x timeout (fully removed, job rescheduled). + scheduler + .remove_timedout_workers(NOW_TIME + 2 * WORKER_TIMEOUT_S) + .await?; tokio::task::yield_now().await; // Allow task<->worker matcher to run. { @@ -1269,6 +1309,8 @@ async fn update_action_sends_completed_result_to_client_test() -> Result<(), Err task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -1372,6 +1414,8 @@ async fn update_action_sends_completed_result_after_disconnect() -> Result<(), E task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -1493,6 +1537,8 @@ async fn update_action_with_wrong_worker_id_errors_test() -> Result<(), Error> { task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -1603,6 +1649,8 @@ async fn does_not_crash_if_operation_joined_then_relaunched() -> Result<(), Erro task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -1638,6 +1686,7 @@ async fn does_not_crash_if_operation_joined_then_relaunched() -> Result<(), Erro queued_timestamp: Some(insert_timestamp.into()), platform: Some(Platform::default()), worker_id: worker_id.clone().into(), + peer_hints: Vec::new(), })), }; let msg_for_worker = rx_from_worker.recv().await.unwrap(); @@ -1753,6 +1802,8 @@ async fn run_two_jobs_on_same_worker_with_platform_properties_restrictions() -> task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest1 = DigestInfo::new([11u8; 32], 512); let action_digest2 = DigestInfo::new([99u8; 32], 512); @@ -1921,6 +1972,8 @@ async fn run_jobs_in_the_order_they_were_queued() -> Result<(), Error> { task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest1 = DigestInfo::new([11u8; 32], 512); let action_digest2 = DigestInfo::new([99u8; 32], 512); @@ -1989,6 +2042,8 @@ async fn worker_retries_on_internal_error_and_fails_test() -> Result<(), Error> task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -2151,6 +2206,8 @@ async fn ensure_scheduler_drops_inner_spawn() -> Result<(), Error> { task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); assert_eq!(dropped.load(Ordering::Relaxed), false); @@ -2181,6 +2238,8 @@ async fn ensure_task_or_worker_change_notification_received_test() -> Result<(), task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -2267,6 +2326,8 @@ async fn client_reconnect_keeps_action_alive() -> Result<(), Error> { task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -2346,6 +2407,8 @@ async fn client_timesout_job_then_same_action_requested() -> Result<(), Error> { task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let 
action_digest = DigestInfo::new([99u8; 32], 512); @@ -2419,6 +2482,8 @@ async fn logs_when_no_workers_match() -> Result<(), Error> { task_change_notify, MockInstantWrapped::default, None, + None, // cas_store + None, // locality_map ); let action_digest = DigestInfo::new([99u8; 32], 512); @@ -2450,3 +2515,1150 @@ async fn logs_when_no_workers_match() -> Result<(), Error> { Ok(()) } + +#[nativelink_test] +async fn worker_fails_precondition_completes_immediately_test() -> Result<(), Error> { + let worker_id = WorkerId("worker_id".to_string()); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec { + max_job_retries: 5, + ..Default::default() + }, + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // cas_store + None, // locality_map + ); + let action_digest = DigestInfo::new([99u8; 32], 512); + + let mut rx_from_worker = + setup_new_worker(&scheduler, worker_id.clone(), PlatformProperties::default()).await?; + let insert_timestamp = make_system_time(1); + let mut action_listener = + setup_action(&scheduler, action_digest, HashMap::new(), insert_timestamp).await?; + + let operation_id = { + // Other tests check full data. We only care if we got StartAction. + let operation_id = match rx_from_worker.recv().await.unwrap().update { + Some(update_for_worker::Update::StartAction(exec)) => exec.operation_id, + v => panic!("Expected StartAction, got : {v:?}"), + }; + // Other tests check full data. We only care if client thinks we are Executing. + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + OperationId::from(operation_id.as_str()) + }; + + let err = make_err!(Code::FailedPrecondition, "Missing input blobs"); + // Send FailedPrecondition error from worker. This should NOT be retried + // even though max_job_retries is 5. + drop( + scheduler + .update_action( + &worker_id, + &operation_id, + UpdateOperationType::UpdateWithError(err.clone()), + ) + .await, + ); + + { + // Client should get notification saying the action completed (not re-queued). + let (action_state, _maybe_origin_metadata) = action_listener.changed().await.unwrap(); + let expected_action_state = ActionState { + // Name is a random string, so we ignore it and just make it the same. 
+            client_operation_id: action_state.client_operation_id.clone(),
+            stage: ActionStage::Completed(ActionResult {
+                output_files: Vec::default(),
+                output_folders: Vec::default(),
+                output_file_symlinks: Vec::default(),
+                output_directory_symlinks: Vec::default(),
+                exit_code: INTERNAL_ERROR_EXIT_CODE,
+                stdout_digest: DigestInfo::zero_digest(),
+                stderr_digest: DigestInfo::zero_digest(),
+                execution_metadata: ExecutionMetadata {
+                    worker: worker_id.to_string(),
+                    queued_timestamp: SystemTime::UNIX_EPOCH,
+                    worker_start_timestamp: SystemTime::UNIX_EPOCH,
+                    worker_completed_timestamp: SystemTime::UNIX_EPOCH,
+                    input_fetch_start_timestamp: SystemTime::UNIX_EPOCH,
+                    input_fetch_completed_timestamp: SystemTime::UNIX_EPOCH,
+                    execution_start_timestamp: SystemTime::UNIX_EPOCH,
+                    execution_completed_timestamp: SystemTime::UNIX_EPOCH,
+                    output_upload_start_timestamp: SystemTime::UNIX_EPOCH,
+                    output_upload_completed_timestamp: SystemTime::UNIX_EPOCH,
+                },
+                server_logs: HashMap::default(),
+                error: Some(err.clone()),
+                message: String::new(),
+            }),
+            action_digest: action_state.action_digest,
+            last_transition_timestamp: SystemTime::now(),
+        };
+        let mut received_state = action_state.as_ref().clone();
+        if let ActionStage::Completed(stage) = &mut received_state.stage {
+            if let Some(real_err) = &mut stage.error {
+                // Verify the error contains the FailedPrecondition message.
+                assert!(
+                    real_err.to_string().contains("Missing input blobs"),
+                    "{real_err} did not contain 'Missing input blobs'",
+                );
+                assert!(
+                    real_err
+                        .to_string()
+                        .contains("Job cancelled because it attempted to execute too many times"),
+                    "{real_err} did not contain 'Job cancelled because it attempted to execute too many times'",
+                );
+                *real_err = err;
+            }
+        } else {
+            panic!(
+                "Expected Completed (not re-queued), got : {:?}",
+                action_state.stage
+            );
+        }
+        assert_eq!(received_state, expected_action_state);
+    }
+
+    Ok(())
+}
+
+// ============================================================================
+// Locality-aware scheduling tests
+// ============================================================================
+
+/// Helper: adds a worker with a specific CAS endpoint (for locality mapping).
+async fn setup_new_worker_with_cas_endpoint(
+    scheduler: &SimpleScheduler,
+    worker_id: WorkerId,
+    props: PlatformProperties,
+    cas_endpoint: &str,
+) -> Result<mpsc::UnboundedReceiver<UpdateForWorker>, Error> {
+    let (tx, mut rx) = mpsc::unbounded_channel();
+    let worker = Worker::new_with_cas_endpoint(
+        worker_id.clone(),
+        props,
+        tx,
+        NOW_TIME,
+        0,
+        cas_endpoint.to_string(),
+    );
+    scheduler
+        .add_worker(worker)
+        .await
+        .err_tip(|| "Failed to add worker")?;
+    tokio::task::yield_now().await;
+    verify_initial_connection_message(worker_id, &mut rx).await;
+    Ok(rx)
+}
+
+/// Helper: schedules an action with a custom `input_root_digest`.
+async fn setup_action_with_input_root(
+    scheduler: &SimpleScheduler,
+    action_digest: DigestInfo,
+    input_root_digest: DigestInfo,
+    platform_properties: HashMap<String, String>,
+    insert_timestamp: SystemTime,
+) -> Result<Box<dyn ActionStateResult>, Error> {
+    let mut action_info = make_base_action_info(insert_timestamp, action_digest);
+    Arc::make_mut(&mut action_info).platform_properties = platform_properties;
+    Arc::make_mut(&mut action_info).input_root_digest = input_root_digest;
+    let client_id = OperationId::default();
+    let result = scheduler.add_action(client_id, action_info).await;
+    tokio::task::yield_now().await;
+    result
+}
+
+/// Helper: extracts the StartExecute from a worker receiver, returning
+/// (operation_id, start_execute).
+async fn recv_start_execute(
+    rx: &mut mpsc::UnboundedReceiver<UpdateForWorker>,
+) -> (String, StartExecute) {
+    match rx.recv().await.unwrap().update {
+        Some(update_for_worker::Update::StartAction(se)) => (se.operation_id.clone(), se),
+        v => panic!("Expected StartAction, got: {v:?}"),
+    }
+}
+
+#[nativelink_test]
+async fn locality_scoring_selects_best_worker_test() -> Result<(), Error> {
+    // Test: When a locality map is populated and CAS store has Directory protos,
+    // the worker with the most cached input bytes should be preferred.
+    let worker_id_a = WorkerId("worker_a".to_string());
+    let worker_id_b = WorkerId("worker_b".to_string());
+    let cas_endpoint_a = "worker-a:50081";
+    let cas_endpoint_b = "worker-b:50081";
+
+    // Create file digests that will be in the input tree.
+    let file_digest1 = DigestInfo::new([1u8; 32], 5000); // 5000 bytes
+    let file_digest2 = DigestInfo::new([2u8; 32], 3000); // 3000 bytes
+    let file_digest3 = DigestInfo::new([3u8; 32], 2000); // 2000 bytes
+
+    // Build a Directory proto with these files as the input root.
+    let input_root_dir = Directory {
+        files: vec![
+            FileNode {
+                name: "file1.txt".to_string(),
+                digest: Some(file_digest1.into()),
+                is_executable: false,
+                ..Default::default()
+            },
+            FileNode {
+                name: "file2.txt".to_string(),
+                digest: Some(file_digest2.into()),
+                is_executable: false,
+                ..Default::default()
+            },
+            FileNode {
+                name: "file3.txt".to_string(),
+                digest: Some(file_digest3.into()),
+                is_executable: false,
+                ..Default::default()
+            },
+        ],
+        ..Default::default()
+    };
+    let dir_bytes = input_root_dir.encode_to_vec();
+    let input_root_digest = DigestInfo::new(
+        {
+            use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc};
+            let mut hasher = DigestHasherFunc::Sha256.hasher();
+            hasher.update(&dir_bytes);
+            let digest_info = hasher.finalize_digest();
+            **digest_info.packed_hash()
+        },
+        dir_bytes.len() as u64,
+    );
+
+    // Create a CAS store and populate it with the directory proto.
+    let cas_store_inner = MemoryStore::new(&MemorySpec::default());
+    let cas_store = Store::new(cas_store_inner.clone());
+    let key: nativelink_util::store_trait::StoreKey<'_> = input_root_digest.into();
+    cas_store
+        .update_oneshot(key, Bytes::from(dir_bytes))
+        .await?;
+
+    // Create and populate the locality map.
+    // Worker A has file1 (5000) and file3 (2000) = 7000 total.
+    // Worker B has file2 (3000) = 3000 total.
+    // Worker A should win.
+    let locality_map = new_shared_blob_locality_map();
+    {
+        let mut map = locality_map.write();
+        map.register_blobs(cas_endpoint_a, &[file_digest1, file_digest3]);
+        map.register_blobs(cas_endpoint_b, &[file_digest2]);
+    }
+
+    let task_change_notify = Arc::new(Notify::new());
+    let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback(
+        &SimpleSpec::default(),
+        memory_awaited_action_db_factory(
+            0,
+            &task_change_notify.clone(),
+            MockInstantWrapped::default,
+        ),
+        || async move {},
+        task_change_notify,
+        MockInstantWrapped::default,
+        None,
+        Some(cas_store),
+        Some(locality_map),
+    );
+
+    let action_digest = DigestInfo::new([99u8; 32], 512);
+
+    // Add workers WITH cas_endpoints so the endpoint_to_worker map is populated.
+    let mut rx_a = setup_new_worker_with_cas_endpoint(
+        &scheduler,
+        worker_id_a.clone(),
+        PlatformProperties::default(),
+        cas_endpoint_a,
+    )
+    .await?;
+    let mut rx_b = setup_new_worker_with_cas_endpoint(
+        &scheduler,
+        worker_id_b.clone(),
+        PlatformProperties::default(),
+        cas_endpoint_b,
+    )
+    .await?;
+
+    // Schedule the action.
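// [Editor's aside — a minimal sketch of the scoring this test exercises;
// `locality_score` is hypothetical, not patch code, and it assumes
// DigestInfo's size_bytes() accessor.] If the scheduler sums the sizes of
// the input-tree files each worker's CAS endpoint has registered, worker A
// scores 5000 + 2000 = 7000 bytes and worker B scores 3000, so A should win
// the dispatch below:
fn locality_score(
    cached: &std::collections::HashSet<DigestInfo>,
    input_files: &[DigestInfo],
) -> u64 {
    input_files
        .iter()
        .filter(|digest| cached.contains(digest))
        .map(|digest| digest.size_bytes())
        .sum()
}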
+ let insert_timestamp = make_system_time(1); + let mut action_listener = setup_action_with_input_root( + &scheduler, + action_digest, + input_root_digest, + HashMap::new(), + insert_timestamp, + ) + .await?; + + // Worker A should get the action because it has the highest locality score (7000 > 3000). + let (selected_worker_id, _se) = tokio::select! { + msg = rx_a.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_a, got: {v:?}"), + }; + (worker_id_a.clone(), se) + } + msg = rx_b.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_b, got: {v:?}"), + }; + (worker_id_b.clone(), se) + } + }; + + assert_eq!( + selected_worker_id, worker_id_a, + "Locality scoring should select worker_a (7000 cached bytes > worker_b's 3000)" + ); + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +#[nativelink_test] +async fn no_peer_hints_without_resolved_tree_test() -> Result<(), Error> { + // Test: When a locality map has entries for the input_root_digest itself + // but there is no CAS store / no resolved tree, peer hints should be + // empty. The old fallback that generated a single hint for + // input_root_digest never worked because workers register individual + // file digests, not directory digests. + let worker_id = WorkerId("worker_recv".to_string()); + let peer_endpoint = "peer-worker:50081"; + + let input_root = DigestInfo::new([77u8; 32], 4096); + + // Create locality map and register the input_root_digest on a peer endpoint. + let locality_map = new_shared_blob_locality_map(); + { + let mut map = locality_map.write(); + map.register_blobs(peer_endpoint, &[input_root]); + } + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // no CAS store -- no resolved tree available + Some(locality_map), + ); + + let action_digest = DigestInfo::new([88u8; 32], 256); + + let mut rx_from_worker = + setup_new_worker(&scheduler, worker_id.clone(), PlatformProperties::default()).await?; + + // Schedule action with a specific input_root. + let insert_timestamp = make_system_time(1); + let _action_listener = setup_action_with_input_root( + &scheduler, + action_digest, + input_root, + HashMap::new(), + insert_timestamp, + ) + .await?; + + // Worker should receive StartAction with empty peer_hints (no resolved tree). + let (_, start_execute) = recv_start_execute(&mut rx_from_worker).await; + + assert!( + start_execute.peer_hints.is_empty(), + "peer_hints should be empty without a resolved tree (directory digests are not useful)" + ); + + Ok(()) +} + +#[nativelink_test] +async fn peer_hints_from_resolved_tree_test() -> Result<(), Error> { + // Test: When a CAS store has a Directory proto for the input root, and + // the locality map has entries for individual file digests, the + // StartExecute message should contain per-file peer hints sorted by + // size descending. + let worker_id = WorkerId("worker_recv".to_string()); + let peer_endpoint = "peer-worker:50081"; + + // Create file digests. 
+ let file_large = DigestInfo::new([10u8; 32], 10000); + let file_small = DigestInfo::new([11u8; 32], 500); + + // Build Directory proto. + let input_root_dir = Directory { + files: vec![ + FileNode { + name: "large.bin".to_string(), + digest: Some(file_large.into()), + is_executable: false, + ..Default::default() + }, + FileNode { + name: "small.txt".to_string(), + digest: Some(file_small.into()), + is_executable: false, + ..Default::default() + }, + ], + ..Default::default() + }; + let dir_bytes = input_root_dir.encode_to_vec(); + let input_root_digest = DigestInfo::new( + { + use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc}; + let mut hasher = DigestHasherFunc::Sha256.hasher(); + hasher.update(&dir_bytes); + let digest_info = hasher.finalize_digest(); + **digest_info.packed_hash() + }, + dir_bytes.len() as u64, + ); + + // Create and populate CAS store. + let cas_store_inner = MemoryStore::new(&MemorySpec::default()); + let cas_store = Store::new(cas_store_inner); + let key: nativelink_util::store_trait::StoreKey<'_> = input_root_digest.into(); + cas_store + .update_oneshot(key, Bytes::from(dir_bytes)) + .await?; + + // Create locality map with file blobs registered on a peer. + let locality_map = new_shared_blob_locality_map(); + { + let mut map = locality_map.write(); + map.register_blobs(peer_endpoint, &[file_large, file_small]); + } + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + Some(cas_store), + Some(locality_map), + ); + + let action_digest = DigestInfo::new([99u8; 32], 512); + + let mut rx_from_worker = + setup_new_worker(&scheduler, worker_id.clone(), PlatformProperties::default()).await?; + + let insert_timestamp = make_system_time(1); + let _action_listener = setup_action_with_input_root( + &scheduler, + action_digest, + input_root_digest, + HashMap::new(), + insert_timestamp, + ) + .await?; + + let (_, start_execute) = recv_start_execute(&mut rx_from_worker).await; + + // Should have per-file peer hints (one per file in the tree). + assert_eq!( + start_execute.peer_hints.len(), + 2, + "Should have 2 peer hints (one per file in the input tree)" + ); + + // Hints should be sorted by size descending (large first). + let first_hint_digest = DigestInfo::try_from( + start_execute.peer_hints[0] + .digest + .as_ref() + .expect("hint should have digest"), + ) + .unwrap(); + let second_hint_digest = DigestInfo::try_from( + start_execute.peer_hints[1] + .digest + .as_ref() + .expect("hint should have digest"), + ) + .unwrap(); + + assert_eq!( + first_hint_digest, file_large, + "First hint should be the largest file" + ); + assert_eq!( + second_hint_digest, file_small, + "Second hint should be the smaller file" + ); + + // Both hints should reference the peer endpoint. + for hint in &start_execute.peer_hints { + assert!( + hint.peer_endpoints.contains(&peer_endpoint.to_string()), + "Each hint should reference the peer endpoint" + ); + } + + Ok(()) +} + +#[nativelink_test] +async fn fallback_to_lru_when_no_locality_data_test() -> Result<(), Error> { + // Test: When a locality map and CAS store are configured but contain NO + // blob data for the action's input tree, the scheduler should fall back + // to the normal LRU worker selection without errors. 
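// [Editor's aside on the preceding peer_hints_from_resolved_tree_test —
// hypothetical sketch, not patch code.] That test asserts hints arrive
// largest-first, so a plausible implementation sorts the resolved file
// digests by size descending before emitting hints:
fn order_hint_digests(mut files: Vec<DigestInfo>) -> Vec<DigestInfo> {
    files.sort_by(|a, b| b.size_bytes().cmp(&a.size_bytes()));
    files // e.g. large.bin (10000 bytes) before small.txt (500 bytes)
}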
+ let worker_id_a = WorkerId("worker_a".to_string()); + let worker_id_b = WorkerId("worker_b".to_string()); + let cas_endpoint_a = "worker-a:50081"; + let cas_endpoint_b = "worker-b:50081"; + + // Build a Directory proto with files, but do NOT register those files + // in the locality map -- simulating a fresh deployment or cold start. + let file_digest1 = DigestInfo::new([30u8; 32], 4000); + let file_digest2 = DigestInfo::new([31u8; 32], 2000); + + let input_root_dir = Directory { + files: vec![ + FileNode { + name: "cold_file1.bin".to_string(), + digest: Some(file_digest1.into()), + is_executable: false, + ..Default::default() + }, + FileNode { + name: "cold_file2.bin".to_string(), + digest: Some(file_digest2.into()), + is_executable: false, + ..Default::default() + }, + ], + ..Default::default() + }; + let dir_bytes = input_root_dir.encode_to_vec(); + let input_root_digest = DigestInfo::new( + { + use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc}; + let mut hasher = DigestHasherFunc::Sha256.hasher(); + hasher.update(&dir_bytes); + let digest_info = hasher.finalize_digest(); + **digest_info.packed_hash() + }, + dir_bytes.len() as u64, + ); + + // Create CAS store with the directory proto so tree resolution succeeds. + let cas_store_inner = MemoryStore::new(&MemorySpec::default()); + let cas_store = Store::new(cas_store_inner); + let key: nativelink_util::store_trait::StoreKey<'_> = input_root_digest.into(); + cas_store + .update_oneshot(key, Bytes::from(dir_bytes)) + .await?; + + // Create an EMPTY locality map -- no blobs registered on any endpoint. + let locality_map = new_shared_blob_locality_map(); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + Some(cas_store), + Some(locality_map), + ); + + let action_digest = DigestInfo::new([99u8; 32], 512); + + // Add two workers with CAS endpoints. + let mut rx_a = setup_new_worker_with_cas_endpoint( + &scheduler, + worker_id_a.clone(), + PlatformProperties::default(), + cas_endpoint_a, + ) + .await?; + let mut rx_b = setup_new_worker_with_cas_endpoint( + &scheduler, + worker_id_b.clone(), + PlatformProperties::default(), + cas_endpoint_b, + ) + .await?; + + // Schedule action with the input root. + let insert_timestamp = make_system_time(1); + let mut action_listener = setup_action_with_input_root( + &scheduler, + action_digest, + input_root_digest, + HashMap::new(), + insert_timestamp, + ) + .await?; + + // One of the workers should receive the action (LRU fallback). + // We don't care which worker gets it -- just that it succeeds. + let (selected_worker_id, start_execute) = tokio::select! { + msg = rx_a.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_a, got: {v:?}"), + }; + (worker_id_a.clone(), se) + } + msg = rx_b.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_b, got: {v:?}"), + }; + (worker_id_b.clone(), se) + } + }; + + // Verify the action was dispatched to one of the two workers. 
+ assert!( + selected_worker_id == worker_id_a || selected_worker_id == worker_id_b, + "Action should be dispatched to one of the available workers via LRU fallback" + ); + + // With no locality data, there should be no peer hints (no blobs are registered). + assert!( + start_execute.peer_hints.is_empty(), + "peer_hints should be empty when locality map has no data for input files, got {} hints", + start_execute.peer_hints.len() + ); + + // Client should see the Executing state. + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +#[nativelink_test] +async fn locality_scoring_with_empty_map_and_no_cas_store_test() -> Result<(), Error> { + // Test: When locality_map is provided but cas_store is None (tree + // resolution impossible), scheduling should still work via LRU fallback. + // This covers the path where resolve_input_tree returns None. + let worker_id = WorkerId("worker_solo".to_string()); + + // Create locality map but don't populate it. + let locality_map = new_shared_blob_locality_map(); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // No CAS store -- tree resolution returns None + Some(locality_map), + ); + + let action_digest = DigestInfo::new([55u8; 32], 256); + + let mut rx_from_worker = + setup_new_worker(&scheduler, worker_id.clone(), PlatformProperties::default()).await?; + + let insert_timestamp = make_system_time(1); + let mut action_listener = + setup_action(&scheduler, action_digest, HashMap::new(), insert_timestamp).await?; + + // Worker should receive the action via normal LRU selection. + let (_, start_execute) = recv_start_execute(&mut rx_from_worker).await; + + // No peer hints should be generated (no tree, no locality data). + assert!( + start_execute.peer_hints.is_empty(), + "peer_hints should be empty when no CAS store is configured" + ); + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +#[nativelink_test] +async fn locality_scoring_partial_data_still_selects_best_worker_test() -> Result<(), Error> { + // Test: When only SOME workers have locality data, the scoring should + // still pick the one with the most cached bytes, and the worker with + // no cached data should get a score of 0 (falling behind). + let worker_id_a = WorkerId("worker_a".to_string()); + let worker_id_b = WorkerId("worker_b".to_string()); + let cas_endpoint_a = "worker-a:50081"; + let cas_endpoint_b = "worker-b:50081"; + + // Files in the input tree. 
+ let file_digest1 = DigestInfo::new([40u8; 32], 8000); + let file_digest2 = DigestInfo::new([41u8; 32], 1000); + + let input_root_dir = Directory { + files: vec![ + FileNode { + name: "big.dat".to_string(), + digest: Some(file_digest1.into()), + is_executable: false, + ..Default::default() + }, + FileNode { + name: "small.dat".to_string(), + digest: Some(file_digest2.into()), + is_executable: false, + ..Default::default() + }, + ], + ..Default::default() + }; + let dir_bytes = input_root_dir.encode_to_vec(); + let input_root_digest = DigestInfo::new( + { + use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc}; + let mut hasher = DigestHasherFunc::Sha256.hasher(); + hasher.update(&dir_bytes); + let digest_info = hasher.finalize_digest(); + **digest_info.packed_hash() + }, + dir_bytes.len() as u64, + ); + + // Create CAS store with directory proto. + let cas_store_inner = MemoryStore::new(&MemorySpec::default()); + let cas_store = Store::new(cas_store_inner); + let key: nativelink_util::store_trait::StoreKey<'_> = input_root_digest.into(); + cas_store + .update_oneshot(key, Bytes::from(dir_bytes)) + .await?; + + // Only worker B has file_digest1 (8000 bytes). Worker A has nothing. + let locality_map = new_shared_blob_locality_map(); + { + let mut map = locality_map.write(); + map.register_blobs(cas_endpoint_b, &[file_digest1]); + } + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + Some(cas_store), + Some(locality_map), + ); + + let action_digest = DigestInfo::new([99u8; 32], 512); + + let mut rx_a = setup_new_worker_with_cas_endpoint( + &scheduler, + worker_id_a.clone(), + PlatformProperties::default(), + cas_endpoint_a, + ) + .await?; + let mut rx_b = setup_new_worker_with_cas_endpoint( + &scheduler, + worker_id_b.clone(), + PlatformProperties::default(), + cas_endpoint_b, + ) + .await?; + + let insert_timestamp = make_system_time(1); + let mut action_listener = setup_action_with_input_root( + &scheduler, + action_digest, + input_root_digest, + HashMap::new(), + insert_timestamp, + ) + .await?; + + // Worker B should be selected (8000 cached bytes vs. 0 for worker A). + let (selected_worker_id, _se) = tokio::select! { + msg = rx_a.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_a, got: {v:?}"), + }; + (worker_id_a.clone(), se) + } + msg = rx_b.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_b, got: {v:?}"), + }; + (worker_id_b.clone(), se) + } + }; + + assert_eq!( + selected_worker_id, worker_id_b, + "Locality scoring should select worker_b (8000 cached bytes vs. worker_a's 0)" + ); + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +// --------------------------------------------------------------- +// CPU-load-aware scheduling tests +// --------------------------------------------------------------- + +#[nativelink_test] +async fn cpu_load_update_worker_load_stores_correctly() -> Result<(), Error> { + // Verify that update_worker_load stores the load on the worker and + // influences scheduling. 
We set load on a single worker, submit an + // action, and confirm the worker still receives it (proving the + // update didn't break anything and the worker is still viable). + let worker_id = WorkerId("worker_load_test".to_string()); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // cas_store + None, // locality_map + ); + + let mut rx = setup_new_worker( + &scheduler, + worker_id.clone(), + PlatformProperties::default(), + ) + .await?; + + // Update the worker's CPU load. + scheduler.update_worker_load(&worker_id, 42).await?; + + // Submit an action — the single worker should still be selected. + let action_digest = DigestInfo::new([10u8; 32], 256); + let insert_timestamp = make_system_time(1); + let mut action_listener = + setup_action(&scheduler, action_digest, HashMap::new(), insert_timestamp).await?; + + // Worker should receive the action. + let (_op_id, _se) = recv_start_execute(&mut rx).await; + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +#[nativelink_test] +async fn cpu_load_lightest_loaded_worker_gets_picked() -> Result<(), Error> { + // Create 3 workers with different cpu_load_pct values. + // Worker A=80, Worker B=20, Worker C=50. + // Worker B (lightest load) should be selected for the action. + let worker_id_a = WorkerId("worker_a".to_string()); + let worker_id_b = WorkerId("worker_b".to_string()); + let worker_id_c = WorkerId("worker_c".to_string()); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // cas_store + None, // locality_map + ); + + // Add all 3 workers (no queued actions yet, so no matching happens). + let mut rx_a = setup_new_worker( + &scheduler, + worker_id_a.clone(), + PlatformProperties::default(), + ) + .await?; + let mut rx_b = setup_new_worker( + &scheduler, + worker_id_b.clone(), + PlatformProperties::default(), + ) + .await?; + let mut rx_c = setup_new_worker( + &scheduler, + worker_id_c.clone(), + PlatformProperties::default(), + ) + .await?; + + // Set CPU loads: A=80, B=20, C=50. + scheduler.update_worker_load(&worker_id_a, 80).await?; + scheduler.update_worker_load(&worker_id_b, 20).await?; + scheduler.update_worker_load(&worker_id_c, 50).await?; + + // Submit an action. + let action_digest = DigestInfo::new([20u8; 32], 512); + let insert_timestamp = make_system_time(1); + let mut action_listener = + setup_action(&scheduler, action_digest, HashMap::new(), insert_timestamp).await?; + + // Determine which worker received the action. + let (selected_worker_id, _se) = tokio::select! 
{ + msg = rx_a.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_a, got: {v:?}"), + }; + (worker_id_a.clone(), se) + } + msg = rx_b.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_b, got: {v:?}"), + }; + (worker_id_b.clone(), se) + } + msg = rx_c.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_c, got: {v:?}"), + }; + (worker_id_c.clone(), se) + } + }; + + assert_eq!( + selected_worker_id, worker_id_b, + "Worker B (cpu_load_pct=20) should be selected as lightest-loaded" + ); + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +#[nativelink_test] +async fn cpu_load_unknown_zero_sorted_last() -> Result<(), Error> { + // Create 2 workers: one with cpu_load_pct=60 (known) and one with + // cpu_load_pct=0 (unknown). The worker with known load should be + // selected over the unknown one, even though 0 < 60 numerically. + let worker_id_known = WorkerId("worker_known".to_string()); + let worker_id_unknown = WorkerId("worker_unknown".to_string()); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // cas_store + None, // locality_map + ); + + let mut rx_known = setup_new_worker( + &scheduler, + worker_id_known.clone(), + PlatformProperties::default(), + ) + .await?; + let mut rx_unknown = setup_new_worker( + &scheduler, + worker_id_unknown.clone(), + PlatformProperties::default(), + ) + .await?; + + // Set only one worker's load; the other stays at default 0 (unknown). + scheduler.update_worker_load(&worker_id_known, 60).await?; + // worker_unknown stays at cpu_load_pct=0. + + // Submit an action. + let action_digest = DigestInfo::new([30u8; 32], 512); + let insert_timestamp = make_system_time(1); + let mut action_listener = + setup_action(&scheduler, action_digest, HashMap::new(), insert_timestamp).await?; + + // Determine which worker received the action. + let (selected_worker_id, _se) = tokio::select! { + msg = rx_known.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_known, got: {v:?}"), + }; + (worker_id_known.clone(), se) + } + msg = rx_unknown.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_unknown, got: {v:?}"), + }; + (worker_id_unknown.clone(), se) + } + }; + + assert_eq!( + selected_worker_id, worker_id_known, + "Worker with known load (60) should be preferred over unknown (0)" + ); + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} + +#[nativelink_test] +async fn cpu_load_falls_back_to_lru_when_no_load_data() -> Result<(), Error> { + // Create 2 workers with cpu_load_pct=0 on both (no load data). + // Scheduling should still work via LRU/MRU fallback. 
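// [Editor's aside on the preceding test — hypothetical sketch, not patch
// code.] A cpu_load_pct of 0 means "unknown", and
// cpu_load_unknown_zero_sorted_last expects unknown to lose to any known
// load even though 0 < 60 numerically. One way to get that ordering is to
// map 0 to the maximum before comparing:
fn load_sort_key(cpu_load_pct: u32) -> u32 {
    if cpu_load_pct == 0 { u32::MAX } else { cpu_load_pct }
}
// Selecting with min_by_key(|w| load_sort_key(w.cpu_load_pct)) then prefers
// the worker reporting 60 over the worker still at the default 0.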
+ let worker_id_1 = WorkerId("worker_1".to_string()); + let worker_id_2 = WorkerId("worker_2".to_string()); + + let task_change_notify = Arc::new(Notify::new()); + let (scheduler, _worker_scheduler) = SimpleScheduler::new_with_callback( + &SimpleSpec::default(), + memory_awaited_action_db_factory( + 0, + &task_change_notify.clone(), + MockInstantWrapped::default, + ), + || async move {}, + task_change_notify, + MockInstantWrapped::default, + None, + None, // cas_store + None, // locality_map + ); + + // Add both workers (both have cpu_load_pct=0 by default). + let mut rx_1 = setup_new_worker( + &scheduler, + worker_id_1.clone(), + PlatformProperties::default(), + ) + .await?; + let mut rx_2 = setup_new_worker( + &scheduler, + worker_id_2.clone(), + PlatformProperties::default(), + ) + .await?; + + // Neither worker has load data — cpu_load_pct stays at 0. + + // Submit an action. It should be assigned to one of the workers + // via LRU fallback (the first in LRU order). + let action_digest = DigestInfo::new([40u8; 32], 512); + let insert_timestamp = make_system_time(1); + let mut action_listener = + setup_action(&scheduler, action_digest, HashMap::new(), insert_timestamp).await?; + + // Either worker is acceptable — just verify one was selected. + let (selected_worker_id, _se) = tokio::select! { + msg = rx_1.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_1, got: {v:?}"), + }; + (worker_id_1.clone(), se) + } + msg = rx_2.recv() => { + let se = match msg.unwrap().update { + Some(update_for_worker::Update::StartAction(se)) => se, + v => panic!("Expected StartAction on worker_2, got: {v:?}"), + }; + (worker_id_2.clone(), se) + } + }; + + // Verify a worker was actually selected (the assert_eq on stage below + // also proves this, but let's be explicit). 
+ assert!( + selected_worker_id == worker_id_1 || selected_worker_id == worker_id_2, + "One of the workers should have been selected via LRU fallback" + ); + + assert_eq!( + action_listener.changed().await.unwrap().0.stage, + ActionStage::Executing + ); + + Ok(()) +} diff --git a/nativelink-scheduler/tests/utils/scheduler_utils.rs b/nativelink-scheduler/tests/utils/scheduler_utils.rs index 7492efe6e..f7986f985 100644 --- a/nativelink-scheduler/tests/utils/scheduler_utils.rs +++ b/nativelink-scheduler/tests/utils/scheduler_utils.rs @@ -143,5 +143,11 @@ pub(crate) fn update_eq( } _ => false, }, + update_for_worker::Update::TouchBlobs(actual_update) => match expected_update { + update_for_worker::Update::TouchBlobs(expected_update) => { + expected_update == actual_update + } + _ => false, + }, } } diff --git a/nativelink-service/Cargo.toml b/nativelink-service/Cargo.toml index 3f14715d1..0352fad23 100644 --- a/nativelink-service/Cargo.toml +++ b/nativelink-service/Cargo.toml @@ -20,14 +20,14 @@ bytes = { version = "1.10.1", default-features = false } futures = { version = "0.3.31", default-features = false } http-body-util = { version = "0.1.3", default-features = false } hyper = { version = "1.6.0", default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } -opentelemetry-semantic-conventions = { version = "0.29.0", default-features = false, features = [ +opentelemetry = { version = "0.31.0", default-features = false } +opentelemetry-semantic-conventions = { version = "0.31.0", default-features = false, features = [ "default", "semconv_experimental", ] } parking_lot = { version = "0.12.3", default-features = false } -prost = { version = "0.13.5", default-features = false } -prost-types = { version = "0.13.5", default-features = false, features = [ +prost = { version = "0.14.3", default-features = false } +prost-types = { version = "0.14.3", default-features = false, features = [ "std", ] } rand = { version = "0.9.0", default-features = false, features = [ @@ -43,11 +43,12 @@ tokio = { version = "1.44.1", features = [ tokio-stream = { version = "0.1.17", features = [ "fs", ], default-features = false } -tonic = { version = "0.13.0", features = [ +tonic = { version = "0.14.5", features = [ "gzip", "router", - "tls-ring", + "tls-aws-lc", "transport", + "zstd", ], default-features = false } tower = { version = "0.5.2", default-features = false } tracing = { version = "0.1.41", default-features = false } @@ -67,11 +68,12 @@ hyper-util = { version = "0.1.11", default-features = false } pretty_assertions = { version = "1.4.1", features = [ "std", ], default-features = false } -prost-types = { version = "0.13.5", default-features = false } +prost-types = { version = "0.14.3", default-features = false } +tonic-prost = { version = "0.14.5", default-features = false } serde_json = { version = "1.0.140", default-features = false, features = [ "std", ] } -sha2 = { version = "0.10.8", default-features = false } +sha2 = { version = "0.10.8", default-features = false, features = ["asm"] } tracing-test = { version = "0.2.5", default-features = false, features = [ "no-env-filter", ] } diff --git a/nativelink-service/src/ac_server.rs b/nativelink-service/src/ac_server.rs index 29db64d14..b9e190aef 100644 --- a/nativelink-service/src/ac_server.rs +++ b/nativelink-service/src/ac_server.rs @@ -30,11 +30,13 @@ use nativelink_store::grpc_store::GrpcStore; use nativelink_store::store_manager::StoreManager; use nativelink_util::common::DigestInfo; use 
nativelink_util::digest_hasher::make_ctx_for_hash_func;
+use nativelink_util::log_utils::throughput_mbps;
+use nativelink_util::stall_detector::StallGuard;
 use nativelink_util::store_trait::{Store, StoreLike};
 use opentelemetry::context::FutureExt;
 use prost::Message;
 use tonic::{Request, Response, Status};
-use tracing::{Instrument, Level, error, error_span, instrument};
+use tracing::{Instrument, Level, error, error_span, info, instrument};

 #[derive(Debug, Clone)]
 pub struct AcStoreInfo {
@@ -104,9 +106,21 @@ impl AcServer {
             return grpc_store.get_action_result(Request::new(request)).await;
         }

+        let get_start = std::time::Instant::now();
         let res = get_and_decode_digest::<ActionResult>(&store_info.store, digest.into()).await;
         match res {
-            Ok(action_result) => Ok(Response::new(action_result)),
+            Ok(action_result) => {
+                let elapsed = get_start.elapsed();
+                let size_bytes = action_result.encoded_len() as u64;
+                info!(
+                    ?digest,
+                    size_bytes,
+                    elapsed_ms = elapsed.as_millis() as u64,
+                    throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes, elapsed)),
+                    "AC read completed",
+                );
+                Ok(Response::new(action_result))
+            }
             Err(mut e) => {
                 if e.code == Code::NotFound {
                     // `get_action_result` is frequent to get NotFound errors, so remove all
@@ -158,11 +172,35 @@ impl AcServer {
             .encode(&mut store_data)
             .err_tip(|| "Provided ActionResult could not be serialized")?;

-        store_info
+        let size_bytes = store_data.len() as u64;
+        let start = std::time::Instant::now();
+        let result = store_info
             .store
             .update_oneshot(digest, store_data.freeze())
             .await
-            .err_tip(|| "Failed to update in action cache")?;
+            .err_tip(|| "Failed to update in action cache");
+        let elapsed = start.elapsed();
+        match &result {
+            Ok(()) => {
+                info!(
+                    ?digest,
+                    size_bytes,
+                    elapsed_ms = elapsed.as_millis() as u64,
+                    throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes, elapsed)),
+                    "AC write completed",
+                );
+            }
+            Err(e) => {
+                error!(
+                    ?digest,
+                    size_bytes,
+                    elapsed_ms = elapsed.as_millis() as u64,
+                    ?e,
+                    "AC write failed",
+                );
+            }
+        }
+        result?;
         Ok(Response::new(action_result))
     }
 }
@@ -181,6 +219,10 @@ impl ActionCache for AcServer {
     ) -> Result<Response<ActionResult>, Status> {
         let request = grpc_request.into_inner();
         let digest_function = request.digest_function;
+        let _stall_guard = StallGuard::new(
+            nativelink_util::stall_detector::DEFAULT_STALL_THRESHOLD,
+            "AC::get_action_result",
+        );
         let result = self
             .inner_get_action_result(request)
             .instrument(error_span!("ac_server_get_action_result"))
             .await
@@ -201,7 +243,7 @@
     #[instrument(
         err,
-        ret(level = Level::INFO),
+        ret(level = Level::DEBUG),
         level = Level::ERROR,
         skip_all,
         fields(request = ?grpc_request.get_ref())
     )]
@@ -212,6 +254,10 @@
     ) -> Result<Response<ActionResult>, Status> {
         let request = grpc_request.into_inner();
         let digest_function = request.digest_function;
+        let _stall_guard = StallGuard::new(
+            nativelink_util::stall_detector::DEFAULT_STALL_THRESHOLD,
+            "AC::update_action_result",
+        );
         self.inner_update_action_result(request)
             .instrument(error_span!("ac_server_update_action_result"))
             .with_context(
diff --git a/nativelink-service/src/bytestream_server.rs b/nativelink-service/src/bytestream_server.rs
index d47b3cd9e..403ce7e98 100644
--- a/nativelink-service/src/bytestream_server.rs
+++ b/nativelink-service/src/bytestream_server.rs
@@ -16,6 +16,7 @@ use core::convert::Into;
 use core::fmt::{Debug, Formatter};
 use core::pin::Pin;
 use core::sync::atomic::{AtomicU64, Ordering};
+use core::task::{Context, Poll};
 use core::time::Duration;
 use
std::collections::HashMap;
 use std::collections::hash_map::Entry;

@@ -41,16 +42,18 @@ use nativelink_proto::google::bytestream::{
 use nativelink_store::grpc_store::GrpcStore;
 use nativelink_store::store_manager::StoreManager;
 use nativelink_util::buf_channel::{
-    DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair,
+    DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair_with_size,
 };
 use nativelink_util::common::DigestInfo;
+use nativelink_util::log_utils::throughput_mbps;
+use nativelink_util::stall_detector::StallGuard;
 use nativelink_util::digest_hasher::{
     DigestHasherFunc, default_digest_hasher_func, make_ctx_for_hash_func,
 };
 use nativelink_util::proto_stream_utils::WriteRequestStreamWrapper;
 use nativelink_util::resource_info::ResourceInfo;
 use nativelink_util::spawn;
-use nativelink_util::store_trait::{Store, StoreLike, StoreOptimizations, UploadSizeInfo};
+use nativelink_util::store_trait::{IS_WORKER_REQUEST, Store, StoreLike, StoreOptimizations, UploadSizeInfo};
 use nativelink_util::task::JoinHandleDropGuard;
 use opentelemetry::context::FutureExt;
 use parking_lot::Mutex;
@@ -62,7 +65,7 @@ use tracing::{Instrument, Level, debug, error, error_span, info, instrument, tra

 const DEFAULT_PERSIST_STREAM_ON_DISCONNECT_TIMEOUT: Duration = Duration::from_secs(60);

 /// If this value changes update the documentation in the config definition.
-const DEFAULT_MAX_BYTES_PER_STREAM: usize = 64 * 1024;
+const DEFAULT_MAX_BYTES_PER_STREAM: usize = 3 * 1024 * 1024;

 /// Metrics for `ByteStream` server operations.
 /// Tracks upload/download activity, throughput, and latency.
@@ -271,6 +274,75 @@ impl Debug for InstanceInfo {

 type ReadStream = Pin<Box<dyn Stream<Item = Result<ReadResponse, Status>> + Send + 'static>>;
 type StoreUpdateFuture = Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'static>>;

+/// Wrapper around a `ReadStream` that logs total bytes and elapsed time when
+/// the stream completes (yields `None`) or is dropped before completion.
+struct LoggingReadStream {
+    inner: ReadStream,
+    start_time: Instant,
+    digest: DigestInfo,
+    expected_size: u64,
+    bytes_sent: u64,
+    completed: bool,
+}
+
+impl LoggingReadStream {
+    fn new(inner: ReadStream, start_time: Instant, digest: DigestInfo, expected_size: u64) -> Self {
+        Self {
+            inner,
+            start_time,
+            digest,
+            expected_size,
+            bytes_sent: 0,
+            completed: false,
+        }
+    }
+
+    fn log_completion(&self, status: &str) {
+        let elapsed = self.start_time.elapsed();
+        let elapsed_ms = elapsed.as_millis() as u64;
+        debug!(
+            digest = %self.digest,
+            expected_size = self.expected_size,
+            bytes_sent = self.bytes_sent,
+            elapsed_ms,
+            throughput_mbps = %throughput_mbps(self.bytes_sent, elapsed),
+            status,
+            "ByteStream::read: CAS read completed",
+        );
+    }
+}
+
+impl Stream for LoggingReadStream {
+    type Item = Result<ReadResponse, Status>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let result = self.inner.as_mut().poll_next(cx);
+        match &result {
+            Poll::Ready(Some(Ok(response))) => {
+                self.bytes_sent += response.data.len() as u64;
+            }
+            Poll::Ready(None) => {
+                self.completed = true;
+                self.log_completion("ok");
+            }
+            Poll::Ready(Some(Err(_))) => {
+                self.completed = true;
+                self.log_completion("error");
+            }
+            Poll::Pending => {}
+        }
+        result
+    }
+}
+
+impl Drop for LoggingReadStream {
+    fn drop(&mut self) {
+        if !self.completed {
+            self.log_completion("dropped");
+        }
+    }
+}
+
 struct StreamState {
     uuid: UuidKey,
     tx: DropCloserWriteHalf,
@@ -402,6 +474,15 @@ impl ByteStreamServer {
         let max_bytes_per_stream = if config.max_bytes_per_stream == 0 {
             DEFAULT_MAX_BYTES_PER_STREAM
         } else {
+            if config.max_bytes_per_stream > 4 * 1024 * 1024 {
+                warn!(
+                    configured = config.max_bytes_per_stream,
+                    default = DEFAULT_MAX_BYTES_PER_STREAM,
+                    "max_bytes_per_stream exceeds 4 MiB; Bazel and other REAPI clients \
+                     typically have a 4 MiB gRPC inbound message limit and will reject \
+                     oversized ByteStream.Read chunks with RESOURCE_EXHAUSTED"
+                );
+            }
             config.max_bytes_per_stream
         };
@@ -494,8 +575,18 @@
         // Parse UUID string to u128 key for efficient HashMap operations
         let uuid_key = parse_uuid_to_key(uuid_str);

-        let (uuid, bytes_received, is_collision) =
-            match instance.active_uploads.lock().entry(uuid_key) {
+        // We handle the three cases in two phases to avoid holding the
+        // mutex guard across a second .lock() call (which would deadlock
+        // on parking_lot::Mutex since it is not reentrant).
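// [Editor's aside — why the two-phase restructure matters; sketch only, not
// patch code.] parking_lot::Mutex is not reentrant: a second lock() on the
// same thread deadlocks rather than panicking. The standard fix, applied in
// the code below, is to decide under the lock, drop the guard, then act,
// re-locking only if the chosen path needs a second insert:
fn decide_then_act(counter: &parking_lot::Mutex<u32>) {
    let should_bump = {
        let guard = counter.lock();
        *guard > 0
    }; // guard dropped here, before any further lock() call
    if should_bump {
        *counter.lock() += 1; // safe: the first guard is no longer held
    }
}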
+        enum UploadAction<'a> {
+            Resume(Box<ActiveStreamGuard<'a>>),
+            New(u128, Arc<AtomicU64>),
+            Collision(u128),
+        }
+
+        let action = {
+            let mut active_uploads = instance.active_uploads.lock();
+            match active_uploads.entry(uuid_key) {
                 Entry::Occupied(mut entry) => {
                     let maybe_idle_stream = entry.get_mut();
                     if let Some(idle_stream) = maybe_idle_stream.1.take() {
@@ -510,34 +601,41 @@
                             .metrics
                             .resumed_uploads
                             .fetch_add(1, Ordering::Relaxed);
-                        return idle_stream.into_active_stream(bytes_received, instance);
+                        UploadAction::Resume(Box::new(
+                            idle_stream.into_active_stream(bytes_received, instance),
+                        ))
+                    } else {
+                        // Case 3: Stream is active - generate a unique UUID to avoid collision
+                        let original_key = *entry.key();
+                        let unique_key = Self::generate_unique_uuid_key(original_key);
+                        warn!(
+                            msg = "UUID collision detected, generating unique UUID to prevent conflict",
+                            original_uuid = format!("{:032x}", original_key),
+                            unique_uuid = format!("{:032x}", unique_key)
+                        );
+                        UploadAction::Collision(unique_key)
                     }
-                    // Case 3: Stream is active - generate a unique UUID to avoid collision
-                    // Using nanosecond timestamp makes collision probability essentially zero
-                    let original_key = *entry.key();
-                    let unique_key = Self::generate_unique_uuid_key(original_key);
-                    warn!(
-                        msg = "UUID collision detected, generating unique UUID to prevent conflict",
-                        original_uuid = format!("{:032x}", original_key),
-                        unique_uuid = format!("{:032x}", unique_key)
-                    );
-                    // Entry goes out of scope here, releasing the lock
-
-                    let bytes_received = Arc::new(AtomicU64::new(0));
-                    let mut active_uploads = instance.active_uploads.lock();
-                    // Insert with the unique UUID - this should never collide due to nanosecond precision
-                    active_uploads.insert(unique_key, (bytes_received.clone(), None));
-                    (unique_key, bytes_received, true)
                 }
                 Entry::Vacant(entry) => {
                     // Case 1: UUID doesn't exist, create new stream
                     let bytes_received = Arc::new(AtomicU64::new(0));
                     let uuid = *entry.key();
-                    // Our stream is "in use" if the key is in the map, but the value is None.
                     entry.insert((bytes_received.clone(), None));
-                    (uuid, bytes_received, false)
+                    UploadAction::New(uuid, bytes_received)
                 }
-            };
+            }
+        }; // First lock guard dropped here.
+
+        let (uuid, bytes_received, is_collision) = match action {
+            UploadAction::Resume(guard) => return *guard,
+            UploadAction::New(uuid, bytes_received) => (uuid, bytes_received, false),
+            UploadAction::Collision(unique_key) => {
+                let bytes_received = Arc::new(AtomicU64::new(0));
+                let mut active_uploads = instance.active_uploads.lock();
+                active_uploads.insert(unique_key, (bytes_received.clone(), None));
+                (unique_key, bytes_received, true)
+            }
+        };

         // Track metrics for new upload instance
@@ -555,7 +653,9 @@
         // removing the entry from the map, otherwise that UUID becomes
         // unusable.

-        let (tx, rx) = make_buf_channel_pair();
+        // Use a larger buffer (256 slots = ~64MiB at 256KiB chunks) to sustain
+        // high-throughput streaming at 10Gbps+ without backpressure stalls.
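// [Editor's aside — the sizing arithmetic behind the comment above; the
// 256 KiB chunk size is the patch comment's assumption, not verified here.]
const SLOTS: usize = 256;
const ASSUMED_CHUNK_BYTES: usize = 256 * 1024;
const BUFFER_BYTES: usize = SLOTS * ASSUMED_CHUNK_BYTES; // 64 MiB
// At 10 Gb/s (~1.25 GB/s) a 64 MiB buffer absorbs roughly 50 ms of
// line-rate traffic before the producer sees backpressure.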
+        let (tx, rx) = make_buf_channel_pair_with_size(256);
         let store = instance.store.clone();
         let store_update_fut = Box::pin(async move {
             // We need to wrap `Store::update()` in a another future because we need to capture
@@ -582,6 +682,7 @@
         instance: &InstanceInfo,
         digest: DigestInfo,
         read_request: ReadRequest,
+        is_worker: bool,
     ) -> Result<impl Stream<Item = Result<ReadResponse, Status>> + Send + use<>, Error> {
         struct ReaderState {
             max_bytes_per_stream: usize,
@@ -593,7 +694,9 @@
         let read_limit = u64::try_from(read_request.read_limit)
             .err_tip(|| "Could not convert read_limit to u64")?;

-        let (tx, rx) = make_buf_channel_pair();
+        // Use a larger buffer (256 slots = ~64MiB at 256KiB chunks) to sustain
+        // high-throughput streaming at 10Gbps+ without backpressure stalls.
+        let (tx, rx) = make_buf_channel_pair_with_size(256);

         let read_limit = if read_limit != 0 {
             Some(read_limit)
@@ -608,14 +711,21 @@
             max_bytes_per_stream: instance.max_bytes_per_stream,
             maybe_get_part_result: None,
             get_part_fut: Box::pin(async move {
-                store
-                    .get_part(
-                        digest,
-                        tx,
-                        u64::try_from(read_request.read_offset)
-                            .err_tip(|| "Could not convert read_offset to u64")?,
-                        read_limit,
-                    )
+                // Propagate the worker/non-worker distinction into the store
+                // layer so WorkerProxyStore can decide whether to proxy or
+                // redirect.
+                IS_WORKER_REQUEST
+                    .scope(is_worker, async {
+                        store
+                            .get_part(
+                                digest,
+                                tx,
+                                u64::try_from(read_request.read_offset)
+                                    .err_tip(|| "Could not convert read_offset to u64")?,
+                                read_limit,
+                            )
+                            .await
+                    })
+                    .await
             }),
         });
@@ -762,8 +872,14 @@
                     )
                 } else {
                     if write_offset != tx.get_bytes_written() {
-                        return Err(make_input_err!(
-                            "Received out of order data. Got {}, expected {}",
+                        // The client is trying to resume at an offset we
+                        // don't have (e.g. the idle stream was swept).
+                        // Return UNAVAILABLE so the client retries with
+                        // QueryWriteStatus → committed_size=0 → restart.
+                        return Err(make_err!(
+                            Code::Unavailable,
+                            "Received out of order data (write_offset {} but server has {}). \
+                             Partial upload state was lost; retry from committed offset.",
                             write_offset,
                             tx.get_bytes_written()
                         ));
@@ -785,6 +901,17 @@
                     return Err(make_input_err!("Received more bytes than expected"));
                 }
                 if write_request.finish_write {
+                    // Validate that we received the expected number of bytes
+                    // before accepting the upload. The stream wrapper only
+                    // validates on a *subsequent* poll_next after finish_write,
+                    // which we never perform, so check here explicitly.
+                    if tx.get_bytes_written() != expected_size {
+                        return Err(make_input_err!(
+                            "Client declared size {} but only sent {} bytes",
+                            expected_size,
+                            tx.get_bytes_written()
+                        ));
+                    }
                     // Gracefully close our stream.
                     tx.send_eof()
                         .err_tip(|| "Failed to send EOF in ByteStream::write")?;
@@ -879,8 +1006,10 @@
                 .slice(usize::try_from(bytes_received - write_offset).unwrap_or(usize::MAX)..)
             } else {
                 if write_offset != bytes_received {
-                    return Err(make_input_err!(
-                        "Received out of order data. Got {}, expected {}",
+                    return Err(make_err!(
+                        Code::Unavailable,
+                        "Received out of order data (write_offset {} but server has {}). \
+                         Partial upload state was lost; retry from committed offset.",
                        write_offset,
                        bytes_received
                    ));
@@ -898,6 +1027,15 @@
            }

            if write_request.finish_write {
+                // Validate that we received the expected number of bytes
+                // before accepting the upload.
+                if bytes_received != expected_size {
+                    return Err(make_input_err!(
+                        "Client declared size {} but only sent {} bytes",
+                        expected_size,
+                        bytes_received
+                    ));
+                }
                break;
            }
        }
@@ -994,6 +1132,9 @@ impl ByteStream for ByteStreamServer {
    ) -> Result<Response<Self::ReadStream>, Status> {
        let start_time = Instant::now();

+        let is_worker = grpc_request
+            .metadata()
+            .contains_key("x-nativelink-worker");
        let read_request = grpc_request.into_inner();
        let resource_info = ResourceInfo::new(&read_request.resource_name, false)?;
        let instance_name = resource_info.instance_name.as_ref();
@@ -1024,15 +1165,31 @@
            DigestHasherFunc::try_from,
        )?;

+        // Covers stream setup only (inner_read returns a Stream).
+        // Actual data transfer stalls are not covered by this guard.
+        let _stall_guard = StallGuard::new(
+            nativelink_util::stall_detector::DEFAULT_STALL_THRESHOLD,
+            "ByteStream::read",
+        );
        let resp = self
-            .inner_read(instance, digest, read_request)
+            .inner_read(instance, digest, read_request, is_worker)
            .instrument(error_span!("bytestream_read"))
            .with_context(
                make_ctx_for_hash_func(digest_function).err_tip(|| "In BytestreamServer::read")?,
            )
            .await
            .err_tip(|| "In ByteStreamServer::read")
-            .map(|stream| -> Response<Self::ReadStream> { Response::new(Box::pin(stream)) });
+            .map(|stream| -> Response<Self::ReadStream> {
+                // Wrap in LoggingReadStream to log when the client finishes
+                // consuming all data (or drops the stream early).
+                let logging = LoggingReadStream::new(
+                    Box::pin(stream),
+                    start_time,
+                    digest,
+                    expected_size,
+                );
+                Response::new(Box::pin(logging))
+            });

        // Track metrics based on result
        #[allow(clippy::cast_possible_truncation)]
@@ -1044,6 +1201,12 @@

        match &resp {
            Ok(_) => {
+                debug!(
+                    %digest,
+                    size_bytes = expected_size,
+                    elapsed_ms = start_time.elapsed().as_millis() as u64,
+                    "ByteStream::read: CAS read stream created",
+                );
                instance
                    .metrics
                    .read_requests_success
@@ -1052,9 +1215,15 @@
                    .metrics
                    .bytes_read_total
                    .fetch_add(expected_size, Ordering::Relaxed);
-                debug!(return = "Ok()");
            }
-            Err(_) => {
+            Err(e) => {
+                error!(
+                    %digest,
+                    size_bytes = expected_size,
+                    elapsed_ms = start_time.elapsed().as_millis() as u64,
+                    ?e,
+                    "ByteStream::read: failed",
+                );
                instance
                    .metrics
                    .read_requests_failure
@@ -1149,6 +1318,18 @@
            false
        };

+        let oneshot = use_oneshot;
+        debug!(
+            %digest,
+            expected_size,
+            oneshot,
+            "ByteStream::write: starting upload",
+        );
+
+        let _stall_guard = StallGuard::new(
+            nativelink_util::stall_detector::DEFAULT_STALL_THRESHOLD,
+            "ByteStream::write",
+        );
        let result = if use_oneshot {
            self.inner_write_oneshot(instance, digest, stream)
                .instrument(error_span!("bytestream_write_oneshot"))
@@ -1179,6 +1360,15 @@

        match &result {
            Ok(_) => {
+                let elapsed = start_time.elapsed();
+                debug!(
+                    %digest,
+                    size_bytes = expected_size,
+                    elapsed_ms = elapsed.as_millis() as u64,
+                    throughput_mbps = format!("{:.1}", throughput_mbps(expected_size, elapsed)),
+                    oneshot,
+                    "ByteStream::write: CAS write completed",
+                );
                instance
                    .metrics
                    .write_requests_success
@@ -1188,7 +1378,15 @@
                    .bytes_written_total
                    .fetch_add(expected_size, Ordering::Relaxed);
            }
-            Err(_) => {
+            Err(e) => {
+                error!(
+                    %digest,
+                    expected_size,
+                    elapsed_ms = start_time.elapsed().as_millis() as u64,
+                    oneshot,
+                    ?e,
+                    "ByteStream::write: upload failed",
+                );
                instance
                    .metrics
                    .write_requests_failure
a/nativelink-service/src/capabilities_server.rs b/nativelink-service/src/capabilities_server.rs index e7058baec..11accd4e3 100644 --- a/nativelink-service/src/capabilities_server.rs +++ b/nativelink-service/src/capabilities_server.rs @@ -33,7 +33,9 @@ use nativelink_util::operation_state_manager::ClientStateManager; use tonic::{Request, Response, Status}; use tracing::{Level, instrument, warn}; -const MAX_BATCH_TOTAL_SIZE: i64 = 64 * 1024; +// Must leave headroom below Bazel's 4 MiB client-side gRPC inbound limit +// so that BatchReadBlobs responses (blob data + protobuf framing) fit. +const MAX_BATCH_TOTAL_SIZE: i64 = 3 * 1024 * 1024 + 512 * 1024; // 3.5 MiB #[derive(Debug, Default)] pub struct CapabilitiesServer { diff --git a/nativelink-service/src/cas_server.rs b/nativelink-service/src/cas_server.rs index 7e0f5f437..fa2f4afb6 100644 --- a/nativelink-service/src/cas_server.rs +++ b/nativelink-service/src/cas_server.rs @@ -14,7 +14,7 @@ use core::convert::Into; use core::pin::Pin; -use std::collections::{HashMap, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; use bytes::Bytes; use futures::stream::{FuturesUnordered, Stream}; @@ -36,10 +36,13 @@ use nativelink_store::grpc_store::GrpcStore; use nativelink_store::store_manager::StoreManager; use nativelink_util::common::DigestInfo; use nativelink_util::digest_hasher::make_ctx_for_hash_func; -use nativelink_util::store_trait::{Store, StoreLike}; +use nativelink_util::log_utils::throughput_mbps; +use nativelink_util::stall_detector::StallGuard; +use nativelink_util::store_trait::{IS_WORKER_REQUEST, Store, StoreLike}; use opentelemetry::context::FutureExt; +use prost::Message; use tonic::{Request, Response, Status}; -use tracing::{Instrument, Level, debug, error_span, instrument}; +use tracing::{Instrument, Level, debug, error, error_span, instrument, warn}; #[derive(Debug)] pub struct CasServer { @@ -86,12 +89,24 @@ impl CasServer { .has_many(&requested_blobs) .await .err_tip(|| "In find_missing_blobs")?; - let missing_blob_digests = sizes + let missing_blob_digests: Vec<_> = sizes .into_iter() .zip(request.blob_digests) .filter_map(|(maybe_size, digest)| maybe_size.map_or_else(|| Some(digest), |_| None)) .collect(); + debug!( + requested = requested_blobs.len(), + missing = missing_blob_digests.len(), + "FindMissingBlobs", + ); + if !missing_blob_digests.is_empty() { + debug!( + digests = ?missing_blob_digests.iter().map(|d| format!("{}-{}", d.hash, d.size_bytes)).collect::>(), + "FindMissingBlobs: missing digests", + ); + } + Ok(Response::new(FindMissingBlobsResponse { missing_blob_digests, })) @@ -135,10 +150,38 @@ impl CasServer { size_bytes, request_data.len() ); + debug!( + %digest_info, + size_bytes, + "BatchUpdateBlobs: starting upload", + ); + let upload_start = std::time::Instant::now(); let result = store_ref .update_oneshot(digest_info, request_data) .await .err_tip(|| "Error writing to store"); + match &result { + Ok(()) => { + let elapsed = upload_start.elapsed(); + debug!( + %digest_info, + size_bytes, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes as u64, elapsed)), + "BatchUpdateBlobs: CAS write completed", + ); + } + Err(e) => { + let elapsed = upload_start.elapsed(); + error!( + %digest_info, + size_bytes, + elapsed_ms = elapsed.as_millis() as u64, + ?e, + "BatchUpdateBlobs: upload failed", + ); + } + } Ok::<_, Error>(batch_update_blobs_response::Response { digest: Some(digest), status: Some(result.map_or_else(Into::into, |()| 
GrpcStatus::default())), @@ -178,12 +221,22 @@ impl CasServer { .map(|digest| async move { let digest_copy = DigestInfo::try_from(digest.clone())?; // TODO(palfrey) There is a security risk here of someone taking all the memory on the instance. + let read_start = std::time::Instant::now(); let result = store_ref .get_part_unchunked(digest_copy, 0, None) .await .err_tip(|| "Error reading from store"); let (status, data) = result.map_or_else( |mut e| { + let elapsed = read_start.elapsed(); + if e.code != Code::NotFound { + error!( + %digest_copy, + elapsed_ms = elapsed.as_millis() as u64, + ?e, + "BatchReadBlobs: CAS read failed", + ); + } if e.code == Code::NotFound { // Trim the error code. Not Found is quite common and we don't want to send a large // error (debug) message for something that is common. We resize to just the last @@ -192,7 +245,18 @@ impl CasServer { } (e.into(), Bytes::new()) }, - |v| (GrpcStatus::default(), v), + |v| { + let elapsed = read_start.elapsed(); + let size_bytes = v.len() as u64; + debug!( + %digest_copy, + size_bytes, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes, elapsed)), + "BatchReadBlobs: CAS read completed", + ); + (GrpcStatus::default(), v) + }, ); Ok::<_, Error>(batch_read_blobs_response::Response { status: Some(status), @@ -231,6 +295,7 @@ impl CasServer { .into_inner(); return Ok(stream.left_stream()); } + let tree_start = std::time::Instant::now(); let root_digest: DigestInfo = request .root_digest .err_tip(|| "Expected root_digest to exist in GetTreeRequest")? @@ -238,6 +303,15 @@ impl CasServer { .err_tip(|| "In GetTreeRequest::root_digest")?; let mut deque: VecDeque<DigestInfo> = VecDeque::new(); + // Track all digests we have ever enqueued to avoid fetching/processing + // the same directory twice. In a Merkle tree, identical subdirectory + // structures share the same digest, so multiple parents at the same BFS + // level can reference the same child digest. Without deduplication: + // 1. We fetch the same blob N times concurrently (wasteful). + // 2. `level_results.remove()` succeeds for the first occurrence but + // returns None for duplicates, causing a spurious + // "Directory missing from level results" error. + let mut seen: HashSet<DigestInfo> = HashSet::new(); let mut directories: Vec<Directory> = Vec::new(); // `page_token` will return the `{hash_str}-{size_bytes}` of the current request's first directory digest. let page_token_digest = if request.page_token.is_empty() { @@ -257,43 +331,169 @@ impl CasServer { .err_tip(|| "Failed to parse `page_token` as `Digest` in `GetTreeRequest`")? }; let page_size = request.page_size; - // If `page_size` is 0, paging is not necessary. + // If `page_size` is 0, paging is not necessary; return all directories. + let page_size_limit = if page_size == 0 { + usize::MAX + } else { + usize::try_from(page_size).unwrap_or(usize::MAX) + }; let mut page_token_matched = page_size == 0; + seen.insert(root_digest); deque.push_back(root_digest); - - while !deque.is_empty() { - let digest: DigestInfo = deque.pop_front().err_tip(|| "In VecDeque::pop_front")?; - let directory = get_and_decode_digest::<Directory>(&store, digest.into()) - .await - .err_tip(|| "Converting digest to Directory")?; - if digest == page_token_digest { - page_token_matched = true; + let mut page_filled = false; + + // Per-level timing and dedup tracking for diagnostics.
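The deduplication rationale above is easy to demonstrate in isolation. A standalone sketch of digest-level BFS over a Merkle-style DAG, with `u64` standing in for `DigestInfo` and a closure standing in for the CAS fetch:

```rust
use std::collections::{HashSet, VecDeque};

/// BFS over a DAG where distinct parents may share identical children
/// (content-addressed digests). `children` stands in for the CAS fetch.
fn bfs_unique(root: u64, children: impl Fn(u64) -> Vec<u64>) -> Vec<u64> {
    let mut seen: HashSet<u64> = HashSet::new();
    let mut deque: VecDeque<u64> = VecDeque::new();
    let mut order = Vec::new();
    seen.insert(root);
    deque.push_back(root);
    while let Some(digest) = deque.pop_front() {
        order.push(digest);
        for child in children(digest) {
            // `insert` returns false for digests already enqueued, so a
            // shared subtree is fetched and processed exactly once.
            if seen.insert(child) {
                deque.push_back(child);
            }
        }
    }
    order
}
```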
+ let mut bfs_level: u32 = 0; + let mut total_duplicates_skipped: u64 = 0; + let mut level_timings: Vec<(u32, usize, u64, u64)> = Vec::new(); // (level, dirs_fetched, children_discovered, elapsed_ms) + + while !deque.is_empty() && !page_filled { + let level_start = std::time::Instant::now(); + let level: Vec<DigestInfo> = deque.drain(..).collect(); + // Fetch all directories in this BFS level concurrently. + let mut futs = FuturesUnordered::new(); + for digest in &level { + let store = store.clone(); + let digest = *digest; + futs.push(async move { + let dir = get_and_decode_digest::<Directory>(&store, digest.into()) + .await + .err_tip(|| { + format!( + "Converting digest to Directory (digest: {})", + digest, + ) + })?; + Ok::<_, Error>((digest, dir)) + }); } - for directory in &directory.directories { - let digest: DigestInfo = directory - .digest - .clone() - .err_tip(|| "Expected Digest to exist in Directory::directories::digest")? - .try_into() - .err_tip(|| "In Directory::file::digest")?; - deque.push_back(digest); + // Collect results into a map so we can iterate in deterministic (discovery) order. + let mut level_results: HashMap<DigestInfo, Directory> = + HashMap::with_capacity(level.len()); + while let Some(result) = futs.next().await { + let (digest, directory) = result?; + level_results.insert(digest, directory); + } + // Process directories in the order they appeared in the deque (BFS discovery order). + let mut level_new_children: u64 = 0; + let mut level_duplicates: u64 = 0; + for (i, digest) in level.iter().enumerate() { + let directory = level_results + .get(digest) + .cloned() + .err_tip(|| { + format!( + "Directory missing from level results (digest: {}, level_size: {}, results_size: {})", + digest, + level.len(), + level_results.len(), + ) + })?; + if *digest == page_token_digest { + page_token_matched = true; + } + // Always enqueue children so BFS traversal finds the page token + // even when it's deeper in the tree. + for child in &directory.directories { + let child_digest: DigestInfo = child + .digest + .clone() + .err_tip(|| { + "Expected Digest to exist in Directory::directories::digest" + })? + .try_into() + .err_tip(|| "In Directory::file::digest")?; + // Only enqueue children we haven't seen before to avoid + // duplicate fetches and processing. + if seen.insert(child_digest) { + deque.push_back(child_digest); + level_new_children += 1; + } else { + level_duplicates += 1; + } + } + if page_token_matched { + directories.push(directory); + if directories.len() >= page_size_limit { + // Put remaining unprocessed items from this level back + // into the front of the deque for the next page token. + let remaining: Vec<DigestInfo> = + level[i + 1..].iter().copied().collect(); + // Prepend remaining items before any children already in deque.
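One detail worth making explicit before the loop that follows: inserting at successive indices 0, 1, 2, ... prepends a slice while preserving its internal order ahead of whatever is already in the deque. A quick standalone check of that property:

```rust
use std::collections::VecDeque;

fn prepend_in_order<T>(deque: &mut VecDeque<T>, items: Vec<T>) {
    // insert(0, a) then insert(1, b) yields [a, b, <old items>...],
    // i.e. the prepended slice keeps its order ahead of existing entries.
    for (j, item) in items.into_iter().enumerate() {
        deque.insert(j, item);
    }
}

fn main() {
    let mut d: VecDeque<i32> = VecDeque::from(vec![10, 11]);
    prepend_in_order(&mut d, vec![1, 2, 3]);
    assert_eq!(d, VecDeque::from(vec![1, 2, 3, 10, 11]));
}
```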
+ for (j, rem) in remaining.into_iter().enumerate() { + deque.insert(j, rem); + } + page_filled = true; + break; + } + } } - let page_size_usize = usize::try_from(page_size).unwrap_or(usize::MAX); + let level_elapsed_ms = level_start.elapsed().as_millis() as u64; + total_duplicates_skipped += level_duplicates; - if page_token_matched { - directories.push(directory); - if directories.len() == page_size_usize { - break; - } + if level_duplicates > 0 { + debug!( + ?root_digest, + bfs_level, + duplicates_skipped = level_duplicates, + "GetTree: deduplication skipped children at this level", + ); + } + + debug!( + ?root_digest, + bfs_level, + dirs_fetched = level.len(), + new_children = level_new_children, + duplicates_skipped = level_duplicates, + elapsed_ms = level_elapsed_ms, + "GetTree: BFS level completed", + ); + + if level_elapsed_ms > 100 { + warn!( + ?root_digest, + bfs_level, + dirs_fetched = level.len(), + new_children = level_new_children, + elapsed_ms = level_elapsed_ms, + "GetTree: slow BFS level (>100ms)", + ); } + + level_timings.push((bfs_level, level.len(), level_new_children, level_elapsed_ms)); + bfs_level += 1; } - // `next_page_token` will return the `{hash_str}:{size_bytes}` of the next request's first directory digest. + // `next_page_token` will return the `{hash_str}-{size_bytes}` of the next request's first directory digest. // It will be an empty string when it reached the end of the directory tree. let next_page_token: String = deque .front() .map_or_else(String::new, |value| format!("{value}")); + let elapsed = tree_start.elapsed(); + let total_bytes: u64 = directories.iter().map(|d| d.encoded_len() as u64).sum(); + + // Build per-level timing breakdown string for the summary log. + let level_breakdown: String = level_timings + .iter() + .map(|(lvl, dirs, children, ms)| { + format!("L{lvl}:{dirs}dirs/{children}children/{ms}ms") + }) + .collect::>() + .join(", "); + + debug!( + ?root_digest, + dir_count = directories.len(), + total_bytes, + total_duplicates_skipped, + bfs_levels = bfs_level, + elapsed_ms = elapsed.as_millis() as u64, + level_breakdown = %level_breakdown, + "GetTree: resolved directory tree", + ); + Ok(futures::stream::once(async { Ok(GetTreeResponse { directories, @@ -350,6 +550,10 @@ impl ContentAddressableStorage for CasServer { let request = grpc_request.into_inner(); let digest_function = request.digest_function; + let _stall_guard = StallGuard::new( + nativelink_util::stall_detector::DEFAULT_STALL_THRESHOLD, + "BatchUpdateBlobs", + ); self.inner_batch_update_blobs(request) .instrument(error_span!("cas_server_batch_update_blobs")) .with_context( @@ -372,14 +576,25 @@ impl ContentAddressableStorage for CasServer { &self, grpc_request: Request, ) -> Result, Status> { + let is_worker = grpc_request + .metadata() + .contains_key("x-nativelink-worker"); let request = grpc_request.into_inner(); let digest_function = request.digest_function; - self.inner_batch_read_blobs(request) - .instrument(error_span!("cas_server_batch_read_blobs")) - .with_context( - make_ctx_for_hash_func(digest_function) - .err_tip(|| "In CasServer::batch_read_blobs")?, + let _stall_guard = StallGuard::new( + nativelink_util::stall_detector::DEFAULT_STALL_THRESHOLD, + "BatchReadBlobs", + ); + IS_WORKER_REQUEST + .scope( + is_worker, + self.inner_batch_read_blobs(request) + .instrument(error_span!("cas_server_batch_read_blobs")) + .with_context( + make_ctx_for_hash_func(digest_function) + .err_tip(|| "In CasServer::batch_read_blobs")?, + ), ) .await .err_tip(|| "Failed on 
batch_read_blobs() command") diff --git a/nativelink-service/src/worker_api_server.rs b/nativelink-service/src/worker_api_server.rs index 9b6918155..733c9a09e 100644 --- a/nativelink-service/src/worker_api_server.rs +++ b/nativelink-service/src/worker_api_server.rs @@ -28,8 +28,11 @@ use nativelink_proto::com::github::trace_machina::nativelink::remote_execution:: WorkerApi, WorkerApiServer as Server, }; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{ - execute_result, ExecuteComplete, ExecuteResult, GoingAwayRequest, KeepAliveRequest, UpdateForScheduler, UpdateForWorker + execute_result, ExecuteComplete, ExecuteResult, GoingAwayRequest, KeepAliveRequest, + UpdateForScheduler, UpdateForWorker, }; +use nativelink_util::blob_locality_map::SharedBlobLocalityMap; +use nativelink_util::common::DigestInfo; use nativelink_scheduler::worker::Worker; use nativelink_scheduler::worker_scheduler::WorkerScheduler; use nativelink_util::background_spawn; @@ -40,7 +43,7 @@ use rand::RngCore; use tokio::sync::mpsc; use tokio::time::interval; use tonic::{Response, Status}; -use tracing::{debug, error, warn, instrument, Level}; +use tracing::{debug, error, info, warn, instrument, Level}; use uuid::Uuid; pub type ConnectWorkerStream = @@ -52,6 +55,7 @@ pub struct WorkerApiServer { scheduler: Arc, now_fn: Arc, node_id: [u8; 6], + locality_map: Option, } impl core::fmt::Debug for WorkerApiServer { @@ -66,6 +70,7 @@ impl WorkerApiServer { pub fn new( config: &WorkerApiConfig, schedulers: &HashMap>, + locality_map: Option, ) -> Result { let node_id = { let mut out = [0; 6]; @@ -108,6 +113,7 @@ impl WorkerApiServer { .map_err(|_| make_err!(Code::Internal, "System time is now behind unix epoch")) }), node_id, + locality_map, ) } @@ -118,6 +124,7 @@ impl WorkerApiServer { schedulers: &HashMap>, now_fn: NowFn, node_id: [u8; 6], + locality_map: Option, ) -> Result { let scheduler = schedulers .get(&config.scheduler) @@ -132,6 +139,7 @@ impl WorkerApiServer { scheduler, now_fn: Arc::new(now_fn), node_id, + locality_map, }) } @@ -159,6 +167,8 @@ impl WorkerApiServer { )); }; + let worker_cas_endpoint = connect_worker_request.cas_endpoint.clone(); + let (tx, rx) = mpsc::unbounded_channel(); // First convert our proto platform properties into one our scheduler understands. 
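For context on the `x-nativelink-worker` metadata checks added to the CAS and ByteStream handlers earlier in this diff: a worker can mark its outgoing requests with a tonic interceptor. This is a sketch under the assumption that workers attach the header this way; the diff itself does not show the worker-side wiring.

```rust
use tonic::metadata::MetadataValue;
use tonic::service::Interceptor;
use tonic::{Request, Status};

/// Marks outgoing requests as originating from a worker so the server can
/// scope IS_WORKER_REQUEST and let WorkerProxyStore proxy instead of redirect.
#[derive(Clone)]
struct WorkerMarker;

impl Interceptor for WorkerMarker {
    fn call(&mut self, mut req: Request<()>) -> Result<Request<()>, Status> {
        req.metadata_mut()
            .insert("x-nativelink-worker", MetadataValue::from_static("1"));
        Ok(req)
    }
}
```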
@@ -184,12 +194,13 @@ impl WorkerApiServer { connect_worker_request.worker_id_prefix, Uuid::now_v6(&self.node_id).hyphenated() )); - let worker = Worker::new( + let worker = Worker::new_with_cas_endpoint( worker_id.clone(), platform_properties, tx, (self.now_fn)()?.as_secs(), connect_worker_request.max_inflight_tasks, + worker_cas_endpoint.clone(), ); self.scheduler .add_worker(worker) @@ -202,6 +213,8 @@ impl WorkerApiServer { self.scheduler.clone(), self.now_fn.clone(), worker_id.clone(), + self.locality_map.clone(), + worker_cas_endpoint, update_stream, ); @@ -259,6 +272,8 @@ struct WorkerConnection { scheduler: Arc, now_fn: Arc, worker_id: WorkerId, + locality_map: Option, + cas_endpoint: String, } impl WorkerConnection { @@ -266,12 +281,16 @@ impl WorkerConnection { scheduler: Arc, now_fn: Arc, worker_id: WorkerId, + locality_map: Option, + cas_endpoint: String, mut connection: impl Stream> + Unpin + Send + 'static, ) { let instance = Self { scheduler, now_fn, worker_id, + locality_map, + cas_endpoint, }; background_spawn!("worker_api", async move { @@ -307,23 +326,52 @@ impl WorkerConnection { Update::ExecuteComplete(execute_complete) => { instance.execution_complete(execute_complete).await } + Update::BlobsAvailable(notification) => { + instance.handle_blobs_available(notification).await + } + Update::BlobsEvicted(_notification) => { + // Dead code path: evictions now go through + // BlobsAvailableNotification.evicted_digests. + // Kept for wire compatibility with older workers. + Ok(()) + } }; if let Err(err) = result { tracing::warn!(worker_id=?instance.worker_id, ?err, "Error processing worker message"); } } tracing::debug!(worker_id=?instance.worker_id, "Update for scheduler dropped"); + + // Clean up locality map on disconnect. + if !instance.cas_endpoint.is_empty() { + if let Some(ref locality_map) = instance.locality_map { + locality_map.write().remove_endpoint(&instance.cas_endpoint); + info!( + worker_id=?instance.worker_id, + endpoint=%instance.cas_endpoint, + "Removed worker from blob locality map on disconnect" + ); + } + } + if !had_going_away { drop(instance.scheduler.remove_worker(&instance.worker_id).await); } }); } - async fn inner_keep_alive(&self, _keep_alive_request: KeepAliveRequest) -> Result<(), Error> { + async fn inner_keep_alive(&self, keep_alive_request: KeepAliveRequest) -> Result<(), Error> { self.scheduler .worker_keep_alive_received(&self.worker_id, (self.now_fn)()?.as_secs()) .await .err_tip(|| "Could not process keep_alive from worker in inner_keep_alive()")?; + let cpu_load_pct = keep_alive_request.cpu_load_pct; + if cpu_load_pct > 0 { + debug!(worker_id=?self.worker_id, cpu_load_pct, "KeepAlive received with CPU load"); + if let Err(err) = self.scheduler.update_worker_load(&self.worker_id, cpu_load_pct).await { + warn!(worker_id=?self.worker_id, ?err, cpu_load_pct, "Failed to update worker load"); + } + } Ok(()) } @@ -335,6 +383,51 @@ impl WorkerConnection { Ok(()) } + fn register_action_result_digests( + locality_map: &SharedBlobLocalityMap, + endpoint: &str, + execute_response: &nativelink_proto::build::bazel::remote::execution::v2::ExecuteResponse, + ) { + let Some(ref action_result) = execute_response.result else { + return; + }; + let now = SystemTime::now(); + let mut digests = Vec::new(); + for file in &action_result.output_files { + if let Some(ref d) = file.digest { + if let Ok(di) = DigestInfo::try_from(d.clone()) { + digests.push((di, now)); + } + } + } + for dir in &action_result.output_directories { + if let Some(ref d) = 
dir.tree_digest { + if let Ok(di) = DigestInfo::try_from(d.clone()) { + digests.push((di, now)); + } + } + } + if let Some(ref d) = action_result.stdout_digest { + if d.size_bytes > 0 { + if let Ok(di) = DigestInfo::try_from(d.clone()) { + digests.push((di, now)); + } + } + } + if let Some(ref d) = action_result.stderr_digest { + if d.size_bytes > 0 { + if let Ok(di) = DigestInfo::try_from(d.clone()) { + digests.push((di, now)); + } + } + } + if !digests.is_empty() { + locality_map + .write() + .register_blobs_with_timestamps(endpoint, &digests); + } + } + async fn inner_execution_response(&self, execute_result: ExecuteResult) -> Result<(), Error> { let operation_id = OperationId::from(execute_result.operation_id); @@ -343,6 +436,18 @@ impl WorkerConnection { .err_tip(|| "Expected result to exist in ExecuteResult")? { execute_result::Result::ExecuteResponse(finished_result) => { + // Register output digests in the locality map so the server + // can proxy blob reads back to the worker immediately, even + // before the BlobsAvailableNotification arrives. + if let Some(ref locality_map) = self.locality_map { + if !self.cas_endpoint.is_empty() { + Self::register_action_result_digests( + locality_map, + &self.cas_endpoint, + &finished_result, + ); + } + } let action_stage = finished_result .try_into() .err_tip(|| "Failed to convert ExecuteResponse into an ActionStage")?; @@ -369,7 +474,176 @@ impl WorkerConnection { Ok(()) } + async fn handle_blobs_available( + &self, + notification: nativelink_proto::com::github::trace_machina::nativelink::remote_execution::BlobsAvailableNotification, + ) -> Result<(), Error> { + let cpu_load_pct = notification.cpu_load_pct; + if cpu_load_pct > 0 { + debug!(worker_id=?self.worker_id, cpu_load_pct, "BlobsAvailable received with CPU load"); + if let Err(err) = self.scheduler.update_worker_load(&self.worker_id, cpu_load_pct).await { + warn!(worker_id=?self.worker_id, ?err, cpu_load_pct, "Failed to update worker load"); + } + } + + // Update the worker's cached directory digests if any were reported (legacy path). + if !notification.cached_directory_digests.is_empty() && !notification.is_full_subtree_snapshot { + let cached_dirs: std::collections::HashSet = notification + .cached_directory_digests + .iter() + .filter_map(|d| DigestInfo::try_from(d.clone()).ok()) + .collect(); + let count = cached_dirs.len(); + debug!(worker_id=?self.worker_id, count, "BlobsAvailable received with cached directory digests"); + if let Err(err) = self.scheduler.update_cached_directories(&self.worker_id, cached_dirs).await { + warn!(worker_id=?self.worker_id, ?err, count, "Failed to update cached directory digests"); + } + } + + // Handle delta-encoded subtree digest updates. 
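The `SharedBlobLocalityMap` calls used throughout this handler (`register_blobs_with_timestamps`, `evict_blobs`, `remove_endpoint`, `lookup_workers`) imply a two-sided index from digests to worker endpoints and back. The real type lives in nativelink-util and is not shown in this diff; below is a minimal sketch of the shape those operations suggest, with `[u8; 32]` standing in for `DigestInfo` and timestamps ignored.

```rust
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::time::SystemTime;

type Digest = [u8; 32]; // stand-in for DigestInfo

#[derive(Default)]
struct BlobLocalityMap {
    // digest -> endpoints that currently hold it
    by_digest: HashMap<Digest, HashSet<Arc<str>>>,
    // endpoint -> digests it holds (for O(holdings) disconnect cleanup)
    by_endpoint: HashMap<Arc<str>, HashSet<Digest>>,
}

impl BlobLocalityMap {
    fn register_blobs_with_timestamps(&mut self, endpoint: &str, blobs: &[(Digest, SystemTime)]) {
        let ep: Arc<str> = Arc::from(endpoint);
        for (digest, _ts) in blobs {
            self.by_digest.entry(*digest).or_default().insert(ep.clone());
            self.by_endpoint.entry(ep.clone()).or_default().insert(*digest);
        }
    }

    fn evict_blobs(&mut self, endpoint: &str, digests: &[Digest]) {
        for d in digests {
            if let Some(eps) = self.by_digest.get_mut(d) {
                eps.retain(|e| &**e != endpoint);
                if eps.is_empty() {
                    self.by_digest.remove(d);
                }
            }
            if let Some(ds) = self.by_endpoint.get_mut(endpoint) {
                ds.remove(d);
            }
        }
    }

    fn remove_endpoint(&mut self, endpoint: &str) {
        if let Some(ds) = self.by_endpoint.remove(endpoint) {
            for d in ds {
                if let Some(eps) = self.by_digest.get_mut(&d) {
                    eps.retain(|e| &**e != endpoint);
                    if eps.is_empty() {
                        self.by_digest.remove(&d);
                    }
                }
            }
        }
    }
}
```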
+ let has_subtree_update = notification.is_full_subtree_snapshot + || !notification.added_subtree_digests.is_empty() + || !notification.removed_subtree_digests.is_empty(); + if has_subtree_update { + let is_full = notification.is_full_subtree_snapshot; + let full_set: Vec<DigestInfo> = if is_full { + notification + .cached_directory_digests + .iter() + .filter_map(|d| DigestInfo::try_from(d.clone()).ok()) + .collect() + } else { + Vec::new() + }; + let added: Vec<DigestInfo> = notification + .added_subtree_digests + .iter() + .filter_map(|d| DigestInfo::try_from(d.clone()).ok()) + .collect(); + let removed: Vec<DigestInfo> = notification + .removed_subtree_digests + .iter() + .filter_map(|d| DigestInfo::try_from(d.clone()).ok()) + .collect(); + let full_count = full_set.len(); + let added_count = added.len(); + let removed_count = removed.len(); + debug!( + worker_id=?self.worker_id, + is_full, + full_count, + added_count, + removed_count, + "BlobsAvailable received with subtree digest updates" + ); + if let Err(err) = self + .scheduler + .update_cached_subtrees( + &self.worker_id, + is_full, + full_set, + added, + removed, + ) + .await + { + warn!( + worker_id=?self.worker_id, + ?err, + is_full, + full_count, + added_count, + removed_count, + "Failed to update cached subtree digests" + ); + } + } + + let Some(ref locality_map) = self.locality_map else { + return Ok(()); + }; + let endpoint = if notification.worker_cas_endpoint.is_empty() { + &self.cas_endpoint + } else { + &notification.worker_cas_endpoint + }; + if endpoint.is_empty() { + return Ok(()); + } + + let is_full_snapshot = notification.is_full_snapshot; + + // Process evicted digests (incremental updates report evictions here). + let evicted: Vec<DigestInfo> = notification + .evicted_digests + .into_iter() + .filter_map(|d| d.try_into().ok()) + .collect(); + + // Collect digests with timestamps from digest_infos (preferred). + let mut digests_with_ts: Vec<(DigestInfo, SystemTime)> = notification + .digest_infos + .into_iter() + .filter_map(|info| { + let digest = info.digest.and_then(|d| DigestInfo::try_from(d).ok())?; + let ts = if info.last_access_timestamp > 0 { + UNIX_EPOCH + Duration::from_secs(info.last_access_timestamp as u64) + } else { + SystemTime::now() + }; + Some((digest, ts)) + }) + .collect(); + // Also include plain digests for backward compatibility / simple notifications. + let now = SystemTime::now(); + digests_with_ts.extend( + notification + .digests + .into_iter() + .filter_map(|d| DigestInfo::try_from(d).ok()) + .map(|d| (d, now)), + ); + + // Acquire the write lock once for all mutations to avoid repeated + // lock acquisition and eliminate inconsistency windows. + let mut map = locality_map.write(); + + if is_full_snapshot { + // Remove all existing entries for this endpoint first.
+ map.remove_endpoint(endpoint); + } + + if !evicted.is_empty() { + debug!( + worker_id=?self.worker_id, + endpoint, + count=evicted.len(), + "Processing evicted digests from BlobsAvailable" + ); + map.evict_blobs(endpoint, &evicted); + } + + if !digests_with_ts.is_empty() { + debug!( + worker_id=?self.worker_id, + endpoint, + count=digests_with_ts.len(), + is_full_snapshot, + "Registering blobs available from worker" + ); + map.register_blobs_with_timestamps(endpoint, &digests_with_ts); + } + Ok(()) + } + async fn execution_complete(&self, execute_complete: ExecuteComplete) -> Result<(), Error> { + let cpu_load_pct = execute_complete.cpu_load_pct; + if cpu_load_pct > 0 { + debug!(worker_id=?self.worker_id, cpu_load_pct, "ExecuteComplete received with CPU load"); + if let Err(err) = self.scheduler.update_worker_load(&self.worker_id, cpu_load_pct).await { + warn!(worker_id=?self.worker_id, ?err, cpu_load_pct, "Failed to update worker load"); + } + } let operation_id = OperationId::from(execute_complete.operation_id); self.scheduler .update_action( diff --git a/nativelink-service/tests/bep_server_test.rs b/nativelink-service/tests/bep_server_test.rs index d6461875d..ee8baf51c 100644 --- a/nativelink-service/tests/bep_server_test.rs +++ b/nativelink-service/tests/bep_server_test.rs @@ -44,7 +44,8 @@ use nativelink_util::store_trait::{Store, StoreKey, StoreLike}; use pretty_assertions::assert_eq; use prost::Message; use prost_types::Timestamp; -use tonic::codec::{Codec, ProstCodec}; +use tonic::codec::Codec; +use tonic_prost::ProstCodec; use tonic::{Request, Streaming}; const BEP_STORE_NAME: &str = "main_bep"; diff --git a/nativelink-service/tests/bytestream_server_test.rs b/nativelink-service/tests/bytestream_server_test.rs index 7089e1613..2c35d50a4 100644 --- a/nativelink-service/tests/bytestream_server_test.rs +++ b/nativelink-service/tests/bytestream_server_test.rs @@ -25,7 +25,7 @@ use hyper_util::server::conn::auto; use hyper_util::service::TowerToHyperService; use nativelink_config::cas_server::{ByteStreamConfig, HttpListener, WithInstanceName}; use nativelink_config::stores::{MemorySpec, StoreSpec}; -use nativelink_error::{Code, Error, ResultExt, make_err}; +use nativelink_error::{Code, Error, ResultExt}; use nativelink_macro::nativelink_test; use nativelink_proto::google::bytestream::byte_stream_client::ByteStreamClient; use nativelink_proto::google::bytestream::byte_stream_server::ByteStream; @@ -47,7 +47,8 @@ use tokio::sync::mpsc::unbounded_channel; use tokio::task::yield_now; use tokio_stream::StreamExt; use tokio_stream::wrappers::UnboundedReceiverStream; -use tonic::codec::{Codec, CompressionEncoding, ProstCodec}; +use tonic::codec::{Codec, CompressionEncoding}; +use tonic_prost::ProstCodec; use tonic::transport::{Channel, Endpoint}; use tonic::{Request, Response, Streaming}; use tower::service_fn; @@ -855,13 +856,12 @@ pub async fn read_with_not_found_does_not_deadlock() -> Result<(), Error> { let result_fut = read_stream.next(); let result = result_fut.await.err_tip(|| "Expected result to be ready")?; - let expected_err_str = concat!( - "status: NotFound, message: \"Key Digest(DigestInfo(\\\"0123456789abcdef000000000000000000000000000000000123456789abcdef-55\\\")) not found\", details: [], metadata: MetadataMap { headers: {} }", - ); - assert_eq!( - Error::from(result.unwrap_err()), - make_err!(Code::NotFound, "{expected_err_str}"), - "Expected error data to match" + let err = Error::from(result.unwrap_err()); + assert_eq!(err.code, Code::NotFound, "Expected NotFound error 
code"); + let msg = err.messages.join(" "); + assert!( + msg.contains("0123456789abcdef000000000000000000000000000000000123456789abcdef-55"), + "Expected error message to contain the digest, got: {msg}" ); } Ok(()) @@ -991,7 +991,7 @@ pub async fn max_decoding_message_size_test() -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box Result<(), Box, + _worker_api_server: WorkerApiServer, + connection_worker_stream: ConnectWorkerStream, + _worker_id: WorkerId, + worker_stream: mpsc::Sender, + locality_map: SharedBlobLocalityMap, +} + +/// Sets up a WorkerApiServer with a real SharedBlobLocalityMap and a worker +/// that has a CAS endpoint set. Returns the context needed to send updates +/// and verify the locality map. +async fn setup_api_server_with_locality( + cas_endpoint: &str, +) -> Result { + const SCHEDULER_NAME: &str = "DUMMY_SCHEDULE_NAME"; + const UUID_SIZE: usize = 36; + + let platform_property_manager = Arc::new(PlatformPropertyManager::new(HashMap::new())); + let tasks_or_worker_change_notify = Arc::new(Notify::new()); + let state_manager = Arc::new(MockWorkerStateManager::new()); + let worker_registry = Arc::new(WorkerRegistry::new()); + let scheduler = ApiWorkerScheduler::new( + state_manager.clone(), + platform_property_manager, + WorkerAllocationStrategy::default(), + tasks_or_worker_change_notify, + BASE_WORKER_TIMEOUT_S, + worker_registry, + ); + + let locality_map = new_shared_blob_locality_map(); + + let mut schedulers: HashMap> = HashMap::new(); + schedulers.insert(SCHEDULER_NAME.to_string(), scheduler.clone()); + let worker_api_server = WorkerApiServer::new_with_now_fn( + &WorkerApiConfig { + scheduler: SCHEDULER_NAME.to_string(), + }, + &schedulers, + Box::new(static_now_fn), + [1u8; 6], + Some(locality_map.clone()), + ) + .err_tip(|| "Error creating WorkerApiServer")?; + + let connect_worker_request = ConnectWorkerRequest { + cas_endpoint: cas_endpoint.to_string(), + ..Default::default() + }; + let (tx, rx) = mpsc::channel(1); + tx.send(Update::ConnectWorkerRequest(connect_worker_request)) + .await + .unwrap(); + let update_stream = Box::pin(futures::stream::unfold(rx, |mut rx| async move { + rx.recv().await.map(|update| { + let update = Ok(UpdateForScheduler { + update: Some(update), + }); + (update, rx) + }) + })); + let mut connection_worker_stream = worker_api_server + .inner_connect_worker_for_testing(update_stream) + .await? + .into_inner(); + + let maybe_first_message = connection_worker_stream.next().await; + assert!( + maybe_first_message.is_some(), + "Expected first message from stream" + ); + let first_update = maybe_first_message + .unwrap() + .err_tip(|| "Expected success result")? 
+ .update + .err_tip(|| "Expected update field to be populated")?; + let worker_id = match first_update { + update_for_worker::Update::ConnectionResult(connection_result) => { + connection_result.worker_id + } + other => unreachable!("Expected ConnectionResult, got {:?}", other), + }; + + assert_eq!( + worker_id.len(), + UUID_SIZE, + "Worker ID should be 36 characters" + ); + + Ok(LocalityTestContext { + _scheduler: scheduler, + _worker_api_server: worker_api_server, + connection_worker_stream, + _worker_id: worker_id.into(), + worker_stream: tx, + locality_map, + }) +} + +#[nativelink_test] +pub async fn handle_blobs_available_populates_locality_map_test() +-> Result<(), Box> { + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + + // Send a BlobsAvailable notification with two digests. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), // Empty means use the worker's registered endpoint. + digests: vec![d1.into(), d2.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending blobs available: {e}"))?; + + // Allow background task to process the update. + tokio::time::sleep(Duration::from_millis(50)).await; + + // Verify the locality map has both digests registered to the endpoint. + let map = test_context.locality_map.read(); + let workers_d1 = map.lookup_workers(&d1); + assert_eq!( + workers_d1.len(), + 1, + "Expected d1 to have 1 endpoint, got {workers_d1:?}" + ); + assert_eq!(&*workers_d1[0], cas_endpoint); + + let workers_d2 = map.lookup_workers(&d2); + assert_eq!( + workers_d2.len(), + 1, + "Expected d2 to have 1 endpoint, got {workers_d2:?}" + ); + assert_eq!(&*workers_d2[0], cas_endpoint); + + assert_eq!(map.digest_count(), 2); + assert_eq!(map.endpoint_count(), 1); + + Ok(()) +} + +#[nativelink_test] +pub async fn full_snapshot_replaces_endpoint_view_test() +-> Result<(), Box> { + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + let d3 = DigestInfo::new([3u8; 32], 300); + + // First, register d1 and d2 with an incremental update. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d1.into(), d2.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Confirm d1 and d2 are present. + { + let map = test_context.locality_map.read(); + assert_eq!(map.digest_count(), 2); + assert!(!map.lookup_workers(&d1).is_empty()); + assert!(!map.lookup_workers(&d2).is_empty()); + } + + // Now send a full snapshot containing only d3. + // This should clear d1 and d2 and only have d3. 
+ test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d3.into()], + is_full_snapshot: true, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Verify: d1 and d2 should be gone, only d3 remains. + let map = test_context.locality_map.read(); + assert!( + map.lookup_workers(&d1).is_empty(), + "d1 should have been cleared by full snapshot" + ); + assert!( + map.lookup_workers(&d2).is_empty(), + "d2 should have been cleared by full snapshot" + ); + let workers_d3 = map.lookup_workers(&d3); + assert_eq!( + workers_d3.len(), + 1, + "d3 should be registered after full snapshot" + ); + assert_eq!(&*workers_d3[0], cas_endpoint); + assert_eq!(map.digest_count(), 1); + + Ok(()) +} + +#[nativelink_test] +pub async fn incremental_update_preserves_existing_blobs_test() +-> Result<(), Box> { + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + let d3 = DigestInfo::new([3u8; 32], 300); + + // First update: register d1 and d2. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d1.into(), d2.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Second update (incremental): register d3 only. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d3.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // All three digests should be present. + let map = test_context.locality_map.read(); + assert_eq!( + map.digest_count(), + 3, + "All three digests should be present after incremental update" + ); + assert!(!map.lookup_workers(&d1).is_empty(), "d1 should still exist"); + assert!(!map.lookup_workers(&d2).is_empty(), "d2 should still exist"); + assert!(!map.lookup_workers(&d3).is_empty(), "d3 should be added"); + + Ok(()) +} + +#[nativelink_test] +pub async fn eviction_removes_digests_from_locality_map_test() +-> Result<(), Box> { + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + let d3 = DigestInfo::new([3u8; 32], 300); + + // Register d1, d2, d3. 
+ test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d1.into(), d2.into(), d3.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Now send an incremental update with evicted_digests containing d1 and d2. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![], + is_full_snapshot: false, + evicted_digests: vec![d1.into(), d2.into()], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // d1 and d2 should be evicted, d3 remains. + let map = test_context.locality_map.read(); + assert!( + map.lookup_workers(&d1).is_empty(), + "d1 should have been evicted" + ); + assert!( + map.lookup_workers(&d2).is_empty(), + "d2 should have been evicted" + ); + assert_eq!( + map.lookup_workers(&d3).len(), + 1, + "d3 should still be present" + ); + assert_eq!(map.digest_count(), 1); + + Ok(()) +} + +#[nativelink_test] +pub async fn worker_disconnect_cleans_up_locality_map_test() +-> Result<(), Box> { + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + + // Register d1 and d2. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d1.into(), d2.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Confirm blobs are present. + { + let map = test_context.locality_map.read(); + assert_eq!(map.digest_count(), 2); + assert_eq!(map.endpoint_count(), 1); + } + + // Drop the worker stream sender to simulate disconnect. + // The background task in WorkerConnection will see the stream end + // and call remove_endpoint on the locality map. + drop(test_context.worker_stream); + drop(test_context.connection_worker_stream); + + // Allow the background cleanup task to run. + tokio::time::sleep(Duration::from_millis(100)).await; + + // All entries for this endpoint should be removed. 
+ let map = test_context.locality_map.read(); + assert!( + map.lookup_workers(&d1).is_empty(), + "d1 should be removed after worker disconnect" + ); + assert!( + map.lookup_workers(&d2).is_empty(), + "d2 should be removed after worker disconnect" + ); + assert_eq!( + map.endpoint_count(), + 0, + "No endpoints should remain after disconnect" + ); + assert_eq!( + map.digest_count(), + 0, + "No digests should remain after disconnect" + ); + + Ok(()) +} + +#[nativelink_test] +pub async fn blobs_available_with_malformed_digests_test() +-> Result<(), Box> { + use nativelink_proto::build::bazel::remote::execution::v2::Digest as ProtoDigest; + + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + + // Build the digests list: 2 valid + 1 malformed (hash too short). + let valid1: ProtoDigest = d1.into(); + let valid2: ProtoDigest = d2.into(); + let malformed = ProtoDigest { + hash: "deadbeef".to_string(), // Only 8 hex chars, not 64. + size_bytes: 999, + ..Default::default() + }; + + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![valid1, malformed, valid2], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Only the 2 valid digests should appear in the locality map. + let map = test_context.locality_map.read(); + assert_eq!( + map.digest_count(), + 2, + "Expected exactly 2 valid digests in locality map, got {}", + map.digest_count() + ); + assert!( + !map.lookup_workers(&d1).is_empty(), + "Expected d1 to be registered" + ); + assert!( + !map.lookup_workers(&d2).is_empty(), + "Expected d2 to be registered" + ); + + Ok(()) +} + +#[nativelink_test] +pub async fn blobs_evicted_is_noop_for_wire_compat_test() +-> Result<(), Box> { + let cas_endpoint = "grpc://192.168.1.10:50081"; + let test_context = setup_api_server_with_locality(cas_endpoint).await?; + + let d1 = DigestInfo::new([1u8; 32], 100); + + // Register d1. + test_context + .worker_stream + .send(Update::BlobsAvailable(BlobsAvailableNotification { + worker_cas_endpoint: String::new(), + digests: vec![d1.into()], + is_full_snapshot: false, + evicted_digests: vec![], + digest_infos: vec![], + cpu_load_pct: 0, + cached_directory_digests: vec![], + added_subtree_digests: vec![], + removed_subtree_digests: vec![], + is_full_subtree_snapshot: false, + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // Send BlobsEvicted -- should be a no-op (handler returns Ok(())). + // The old BlobsEvicted RPC is kept for wire compatibility but ignored. + test_context + .worker_stream + .send(Update::BlobsEvicted(BlobsEvictedNotification { + worker_cas_endpoint: String::new(), + digests: vec![d1.into()], + })) + .await + .map_err(|e| make_err!(tonic::Code::Internal, "Error sending: {e}"))?; + tokio::time::sleep(Duration::from_millis(50)).await; + + // d1 should STILL be present because BlobsEvicted is now a no-op. 
+ let map = test_context.locality_map.read(); + assert_eq!( + map.lookup_workers(&d1).len(), + 1, + "d1 should still be present -- BlobsEvicted is a no-op for wire compat" + ); + + Ok(()) +} diff --git a/nativelink-store/Cargo.toml b/nativelink-store/Cargo.toml index 5a0a62928..df450c440 100644 --- a/nativelink-store/Cargo.toml +++ b/nativelink-store/Cargo.toml @@ -33,7 +33,7 @@ bincode = { version = "2.0.1", default-features = false, features = [ "alloc", "serde", ] } -blake3 = { version = "1.8.0", default-features = false } +blake3 = { version = "1.8.0", default-features = false, features = ["std", "rayon"] } byteorder = { version = "1.5.0", default-features = false } bytes = { version = "1.10.1", default-features = false } const_format = { version = "0.2.34", default-features = false } @@ -53,7 +53,7 @@ hyper = { version = "1.6.0", default-features = false } hyper-rustls = { version = "0.27.5", default-features = false, features = [ "http1", "http2", - "ring", + "aws-lc-rs", "rustls-native-certs", "rustls-platform-verifier", ] } @@ -64,13 +64,13 @@ mongodb = { version = "3", features = [ "compat-3-0-0", "rustls-tls", ], default-features = false } -opentelemetry = { version = "0.29.1", default-features = false } +opentelemetry = { version = "0.31.0", default-features = false } parking_lot = { version = "0.12.3", features = [ "arc_lock", "send_guard", ], default-features = false } patricia_tree = { version = "0.9.0", default-features = false } -prost = { version = "0.13.5", default-features = false } +prost = { version = "0.14.3", default-features = false } rand = { version = "0.9.0", default-features = false, features = [ "thread_rng", ] } @@ -83,13 +83,13 @@ redis = { version = "1.0.0", default-features = false, features = [ "tokio-comp", ] } regex = { version = "1.11.1", default-features = false } -reqwest = { version = "0.12", default-features = false } -reqwest-middleware = { version = "0.4.2", default-features = false } +reqwest = { version = "0.13.2", default-features = false } +reqwest-middleware = { version = "0.5.1", default-features = false } rustls = { version = "0.23.27", default-features = false, features = [] } rustls-pki-types = { version = "1.13.1", default-features = false } serde = { version = "1.0.219", default-features = false } serde_json = { version = "1.0.140", default-features = false } -sha2 = { version = "0.10.8", default-features = false } +sha2 = { version = "0.10.8", default-features = false, features = ["asm"] } tokio = { version = "1.44.1", features = [ "fs", "io-util", @@ -100,8 +100,8 @@ tokio-stream = { version = "0.1.17", features = [ "fs", ], default-features = false } tokio-util = { version = "0.7.14", default-features = false } -tonic = { version = "0.13.0", features = [ - "tls-ring", +tonic = { version = "0.14.5", features = [ + "tls-aws-lc", "transport", ], default-features = false } tracing = { version = "0.1.41", default-features = false } diff --git a/nativelink-store/src/ac_utils.rs b/nativelink-store/src/ac_utils.rs index 7e24270cb..1a72ca134 100644 --- a/nativelink-store/src/ac_utils.rs +++ b/nativelink-store/src/ac_utils.rs @@ -24,8 +24,10 @@ use futures::TryFutureExt; use nativelink_error::{Code, Error, ResultExt}; use nativelink_util::common::DigestInfo; use nativelink_util::digest_hasher::DigestHasher; +use nativelink_util::log_utils::throughput_mbps; use nativelink_util::store_trait::{StoreKey, StoreLike}; use prost::Message; +use tracing::debug; // NOTE(aaronmondal) From some local testing it looks like action cache items are rarely 
greater than // 1.2k. Giving a bit more just in case to reduce allocs. @@ -104,15 +106,25 @@ pub async fn serialize_and_upload_message<'a, T: Message>( let mut buffer = BytesMut::with_capacity(message.encoded_len()); let digest = message_to_digest(message, &mut buffer, hasher) .err_tip(|| "In serialize_and_upload_message")?; + let size_bytes = buffer.len() as u64; // Note: For unknown reasons we appear to be hitting: // https://github.com/rust-lang/rust/issues/92096 // or a similar issue if we try to use the non-store driver function, so we // are using the store driver function here. + let start = std::time::Instant::now(); cas_store .as_store_driver_pin() .update_oneshot(digest.into(), buffer.freeze()) .await .err_tip(|| "In serialize_and_upload_message")?; + let elapsed = start.elapsed(); + debug!( + ?digest, + size_bytes, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes, elapsed)), + "serialize_and_upload_message: CAS write completed", + ); Ok(digest) } diff --git a/nativelink-store/src/callback_utils.rs b/nativelink-store/src/callback_utils.rs index a18f20c52..4cc3ed405 100644 --- a/nativelink-store/src/callback_utils.rs +++ b/nativelink-store/src/callback_utils.rs @@ -17,22 +17,21 @@ use core::pin::Pin; use std::sync::Arc; use nativelink_util::evicting_map; -use nativelink_util::store_trait::{RemoveItemCallback, StoreKey}; +use nativelink_util::store_trait::{ItemCallback, StoreKey}; -// Generic struct to hold a RemoveItemCallback ref for the purposes -// of a RemoveStateCallback call +// Generic struct to hold an ItemCallback ref for the purposes of an item callback call #[derive(Debug)] -pub struct RemoveItemCallbackHolder { - callback: Arc<dyn RemoveItemCallback>, +pub struct ItemCallbackHolder { + callback: Arc<dyn ItemCallback>, } -impl RemoveItemCallbackHolder { - pub fn new(callback: Arc<dyn RemoveItemCallback>) -> Self { +impl ItemCallbackHolder { + pub fn new(callback: Arc<dyn ItemCallback>) -> Self { Self { callback } } } -impl<'a, Q> evicting_map::RemoveItemCallback<Q> for RemoveItemCallbackHolder +impl<'a, Q> evicting_map::ItemCallback<Q> for ItemCallbackHolder where Q: Borrow<StoreKey<'a>>, @@ -42,4 +41,9 @@ where let store_key = store_key.borrow().into_owned(); Box::pin(async move { callback.callback(store_key).await }) } + + fn on_insert(&self, store_key: &Q, size: u64) { + let store_key: &StoreKey<'_> = Borrow::<StoreKey<'_>>::borrow(store_key); + self.callback.on_insert(store_key.borrow().into_owned(), size); + } } diff --git a/nativelink-store/src/completeness_checking_store.rs b/nativelink-store/src/completeness_checking_store.rs index bbdbde8d9..6eb90f548 100644 --- a/nativelink-store/src/completeness_checking_store.rs +++ b/nativelink-store/src/completeness_checking_store.rs @@ -29,7 +29,7 @@ use nativelink_util::common::DigestInfo; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::metrics_utils::CounterWithTime; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use parking_lot::Mutex; use tokio::sync::Notify; @@ -390,12 +390,12 @@ impl StoreDriver for CompletenessCheckingStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc<Self>, - callback: Arc<dyn RemoveItemCallback>, + callback: Arc<dyn ItemCallback>, ) -> Result<(), Error> { - self.ac_store.register_remove_callback(callback.clone())?; - self.cas_store.register_remove_callback(callback)?; + self.ac_store.register_item_callback(callback.clone())?; +
self.cas_store.register_item_callback(callback)?; Ok(()) } } diff --git a/nativelink-store/src/compression_store.rs b/nativelink-store/src/compression_store.rs index 345e06703..71655170e 100644 --- a/nativelink-store/src/compression_store.rs +++ b/nativelink-store/src/compression_store.rs @@ -31,7 +31,7 @@ use nativelink_util::buf_channel::{ use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::spawn; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use serde::{Deserialize, Serialize}; @@ -44,7 +44,7 @@ pub const CURRENT_STREAM_FORMAT_VERSION: u8 = 1; // Default block size that will be used to slice stream into. pub const DEFAULT_BLOCK_SIZE: u32 = 64 * 1024; -const U32_SZ: u64 = size_of::() as u64; +const U32_SZ: u64 = size_of::() as u64; // We use a custom frame format here because I wanted the ability in the future to: // * Read a random part of the data without needing to parse entire file. @@ -630,14 +630,16 @@ impl StoreDriver for CompressionStore { }; let (read_result, get_part_fut_result) = tokio::join!(read_fut, get_part_fut); - if let Err(mut e) = read_result { - // We may need to propagate the error from reading the data through first. - if let Err(err) = get_part_fut_result { - e = err.merge(e); - } - return Err(e); + // Propagate errors from both futures. Previously, if read_fut + // succeeded but get_part_fut failed (e.g., inner store returned + // NotFound), the error was silently swallowed — masking real + // data-loss errors from the caller. + match (read_result, get_part_fut_result) { + (Ok(()), Ok(())) => Ok(()), + (Err(e), Ok(())) => Err(e), + (Ok(()), Err(e)) => Err(e), + (Err(read_err), Err(get_err)) => Err(get_err.merge(read_err)), } - Ok(()) } fn inner_store(&self, _digest: Option) -> &dyn StoreDriver { @@ -652,11 +654,11 @@ impl StoreDriver for CompressionStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.inner_store.register_remove_callback(callback) + self.inner_store.register_item_callback(callback) } } diff --git a/nativelink-store/src/dedup_store.rs b/nativelink-store/src/dedup_store.rs index 252411a45..c10edd893 100644 --- a/nativelink-store/src/dedup_store.rs +++ b/nativelink-store/src/dedup_store.rs @@ -27,7 +27,7 @@ use nativelink_util::common::DigestInfo; use nativelink_util::fastcdc::FastCDC; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use serde::{Deserialize, Serialize}; use tokio_util::codec::FramedRead; @@ -209,16 +209,13 @@ impl StoreDriver for DedupStore { .map_ok(|frame| async move { let hash = blake3::hash(&frame[..]).into(); let index_entry = DigestInfo::new(hash, frame.len() as u64); - if self - .content_store - .has(index_entry) - .await - .err_tip(|| "Failed to call .has() in DedupStore::update()")? - .is_some() - { - // If our store has this digest, we don't need to upload it. - return Result::<_, Error>::Ok(index_entry); - } + // Always upload the chunk unconditionally. 
A previous has() + // check here skipped the upload when the chunk appeared to + // exist, but the chunk could be evicted between that check + // and the index commit — leaving the index pointing to a + // missing chunk and causing "Lost inputs" errors. + // Content-addressed upload is idempotent, so re-uploading + // an existing chunk is safe and cheap. self.content_store .update_oneshot(index_entry, frame) .await @@ -379,13 +376,13 @@ impl StoreDriver for DedupStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { self.index_store - .register_remove_callback(callback.clone())?; - self.content_store.register_remove_callback(callback)?; + .register_item_callback(callback.clone())?; + self.content_store.register_item_callback(callback)?; Ok(()) } } diff --git a/nativelink-store/src/existence_cache_store.rs b/nativelink-store/src/existence_cache_store.rs index a59d48e70..fb9370b50 100644 --- a/nativelink-store/src/existence_cache_store.rs +++ b/nativelink-store/src/existence_cache_store.rs @@ -29,7 +29,7 @@ use nativelink_util::evicting_map::{EvictingMap, LenEntry}; use nativelink_util::health_utils::{HealthStatus, HealthStatusIndicator}; use nativelink_util::instant_wrapper::InstantWrapper; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use parking_lot::Mutex; use tracing::{debug, info, trace}; @@ -59,7 +59,7 @@ pub struct ExistenceCacheStore { // as if it immediately expires them, we should only apply the remove callbacks // afterwards. If this is None, we're not pausing; if it's Some it's the location to // store them in temporarily - pause_remove_callbacks: Mutex>>>, + pause_item_callbacks: Mutex>>>, } impl ExistenceCacheStore { @@ -68,7 +68,7 @@ impl ExistenceCacheStore { } } -impl RemoveItemCallback for ExistenceCacheStore { +impl ItemCallback for ExistenceCacheStore { fn callback<'a>( &'a self, store_key: StoreKey<'a>, @@ -89,14 +89,14 @@ struct ExistenceCacheCallback { cache: Weak>, } -impl RemoveItemCallback for ExistenceCacheCallback { +impl ItemCallback for ExistenceCacheCallback { fn callback<'a>( &'a self, store_key: StoreKey<'a>, ) -> Pin + Send + 'a>> { let cache = self.cache.upgrade(); if let Some(local_cache) = cache { - if let Some(callbacks) = local_cache.pause_remove_callbacks.lock().as_mut() { + if let Some(callbacks) = local_cache.pause_item_callbacks.lock().as_mut() { callbacks.push(store_key.into_owned()); } else { let store_key = store_key.into_owned(); @@ -109,6 +109,7 @@ impl RemoveItemCallback for ExistenceCacheCallback { } Box::pin(async {}) } + } impl ExistenceCacheStore { @@ -122,13 +123,13 @@ impl ExistenceCacheStore { let existence_cache_store = Arc::new(Self { inner_store, existence_cache: EvictingMap::new(eviction_policy, anchor_time), - pause_remove_callbacks: Mutex::new(None), + pause_item_callbacks: Mutex::new(None), }); let other_ref = Arc::downgrade(&existence_cache_store); existence_cache_store .inner_store - .register_remove_callback(Arc::new(ExistenceCacheCallback { cache: other_ref })) - .expect("Register remove callback should work"); + .register_item_callback(Arc::new(ExistenceCacheCallback { cache: other_ref })) + .expect("Register item callback should work"); existence_cache_store } @@ -233,21 +234,33 @@ impl StoreDriver for ExistenceCacheStore { size_info: UploadSizeInfo, ) -> Result<(), Error> { let digest = 
key.into_digest(); + // Check the inner store directly, bypassing the existence cache. + // The existence cache may have a stale positive for a blob that was + // evicted from the inner store (the async eviction callback may not + // have fired yet). Trusting the cache here would skip the upload, + // causing Bazel's "Lost inputs no longer available remotely" error. let mut exists = [None]; - self.inner_has_with_results(&[digest], &mut exists) + self.inner_store + .has_with_results(&[digest.into()], &mut exists) .await .err_tip(|| "In ExistenceCacheStore::update")?; if exists[0].is_some() { - // We need to drain the reader to avoid the writer complaining that we dropped - // the connection prematurely. + // Blob genuinely exists in the inner store — safe to skip. reader .drain() .await .err_tip(|| "In ExistenceCacheStore::update")?; + // Refresh the existence cache since we verified it exists. + let _ = self + .existence_cache + .insert(digest, ExistenceItem(exists[0].unwrap())) + .await; return Ok(()); } + // If the existence cache had a stale entry, remove it now. + self.existence_cache.remove(&digest).await; { - let mut locked_callbacks = self.pause_remove_callbacks.lock(); + let mut locked_callbacks = self.pause_item_callbacks.lock(); if locked_callbacks.is_none() { locked_callbacks.replace(vec![]); } @@ -256,15 +269,37 @@ impl StoreDriver for ExistenceCacheStore { let result = self.inner_store.update(digest, reader, size_info).await; if result.is_ok() { trace!(?digest, "Inserting into existence cache"); - if let UploadSizeInfo::ExactSize(size) = size_info { - let _ = self - .existence_cache - .insert(digest, ExistenceItem(size)) - .await; + // Cache on both ExactSize and MaxSize — the digest carries the + // authoritative size for content-addressed blobs. + let size = match size_info { + UploadSizeInfo::ExactSize(size) => size, + UploadSizeInfo::MaxSize(_) => digest.size_bytes(), + }; + let _ = self + .existence_cache + .insert(digest, ExistenceItem(size)) + .await; + + // Diagnostic: verify the blob actually persisted in the inner store. + // If this fires, it means the inner store reported success but the + // blob is not findable immediately after write. + let mut verify = [None]; + if let Ok(()) = self + .inner_store + .has_with_results(&[digest.into()], &mut verify) + .await + { + if verify[0].is_none() { + tracing::error!( + ?digest, + "CRITICAL: inner store update() succeeded but has() returns \ + None immediately after! Blob was NOT persisted to slow store.", + ); + } } } { - let maybe_keys = self.pause_remove_callbacks.lock().take(); + let maybe_keys = self.pause_item_callbacks.lock().take(); if let Some(keys) = maybe_keys { let mut callbacks: FuturesUnordered<_> = keys .into_iter() @@ -288,11 +323,20 @@ impl StoreDriver for ExistenceCacheStore { .inner_store .get_part(digest, writer, offset, length) .await; - if result.is_ok() { - let _ = self - .existence_cache - .insert(digest, ExistenceItem(digest.size_bytes())) - .await; + match &result { + Ok(()) => { + let _ = self + .existence_cache + .insert(digest, ExistenceItem(digest.size_bytes())) + .await; + } + Err(err) if err.code == nativelink_error::Code::NotFound => { + // Blob was evicted from the inner store — remove the stale + // existence cache entry so subsequent has() calls get an + // accurate result. 
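// Aside, not part of the patch: the stale-positive invalidation pattern
// above in miniature. A plain HashMap stands in for the EvictingMap and
// the hypothetical `inner_get` stands in for the inner store; on NotFound
// the cached "exists" entry must be dropped, otherwise later has() calls
// keep reporting a blob that is already gone.
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum GetError {
    NotFound,
    Other(String),
}

fn get_with_existence_cache(
    existence_cache: &mut HashMap<String, u64>,
    key: &str,
    inner_get: impl Fn(&str) -> Result<Vec<u8>, GetError>,
) -> Result<Vec<u8>, GetError> {
    match inner_get(key) {
        Ok(data) => {
            // A successful read proves existence; refresh the cache entry.
            existence_cache.insert(key.to_string(), data.len() as u64);
            Ok(data)
        }
        Err(GetError::NotFound) => {
            // Evicted underneath us: drop the stale entry so later has()
            // calls are accurate again.
            existence_cache.remove(key);
            Err(GetError::NotFound)
        }
        // Transient errors say nothing about existence; keep the cache.
        Err(other) => Err(other),
    }
}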
+ self.existence_cache.remove(&digest).await; + } + Err(_) => {} } result } @@ -309,11 +353,11 @@ impl StoreDriver for ExistenceCacheStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.inner_store.register_remove_callback(callback) + self.inner_store.register_item_callback(callback) } } diff --git a/nativelink-store/src/fast_slow_store.rs b/nativelink-store/src/fast_slow_store.rs index 1a52d7577..da0c55a4a 100644 --- a/nativelink-store/src/fast_slow_store.rs +++ b/nativelink-store/src/fast_slow_store.rs @@ -22,17 +22,18 @@ use std::ffi::OsString; use std::sync::{Arc, Weak}; use async_trait::async_trait; +use bytes::Bytes; use futures::{FutureExt, join}; use nativelink_config::stores::{FastSlowSpec, StoreDirection}; use nativelink_error::{Code, Error, ResultExt, make_err}; use nativelink_metric::MetricsComponent; use nativelink_util::buf_channel::{ - DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair, + DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair_with_size, }; use nativelink_util::fs; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, StoreOptimizations, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, StoreOptimizations, UploadSizeInfo, slow_update_store_with_file, }; use parking_lot::Mutex; @@ -100,9 +101,11 @@ impl Drop for LoaderGuard<'_> { return; }; + // Pre-compute the owned key outside the lock to minimize lock hold time. + let owned_key = self.key.borrow().into_owned(); let mut guard = store.populating_digests.lock(); if let std::collections::hash_map::Entry::Occupied(occupied_entry) = - guard.entry(self.key.borrow().into_owned()) + guard.entry(owned_key) { if Arc::ptr_eq(occupied_entry.get(), &loader) { drop(loader); @@ -136,6 +139,14 @@ impl FastSlowStore { &self.slow_store } + pub const fn fast_direction(&self) -> StoreDirection { + self.fast_direction + } + + pub const fn slow_direction(&self) -> StoreDirection { + self.slow_direction + } + pub fn get_arc(&self) -> Option> { self.weak_self.upgrade() } @@ -143,10 +154,12 @@ impl FastSlowStore { fn get_loader<'a>(&self, key: StoreKey<'a>) -> LoaderGuard<'a> { // Get a single loader instance that's used to populate the fast store // for this digest. If another request comes in then it's de-duplicated. + // Pre-compute the owned key outside the lock to minimize lock hold time. + let owned_key = key.borrow().into_owned(); let loader = match self .populating_digests .lock() - .entry(key.borrow().into_owned()) + .entry(owned_key) { std::collections::hash_map::Entry::Occupied(occupied_entry) => { occupied_entry.get().clone() @@ -187,6 +200,11 @@ impl FastSlowStore { .await .err_tip(|| "Failed to run has() on slow store")? .ok_or_else(|| { + debug!( + %key, + slow_store = %self.slow_store.inner_store(Some(key.borrow())).get_name(), + "CAS read miss: blob not found in slow store" + ); make_err!( Code::NotFound, "Object {} not found in either fast or slow store. \ @@ -201,8 +219,10 @@ impl FastSlowStore { let mut bytes_received: u64 = 0; let mut counted_hit = false; - let (mut fast_tx, fast_rx) = make_buf_channel_pair(); - let (slow_tx, mut slow_rx) = make_buf_channel_pair(); + // Use 128 slots (~32MiB at 256KiB chunks) for dual-store + // read-through to reduce backpressure between fast and slow stores. 
+ let (mut fast_tx, fast_rx) = make_buf_channel_pair_with_size(128); + let (slow_tx, mut slow_rx) = make_buf_channel_pair_with_size(128); let data_stream_fut = async move { let mut maybe_writer_pin = maybe_writer.map(Pin::new); loop { @@ -276,20 +296,10 @@ impl FastSlowStore { } } - /// Ensure our fast store is populated. This should be kept as a low - /// cost function. Since the data itself is shared and not copied it should be fairly - /// low cost to just discard the data, but does cost a few mutex locks while - /// streaming. - pub async fn populate_fast_store(&self, key: StoreKey<'_>) -> Result<(), Error> { - let maybe_size_info = self - .fast_store - .has(key.borrow()) - .await - .err_tip(|| "While querying in populate_fast_store")?; - if maybe_size_info.is_some() { - return Ok(()); - } - + /// Internal helper: copy a blob from the slow store into the fast store, + /// using the de-duplicating loader. Assumes the caller has already verified + /// the blob is not in the fast store (or does not care). + async fn copy_slow_to_fast(&self, key: StoreKey<'_>) -> Result<(), Error> { // If the fast store is noop or read only or update only then this is an error. if self .fast_store @@ -312,6 +322,31 @@ impl FastSlowStore { .err_tip(|| "Failed to populate()") } + /// Ensure our fast store is populated. This should be kept as a low + /// cost function. Since the data itself is shared and not copied it should be fairly + /// low cost to just discard the data, but does cost a few mutex locks while + /// streaming. + pub async fn populate_fast_store(&self, key: StoreKey<'_>) -> Result<(), Error> { + let maybe_size_info = self + .fast_store + .has(key.borrow()) + .await + .err_tip(|| "While querying in populate_fast_store")?; + if maybe_size_info.is_some() { + return Ok(()); + } + + self.copy_slow_to_fast(key).await + } + + /// Like [`populate_fast_store`](Self::populate_fast_store) but skips the + /// `has()` check on the fast store. Use this when the caller has already + /// verified that the blob is missing from the fast store (e.g. via a prior + /// batch `has_with_results` call) to avoid a redundant existence check. + pub async fn populate_fast_store_unchecked(&self, key: StoreKey<'_>) -> Result<(), Error> { + self.copy_slow_to_fast(key).await + } + /// Returns the range of bytes that should be sent given a slice bounds /// offset so the output range maps the `received_range.start` to 0. // TODO(palfrey) This should be put into utils, as this logic is used @@ -396,8 +431,10 @@ impl StoreDriver for FastSlowStore { return self.slow_store.update(key, reader, size_info).await; } - let (mut fast_tx, fast_rx) = make_buf_channel_pair(); - let (mut slow_tx, slow_rx) = make_buf_channel_pair(); + // Use 128 slots (~32MiB at 256KiB chunks) for dual-store + // update to reduce backpressure between fast and slow stores. 
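// Aside, not part of the patch: why the slot count matters, sketched with
// plain tokio mpsc channels under the assumption that
// make_buf_channel_pair_with_size behaves like a bounded queue of chunks.
// With capacity 1 the producer runs in lock-step with the slowest leg;
// with 128 slots a fast leg can drain ahead while the slow leg catches up.
use std::time::Duration;
use tokio::sync::mpsc;

async fn tee_to_two_sinks(chunks: Vec<Vec<u8>>) {
    // One bounded queue per downstream leg, mirroring fast_tx/slow_tx.
    let (fast_tx, mut fast_rx) = mpsc::channel::<Vec<u8>>(128);
    let (slow_tx, mut slow_rx) = mpsc::channel::<Vec<u8>>(128);

    let producer = async move {
        for chunk in chunks {
            // send() only blocks when a leg is a full 128 chunks behind.
            let _ = fast_tx.send(chunk.clone()).await;
            let _ = slow_tx.send(chunk).await;
        }
        // Dropping the senders signals EOF to both consumers.
    };
    let fast_leg = async move { while fast_rx.recv().await.is_some() {} };
    let slow_leg = async move {
        while slow_rx.recv().await.is_some() {
            // Simulate a slower sink, e.g. a remote object store.
            tokio::time::sleep(Duration::from_millis(1)).await;
        }
    };
    tokio::join!(producer, fast_leg, slow_leg);
}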
+ let (mut fast_tx, fast_rx) = make_buf_channel_pair_with_size(128); + let (mut slow_tx, slow_rx) = make_buf_channel_pair_with_size(128); let key_debug = format!("{key:?}"); trace!( @@ -460,26 +497,45 @@ impl StoreDriver for FastSlowStore { } }; - let fast_store_fut = self.fast_store.update(key.borrow(), fast_rx, size_info); - let slow_store_fut = self.slow_store.update(key.borrow(), slow_rx, size_info); + let fast_start = std::time::Instant::now(); + let fast_store_fut = async { + let res = self.fast_store.update(key.borrow(), fast_rx, size_info).await; + (res, fast_start.elapsed()) + }; + let slow_start = std::time::Instant::now(); + let slow_store_fut = async { + let res = self.slow_store.update(key.borrow(), slow_rx, size_info).await; + (res, slow_start.elapsed()) + }; - let (data_stream_res, fast_res, slow_res) = + let (data_stream_res, (fast_res, fast_elapsed), (slow_res, slow_elapsed)) = join!(data_stream_fut, fast_store_fut, slow_store_fut); let total_elapsed = update_start.elapsed(); + let fast_ms = fast_elapsed.as_millis(); + let slow_ms = slow_elapsed.as_millis(); + let slower_leg = if fast_ms >= slow_ms { "fast" } else { "slow" }; if data_stream_res.is_err() || fast_res.is_err() || slow_res.is_err() { warn!( key = %key_debug, elapsed_ms = total_elapsed.as_millis(), + fast_ms, + slow_ms, + slower_leg, + total_bytes = bytes_sent, data_stream_ok = data_stream_res.is_ok(), fast_store_ok = fast_res.is_ok(), slow_store_ok = slow_res.is_ok(), "FastSlowStore::update: completed with error(s)", ); } else { - trace!( + debug!( key = %key_debug, elapsed_ms = total_elapsed.as_millis(), + fast_ms, + slow_ms, + slower_leg, + total_bytes = bytes_sent, "FastSlowStore::update: completed successfully", ); } @@ -487,6 +543,80 @@ impl StoreDriver for FastSlowStore { Ok(()) } + async fn update_oneshot( + self: Pin<&Self>, + key: StoreKey<'_>, + data: Bytes, + ) -> Result<(), Error> { + let ignore_slow = self + .slow_store + .inner_store(Some(key.borrow())) + .optimized_for(StoreOptimizations::NoopUpdates) + || self.slow_direction == StoreDirection::ReadOnly + || self.slow_direction == StoreDirection::Get; + let ignore_fast = self + .fast_store + .inner_store(Some(key.borrow())) + .optimized_for(StoreOptimizations::NoopUpdates) + || self.fast_direction == StoreDirection::ReadOnly + || self.fast_direction == StoreDirection::Get; + + if ignore_slow && ignore_fast { + return Ok(()); + } + if ignore_slow { + return self.fast_store.update_oneshot(key, data).await; + } + if ignore_fast { + return self.slow_store.update_oneshot(key, data).await; + } + + let oneshot_start = std::time::Instant::now(); + let key_debug = format!("{key:?}"); + let data_len = data.len(); + let fast_oneshot_start = std::time::Instant::now(); + let data_for_slow = data.clone(); + let fast_fut = async { + let res = self.fast_store.update_oneshot(key.borrow(), data).await; + (res, fast_oneshot_start.elapsed()) + }; + let slow_oneshot_start = std::time::Instant::now(); + let slow_fut = async { + let res = self.slow_store.update_oneshot(key.borrow(), data_for_slow).await; + (res, slow_oneshot_start.elapsed()) + }; + let ((fast_res, fast_elapsed), (slow_res, slow_elapsed)) = join!(fast_fut, slow_fut); + let total_elapsed = oneshot_start.elapsed(); + let fast_ms = fast_elapsed.as_millis(); + let slow_ms = slow_elapsed.as_millis(); + let slower_leg = if fast_ms >= slow_ms { "fast" } else { "slow" }; + if fast_res.is_err() || slow_res.is_err() { + warn!( + key = %key_debug, + elapsed_ms = total_elapsed.as_millis(), + fast_ms, + slow_ms, + 
slower_leg, + data_len, + fast_store_ok = fast_res.is_ok(), + slow_store_ok = slow_res.is_ok(), + "FastSlowStore::update_oneshot: completed with error(s)", + ); + } else { + debug!( + key = %key_debug, + elapsed_ms = total_elapsed.as_millis(), + fast_ms, + slow_ms, + slower_leg, + data_len, + "FastSlowStore::update_oneshot: completed", + ); + } + fast_res.merge(slow_res)?; + Ok(()) + } + /// `FastSlowStore` has optimizations for dealing with files. fn optimized_for(&self, optimization: StoreOptimizations) -> bool { optimization == StoreOptimizations::FileUpdates @@ -520,10 +650,10 @@ impl StoreDriver for FastSlowStore { { trace!("FastSlowStore::update_with_whole_file: uploading to slow_store"); let slow_start = std::time::Instant::now(); - slow_update_store_with_file( + file = slow_update_store_with_file( self.slow_store.as_store_driver_pin(), key.borrow(), - &mut file, + file, upload_size, ) .await @@ -555,10 +685,10 @@ impl StoreDriver for FastSlowStore { || self.fast_direction == StoreDirection::ReadOnly || self.fast_direction == StoreDirection::Get; if !ignore_fast { - slow_update_store_with_file( + file = slow_update_store_with_file( self.fast_store.as_store_driver_pin(), key.borrow(), - &mut file, + file, upload_size, ) .await @@ -575,7 +705,7 @@ impl StoreDriver for FastSlowStore { .await; } - slow_update_store_with_file(self, key, &mut file, upload_size) + let file = slow_update_store_with_file(self, key, file, upload_size) .await .err_tip(|| "In FastSlowStore::update_with_whole_file")?; Ok(Some(file)) @@ -588,19 +718,34 @@ impl StoreDriver for FastSlowStore { offset: u64, length: Option, ) -> Result<(), Error> { - // TODO(palfrey) Investigate if we should maybe ignore errors here instead of - // forwarding them up. if self.fast_store.has(key.borrow()).await?.is_some() { - self.metrics - .fast_store_hit_count - .fetch_add(1, Ordering::Acquire); - self.fast_store - .get_part(key, writer.borrow_mut(), offset, length) - .await?; - self.metrics - .fast_store_downloaded_bytes - .fetch_add(writer.get_bytes_written(), Ordering::Acquire); - return Ok(()); + // Try the fast store first. If the item was evicted between the + // has() check and this get_part() call (TOCTOU race), fall through + // to the slow-store path instead of propagating NotFound. + match self + .fast_store + .get_part(key.borrow(), writer.borrow_mut(), offset, length) + .await + { + Ok(()) => { + self.metrics + .fast_store_hit_count + .fetch_add(1, Ordering::Acquire); + self.metrics + .fast_store_downloaded_bytes + .fetch_add(writer.get_bytes_written(), Ordering::Acquire); + return Ok(()); + } + Err(err) if err.code == Code::NotFound && writer.get_bytes_written() == 0 => { + // Item was evicted between has() and get_part(). + // Only safe to fall through if no bytes were written yet. + debug!( + ?key, + "Fast store item evicted between has() and get_part(), falling through to slow store" + ); + } + Err(err) => return Err(err), + } } // If the fast store is noop or read only or update only then bypass it. @@ -630,15 +775,36 @@ impl StoreDriver for FastSlowStore { }) .await?; - // If we didn't stream then re-enter which will stream from the fast - // store, or retry the download. We should not get in a loop here - // because OnceCell has the good sense to retry for all callers so in - // order to get here the fast store will have been populated. There's - // an outside chance it was evicted, but that's slim. + // If we were a waiter (not the streaming thread), read from the + // fast store which was just populated. 
If the blob was evicted + // between populate and this read, fall back directly to the slow + // store instead of recursing (which could loop indefinitely under + // heavy eviction pressure). if let Some(writer) = writer.take() { - self.get_part(key, writer, offset, length).await + let bytes_before = writer.get_bytes_written(); + match self + .fast_store + .get_part(key.borrow(), &mut *writer, offset, length) + .await + { + Ok(()) => Ok(()), + Err(err) + if err.code == Code::NotFound + && writer.get_bytes_written() == bytes_before => + { + warn!( + ?key, + "Fast store item evicted immediately after population, \ + reading directly from slow store" + ); + self.slow_store + .get_part(key, &mut *writer, offset, length) + .await + } + Err(err) => Err(err), + } } else { - // This was the thread that did the streaming already, lucky duck. + // This was the thread that did the streaming already. Ok(()) } } @@ -655,12 +821,12 @@ impl StoreDriver for FastSlowStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.fast_store.register_remove_callback(callback.clone())?; - self.slow_store.register_remove_callback(callback)?; + self.fast_store.register_item_callback(callback.clone())?; + self.slow_store.register_item_callback(callback)?; Ok(()) } } diff --git a/nativelink-store/src/filesystem_store.rs b/nativelink-store/src/filesystem_store.rs index 97f531043..81509569e 100644 --- a/nativelink-store/src/filesystem_store.rs +++ b/nativelink-store/src/filesystem_store.rs @@ -22,11 +22,11 @@ use std::time::SystemTime; use async_lock::RwLock; use async_trait::async_trait; -use bytes::{Bytes, BytesMut}; +use bytes::Bytes; use futures::stream::{StreamExt, TryStreamExt}; use futures::{Future, TryFutureExt}; use nativelink_config::stores::FilesystemSpec; -use nativelink_error::{Code, Error, ResultExt, make_err, make_input_err}; +use nativelink_error::{Code, Error, ResultExt, make_err}; use nativelink_metric::MetricsComponent; use nativelink_util::background_spawn; use nativelink_util::buf_channel::{ @@ -36,18 +36,20 @@ use nativelink_util::common::{DigestInfo, fs}; use nativelink_util::evicting_map::{EvictingMap, LenEntry}; use nativelink_util::health_utils::{HealthRegistryBuilder, HealthStatus, HealthStatusIndicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, StoreDriver, StoreKey, StoreKeyBorrow, StoreOptimizations, UploadSizeInfo, + ItemCallback, StoreDriver, StoreKey, StoreKeyBorrow, StoreOptimizations, UploadSizeInfo, }; -use tokio::io::{AsyncReadExt, AsyncWriteExt, Take}; use tokio::sync::Semaphore; use tokio_stream::wrappers::ReadDirStream; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, trace, warn}; -use crate::callback_utils::RemoveItemCallbackHolder; +use crate::callback_utils::ItemCallbackHolder; use crate::cas_utils::is_zero_digest; // Default size to allocate memory of the buffer when reading files. -const DEFAULT_BUFF_SIZE: usize = 32 * 1024; +// 256 KiB reduces syscalls by 4x compared to 64 KiB. At 10Gbps, 64 KiB reads +// cause ~19,500 syscalls/sec/stream; 256 KiB brings this down to ~4,900. +// Modern NVMe SSDs perform significantly better with larger read sizes. 
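// Aside, not part of the patch: the syscall arithmetic behind the quoted
// numbers. At a sustained 10 Gbps (1.25 GB/s) per stream, the read-call
// rate is just throughput divided by buffer size.
fn reads_per_second(throughput_bytes_per_sec: u64, buf_size: u64) -> u64 {
    throughput_bytes_per_sec / buf_size
}

#[test]
fn buffer_size_cuts_syscall_rate_4x() {
    let ten_gbps = 10_000_000_000u64 / 8; // 1.25 GB/s in bytes
    // ~19,073 reads/sec at 64 KiB vs ~4,768 at 256 KiB, the same
    // ballpark as the ~19,500 and ~4,900 figures quoted above.
    assert_eq!(reads_per_second(ten_gbps, 64 * 1024), 19_073);
    assert_eq!(reads_per_second(ten_gbps, 256 * 1024), 4_768);
}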
+const DEFAULT_BUFF_SIZE: usize = 256 * 1024;

 // Default block size of all major filesystems is 4KB
 const DEFAULT_BLOCK_SIZE: u64 = 4 * 1024;
@@ -139,7 +141,12 @@ impl Drop for EncodedFilePath {
                 .await
                 .err_tip(|| format!("Failed to remove file {}", file_path.display()));
             if let Err(err) = result {
-                error!(?file_path, ?err, "Failed to delete file",);
+                if err.code == Code::NotFound {
+                    // File already deleted (e.g. race between eviction paths).
+                    debug!(?file_path, "File already deleted, ignoring");
+                } else {
+                    error!(?file_path, ?err, "Failed to delete file");
+                }
             } else {
                 debug!(?file_path, "File deleted",);
             }
@@ -201,8 +208,7 @@ pub trait FileEntry: LenEntry + Send + Sync + Debug + 'static {
     fn read_file_part(
         &self,
         offset: u64,
-        length: u64,
-    ) -> impl Future<Output = Result<Take<fs::FileSlot>, Error>> + Send;
+    ) -> impl Future<Output = Result<fs::FileSlot, Error>> + Send;

     /// This function is a safe way to extract the file name of the underlying file. To protect users from
     /// accidentally creating undefined behavior we encourage users to do the logic they need to do with
@@ -297,10 +303,9 @@ impl FileEntry for FileEntryImpl {
     fn read_file_part(
         &self,
         offset: u64,
-        length: u64,
-    ) -> impl Future<Output = Result<Take<fs::FileSlot>, Error>> + Send {
+    ) -> impl Future<Output = Result<fs::FileSlot, Error>> + Send {
         self.get_file_path_locked(move |full_content_path| async move {
-            let file = fs::open_file(&full_content_path, offset, length)
+            let file = fs::open_file(&full_content_path, offset)
                 .await
                 .err_tip(|| {
                     format!(
@@ -368,9 +373,10 @@ impl LenEntry for FileEntryImpl {
     async fn unref(&self) {
         let mut encoded_file_path = self.encoded_file_path.write().await;
         if encoded_file_path.path_type == PathType::Temp {
-            // We are already a temp file that is now marked for deletion on drop.
-            // This is very rare, but most likely the rename into the content path failed.
-            warn!(
+            // Already a temp file marked for deletion on drop. This happens
+            // when the entry is evicted from the map before emplace_file
+            // renames it into the content path — expected under cache pressure.
+            debug!(
                 key = ?encoded_file_path.key,
                 "File is already a temp file",
             );
@@ -394,7 +400,7 @@
             key = ?encoded_file_path.key,
             ?from_path,
             ?to_path,
-            "Renamed file (unref)",
+            "Evicted blob from filesystem cache (unref)",
         );
         encoded_file_path.path_type = PathType::Temp;
         encoded_file_path.key = new_key;
@@ -421,7 +427,7 @@ pub fn key_from_file(file_name: &str, file_type: FileType) -> Result<StoreKey<'static>, Error>
 type FsEvictingMap<'a, Fe> =
-    EvictingMap<StoreKey<'a>, Arc<Fe>, SystemTime, RemoveItemCallbackHolder>;
+    EvictingMap<StoreKey<'a>, Arc<Fe>, SystemTime, ItemCallbackHolder>;

 async fn add_files_to_cache(
     evicting_map: &FsEvictingMap<'_, Fe>,
@@ -452,14 +458,28 @@
                 key: key.borrow().into_owned(),
             }),
         );
-        let time_since_anchor = anchor_time
-            .duration_since(atime)
-            .map_err(|_| make_input_err!("File access time newer than now"))?;
+        // Use a negative seconds_since_anchor for files that existed before
+        // the anchor time (startup). This correctly represents them as "older
+        // than anything inserted during runtime" in the EvictingMap timeline.
+        // Files with atime closer to startup get values closer to 0 (newer),
+        // while files not accessed for days get large negative values (older).
+        let seconds_since_anchor = if let Ok(before) = anchor_time.duration_since(atime) {
+            let secs = before.as_secs();
+            if secs > i32::MAX as u64 {
+                i32::MIN
+            } else {
+                -(secs as i32)
+            }
+        } else {
+            // atime is after anchor_time (file touched between capturing
+            // `now` and reading metadata) — treat as most-recently-used.
+ 0 + }; evicting_map .insert_with_time( key.into_owned().into(), Arc::new(file_entry), - i32::try_from(time_since_anchor.as_secs()).unwrap_or(i32::MAX), + seconds_since_anchor, ) .await; Ok(()) @@ -549,13 +569,19 @@ async fn add_files_to_cache( block_size: u64, folder: &str, ) -> Result<(), Error> { - let file_infos = read_files(Some(folder), shared_context).await?; + let mut file_infos = read_files(Some(folder), shared_context).await?; let file_type = match folder { STR_FOLDER => FileType::String, DIGEST_FOLDER => FileType::Digest, _ => panic!("Invalid folder type"), }; + // Sort by atime oldest-first so that the LRU cache ordering matches + // actual file access recency. Without this, items are inserted in + // directory-iteration order (random), causing recently-used files to + // be evicted while cold files survive. + file_infos.sort_by(|a, b| a.1.cmp(&b.1)); + let path_root = format!("{}/{folder}", shared_context.content_path); for (file_name, atime, data_size, _) in file_infos.into_iter().filter(|x| x.3) { @@ -639,6 +665,8 @@ pub struct FilesystemStore { rename_fn: fn(&OsStr, &OsStr) -> Result<(), std::io::Error>, /// Limits concurrent write operations to prevent disk I/O saturation. write_semaphore: Option, + /// Skip writes when a blob with the same key already exists (CAS dedup). + content_is_immutable: bool, } impl FilesystemStore { @@ -709,6 +737,7 @@ impl FilesystemStore { weak_self: weak_self.clone(), rename_fn, write_semaphore, + content_is_immutable: spec.content_is_immutable, })) } @@ -716,6 +745,28 @@ impl FilesystemStore { self.weak_self.upgrade() } + /// Returns all digest entries in the cache with their absolute last-access + /// timestamps (seconds since UNIX epoch). String-keyed entries are skipped. + /// This is a peek-only operation and does NOT promote entries in the LRU. + pub fn get_all_digests_with_timestamps(&self) -> Vec<(DigestInfo, i64)> { + self.evicting_map + .get_all_entries_with_timestamps() + .into_iter() + .filter_map(|(key_borrow, abs_timestamp)| { + match StoreKey::from(key_borrow) { + StoreKey::Digest(digest) => Some((digest, abs_timestamp)), + _ => None, + } + }) + .collect() + } + + /// Remove a digest's entry from the evicting map so the next + /// `populate_fast_store` is forced to re-download from the slow store. + pub async fn remove_entry_for_digest(&self, digest: &DigestInfo) { + self.evicting_map.remove(&digest.into()).await; + } + pub async fn get_file_entry_for_digest(&self, digest: &DigestInfo) -> Result, Error> { if is_zero_digest(digest) { return Ok(Arc::new(Fe::create( @@ -734,29 +785,56 @@ impl FilesystemStore { .ok_or_else(|| make_err!(Code::NotFound, "{digest} not found in filesystem store. This may indicate the file was evicted due to cache pressure. Consider increasing 'max_bytes' in your filesystem store's eviction_policy configuration.")) } + /// Batch-retrieves file entries for multiple digests in a single lock + /// acquisition on the EvictingMap, reducing contention compared to + /// calling `get_file_entry_for_digest()` individually for each digest. + pub async fn get_file_entries_batch( + &self, + digests: &[DigestInfo], + ) -> Vec>> { + // Separate zero digests (which don't go through evicting_map). + let store_keys: Vec> = digests + .iter() + .filter(|d| !is_zero_digest(**d)) + .map(|d| (*d).into()) + .collect(); + + let batch_results = self.evicting_map.get_many(store_keys.iter()).await; + + // Reassemble results, inserting zero-digest entries where needed. 
+ let mut batch_iter = batch_results.into_iter(); + digests + .iter() + .map(|digest| { + if is_zero_digest(*digest) { + Some(Arc::new(Fe::create( + 0, + 0, + RwLock::new(EncodedFilePath { + shared_context: self.shared_context.clone(), + path_type: PathType::Content, + key: (*digest).into(), + }), + ))) + } else { + batch_iter.next().flatten() + } + }) + .collect() + } + async fn update_file( self: Pin<&Self>, mut entry: Fe, - mut temp_file: fs::FileSlot, + temp_file: fs::FileSlot, final_key: StoreKey<'static>, mut reader: DropCloserReadHalf, ) -> Result<(), Error> { - let mut data_size = 0; - loop { - let mut data = reader - .recv() - .await - .err_tip(|| "Failed to receive data in filesystem store")?; - let data_len = data.len(); - if data_len == 0 { - break; // EOF. - } - temp_file - .write_all_buf(&mut data) - .await - .err_tip(|| "Failed to write data into filesystem store")?; - data_size += data_len as u64; - } + let write_start = std::time::Instant::now(); + let (data_size, temp_file) = fs::write_file_from_channel(temp_file, &mut reader) + .await + .err_tip(|| "Failed to write data into filesystem store")?; + let write_ms = write_start.elapsed().as_millis(); let _permit = if let Some(sem) = &self.write_semaphore { Some( @@ -768,20 +846,28 @@ impl FilesystemStore { None }; - temp_file - .as_ref() - .sync_all() - .await - .err_tip(|| "Failed to sync_data in filesystem store")?; - drop(_permit); - temp_file.advise_dontneed(); trace!(?temp_file, "Dropping file to update_file"); drop(temp_file); *entry.data_size_mut() = data_size; - self.emplace_file(final_key, Arc::new(entry)).await + let emplace_start = std::time::Instant::now(); + let result = self.emplace_file(final_key.borrow().into_owned(), Arc::new(entry)).await; + let emplace_ms = emplace_start.elapsed().as_millis(); + + let total_ms = write_ms + emplace_ms; + if total_ms > 50 { + debug!( + key = %final_key.as_str(), + total_ms, + write_ms, + emplace_ms, + data_size, + "FilesystemStore::update_file: slow phases", + ); + } + result } async fn emplace_file(&self, key: StoreKey<'static>, entry: Arc) -> Result<(), Error> { @@ -817,7 +903,7 @@ impl FilesystemStore { let mut encoded_file_path = entry.get_encoded_file_path().write().await; // Then check it's still in there... if evicting_map.get(&key).await.is_none() { - info!(%key, "Got eviction while emplacing, dropping"); + debug!(%key, "Got eviction while emplacing, dropping"); return Ok(()); } @@ -827,23 +913,37 @@ impl FilesystemStore { &key, ); - let from_path = encoded_file_path.get_file_path(); - // Internally tokio spawns fs commands onto a blocking thread anyways. - // Since we are already on a blocking thread, we just need the `fs` wrapper to manage - // an open-file permit (ensure we don't open too many files at once). - let result = (rename_fn)(&from_path, &final_path).err_tip(|| { - format!( - "Failed to rename temp file to final path {}", - final_path.display() - ) - }); + let from_path: OsString = encoded_file_path.get_file_path().into_owned(); + let final_path_owned: OsString = final_path.into_owned(); + // Run rename + set_permissions on a blocking thread to avoid + // stalling the async runtime with syscalls. 
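// Aside, not part of the patch: the spawn_blocking shape used above,
// reduced to std + tokio with illustrative paths. The blocking syscalls
// (rename, chmod) run on tokio's blocking pool so executor threads never
// stall on disk latency.
use std::path::PathBuf;

async fn rename_and_fix_perms(from: PathBuf, to: PathBuf) -> std::io::Result<()> {
    tokio::task::spawn_blocking(move || {
        std::fs::rename(&from, &to)?;
        #[cfg(target_family = "unix")]
        {
            use std::os::unix::fs::PermissionsExt;
            // 0o555: read+execute for everyone, never writable; this
            // matches the immutable-CAS-file assumption in the patch.
            std::fs::set_permissions(&to, std::fs::Permissions::from_mode(0o555))?;
        }
        Ok(())
    })
    .await
    // A JoinError (panic or cancellation) surfaces as an io::Error here.
    .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
}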
+ let from_clone = from_path.clone(); + let to_clone = final_path_owned.clone(); + let result = tokio::task::spawn_blocking(move || -> Result<(), Error> { + (rename_fn)(&from_clone, &to_clone)?; + // Pre-set CAS file permissions to read+execute (0o555) so that + // hardlinked copies already have correct permissions without + // needing a per-file chmod during input materialization. + #[cfg(target_family = "unix")] + { + use std::os::unix::fs::PermissionsExt; + let perms = std::fs::Permissions::from_mode(0o555); + if let Err(err) = std::fs::set_permissions(&to_clone, perms) { + tracing::warn!(?err, path = ?to_clone, "Failed to set CAS file permissions to 0o555"); + } + } + Ok(()) + }) + .await + .map_err(|e| make_err!(Code::Internal, "Rename task join error: {e:?}")) + .and_then(|r| r.err_tip(|| "Failed to rename temp file to final path")); // In the event our move from temp file to final file fails we need to ensure we remove // the entry from our map. // Remember: At this point it is possible for another thread to have a reference to // `entry`, so we can't delete the file, only drop() should ever delete files. if let Err(err) = result { - error!(?err, ?from_path, ?final_path, "Failed to rename file",); + error!(?err, ?from_path, ?final_path_owned, "Failed to rename file",); // Warning: To prevent deadlock we need to release our lock or during `remove_if()` // it will call `unref()`, which triggers a write-lock on `encoded_file_path`. drop(encoded_file_path); @@ -911,7 +1011,26 @@ impl StoreDriver for FilesystemStore { return Ok(()); } + // CAS dedup: skip write if blob already exists (same digest = same content). + // sizes_for_keys with peek=false promotes the key in the LRU, updating + // its access time so it won't be evicted prematurely. + if self.content_is_immutable { + let owned_key = key.borrow().into_owned(); + let mut exists = [None]; + self.evicting_map + .sizes_for_keys(core::iter::once(&owned_key), &mut exists, false) + .await; + if exists[0].is_some() { + reader + .drain() + .await + .err_tip(|| "Failed to drain reader for existing blob")?; + return Ok(()); + } + } + let temp_key = make_temp_key(&key); + let update_total_start = std::time::Instant::now(); // There's a possibility of deadlock here where we take all of the // file semaphores with make_and_open_file and the semaphores for @@ -921,6 +1040,7 @@ impl StoreDriver for FilesystemStore { // reader available to know that the populator is active. reader.peek().await?; + let temp_create_start = std::time::Instant::now(); let (entry, temp_file, temp_full_path) = Fe::make_and_open_file( self.block_size, EncodedFilePath { @@ -930,15 +1050,28 @@ impl StoreDriver for FilesystemStore { }, ) .await?; + let temp_create_ms = temp_create_start.elapsed().as_millis(); - self.update_file(entry, temp_file, key.into_owned(), reader) + let result = self.update_file(entry, temp_file, key.borrow().into_owned(), reader) .await .err_tip(|| { format!( "While processing with temp file {}", temp_full_path.display() ) - }) + }); + + let total_ms = update_total_start.elapsed().as_millis(); + if total_ms > 50 { + debug!( + key = %key.as_str(), + total_ms, + temp_create_ms, + write_and_emplace_ms = total_ms.saturating_sub(temp_create_ms), + "FilesystemStore::update: slow write", + ); + } + result } fn optimized_for(&self, optimization: StoreOptimizations) -> bool { @@ -953,7 +1086,21 @@ impl StoreDriver for FilesystemStore { return Ok(()); } + // CAS dedup: skip write if blob already exists (same digest = same content). 
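// Aside, not part of the patch: content-addressed dedup in miniature.
// Because a digest uniquely identifies its bytes, a write for an
// already-present key can be skipped outright; the one subtlety is that
// the hit must still refresh the entry's LRU position, which is why the
// code below passes peek=false to sizes_for_keys.
use std::collections::hash_map::{Entry, HashMap};

/// Returns true if the write was skipped because the digest already existed.
fn put_immutable(cas: &mut HashMap<[u8; 32], Vec<u8>>, digest: [u8; 32], data: Vec<u8>) -> bool {
    match cas.entry(digest) {
        Entry::Occupied(_) => true, // dedup hit: same digest, same content
        Entry::Vacant(v) => {
            v.insert(data);
            false
        }
    }
}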
+        if self.content_is_immutable {
+            let owned_key = key.borrow().into_owned();
+            let mut exists = [None];
+            self.evicting_map
+                .sizes_for_keys(core::iter::once(&owned_key), &mut exists, false)
+                .await;
+            if exists[0].is_some() {
+                return Ok(());
+            }
+        }
+
+        let oneshot_total_start = std::time::Instant::now();
         let temp_key = make_temp_key(&key);
+        let temp_create_start = std::time::Instant::now();
         let (mut entry, mut temp_file, temp_full_path) = Fe::make_and_open_file(
             self.block_size,
             EncodedFilePath {
@@ -964,13 +1111,30 @@
         )
         .await
         .err_tip(|| "Failed to create temp file in filesystem store update_oneshot")?;
+        let temp_create_ms = temp_create_start.elapsed().as_millis();

         // Write directly without channel overhead
+        let data_len = data.len() as u64;
+        let write_ms;
         if !data.is_empty() {
-            temp_file
-                .write_all(&data)
-                .await
-                .err_tip(|| format!("Failed to write data to {}", temp_full_path.display()))?;
+            let write_start = std::time::Instant::now();
+            let temp_full_path_clone = temp_full_path.clone();
+            temp_file = nativelink_util::spawn_blocking!("fs_write_oneshot", move || {
+                use std::io::Write;
+                temp_file
+                    .as_std_mut()
+                    .write_all(&data)
+                    .map_err(|e| Into::<Error>::into(e))
+                    .err_tip(|| {
+                        format!("Failed to write data to {}", temp_full_path_clone.display())
+                    })?;
+                Ok::<_, Error>(temp_file)
+            })
+            .await
+            .map_err(|e| make_err!(Code::Internal, "write oneshot join failed: {e:?}"))??;
+            write_ms = write_start.elapsed().as_millis();
+        } else {
+            write_ms = 0;
         }

         let _permit = if let Some(sem) = &self.write_semaphore {
@@ -983,19 +1147,28 @@
             None
         };

-        temp_file
-            .as_ref()
-            .sync_all()
-            .await
-            .err_tip(|| "Failed to sync_data in filesystem store update_oneshot")?;
-        drop(_permit);
-        temp_file.advise_dontneed();
         drop(temp_file);

-        *entry.data_size_mut() = data.len() as u64;
-        self.emplace_file(key.into_owned(), Arc::new(entry)).await
+        *entry.data_size_mut() = data_len;
+        let emplace_start = std::time::Instant::now();
+        let result = self.emplace_file(key.borrow().into_owned(), Arc::new(entry)).await;
+        let emplace_ms = emplace_start.elapsed().as_millis();
+
+        let total_ms = oneshot_total_start.elapsed().as_millis();
+        if total_ms > 50 {
+            debug!(
+                key = %key.as_str(),
+                total_ms,
+                temp_create_ms,
+                write_ms,
+                emplace_ms,
+                data_len,
+                "FilesystemStore::update_oneshot: slow write",
+            );
+        }
+        result
     }

     async fn update_with_whole_file(
@@ -1008,9 +1181,8 @@
         let file_size = match upload_size {
             UploadSizeInfo::ExactSize(size) => size,
             UploadSizeInfo::MaxSize(_) => file
-                .as_ref()
+                .as_std()
                 .metadata()
-                .await
                 .err_tip(|| format!("While reading metadata for {}", path.display()))?
                 .len(),
         };
@@ -1030,7 +1202,6 @@
         // We are done with the file, if we hold a reference to the file here, it could
         // result in a deadlock if `emplace_file()` also needs file descriptors.
         trace!(?file, "Dropping file to update_with_whole_file");
-        file.advise_dontneed();
         drop(file);
         self.emplace_file(key.into_owned(), Arc::new(entry))
             .await
@@ -1063,34 +1234,31 @@
             )
         })?;
         let read_limit = length.unwrap_or(u64::MAX);
-        let mut temp_file = entry.read_file_part(offset, read_limit).or_else(|err| async move {
+        let temp_file = entry.read_file_part(offset).or_else(|err| async move {
             // If the file is not found, we need to remove it from the eviction map.
if err.code == Code::NotFound { - error!( + warn!( ?err, key = ?owned_key, - "Entry was in our map, but not found on disk. Removing from map as a precaution, but process probably need restarted." + "Stale filesystem cache entry: file not found on disk. \ + Removed from map; upper store layer will re-fetch from remote." ); self.evicting_map.remove(&owned_key).await; } Err(err) }).await?; - loop { - let mut buf = BytesMut::with_capacity(self.read_buffer_size); - temp_file - .read_buf(&mut buf) - .await - .err_tip(|| "Failed to read data in filesystem store")?; - if buf.is_empty() { - break; // EOF. - } - writer - .send(buf.freeze()) - .await - .err_tip(|| "Failed to send chunk in filesystem store get_part")?; - } - temp_file.get_ref().advise_dontneed(); + // Hint to the kernel that we'll read sequentially — enables more + // aggressive readahead (typically 2-4x the default 128 KiB). + temp_file.advise_sequential(); + + // NOTE: We intentionally do NOT call advise_dontneed() after reading. + // The same blobs are frequently read by multiple workers within + // seconds of each other — keeping them in page cache avoids + // redundant disk I/O (measured: 76% of read I/O is re-reads). + fs::read_file_to_channel(temp_file, writer, read_limit, self.read_buffer_size) + .await + .err_tip(|| "Failed to read data in filesystem store")?; writer .send_eof() .err_tip(|| "Filed to send EOF in filesystem store get_part")?; @@ -1114,12 +1282,12 @@ impl StoreDriver for FilesystemStore { registry.register_indicator(self); } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { self.evicting_map - .add_remove_callback(RemoveItemCallbackHolder::new(callback)); + .add_item_callback(ItemCallbackHolder::new(callback)); Ok(()) } } diff --git a/nativelink-store/src/gcs_store.rs b/nativelink-store/src/gcs_store.rs index 4334bbdd2..dcf281d36 100644 --- a/nativelink-store/src/gcs_store.rs +++ b/nativelink-store/src/gcs_store.rs @@ -29,7 +29,7 @@ use nativelink_util::health_utils::{HealthRegistryBuilder, HealthStatus, HealthS use nativelink_util::instant_wrapper::InstantWrapper; use nativelink_util::retry::{Retrier, RetryResult}; use nativelink_util::store_trait::{ - RemoveItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo, + ItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo, }; use rand::Rng; use tokio::time::sleep; @@ -465,9 +465,9 @@ where registry.register_indicator(self); } - fn register_remove_callback( + fn register_item_callback( self: Arc, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { // As we're backed by GCS, this store doesn't actually drop stuff // so we can actually just ignore this diff --git a/nativelink-store/src/grpc_store.rs b/nativelink-store/src/grpc_store.rs index 0d399284f..f7d4f3439 100644 --- a/nativelink-store/src/grpc_store.rs +++ b/nativelink-store/src/grpc_store.rs @@ -15,10 +15,11 @@ use core::pin::Pin; use core::time::Duration; use std::borrow::Cow; -use std::sync::Arc; +use std::collections::HashMap; +use std::sync::{Arc, Weak}; use async_trait::async_trait; -use bytes::BytesMut; +use bytes::{Bytes, BytesMut}; use futures::stream::{FuturesUnordered, unfold}; use futures::{Future, Stream, StreamExt, TryFutureExt, TryStreamExt, future}; use nativelink_config::stores::GrpcSpec; @@ -30,13 +31,14 @@ use nativelink_proto::build::bazel::remote::execution::v2::{ ActionResult, BatchReadBlobsRequest, BatchReadBlobsResponse, BatchUpdateBlobsRequest, 
BatchUpdateBlobsResponse, FindMissingBlobsRequest, FindMissingBlobsResponse,
     GetActionResultRequest, GetTreeRequest, GetTreeResponse, UpdateActionResultRequest,
+    batch_update_blobs_request, compressor,
 };
 use nativelink_proto::google::bytestream::byte_stream_client::ByteStreamClient;
 use nativelink_proto::google::bytestream::{
     QueryWriteStatusRequest, QueryWriteStatusResponse, ReadRequest, ReadResponse, WriteRequest,
     WriteResponse,
 };
-use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf};
+use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair};
 use nativelink_util::common::DigestInfo;
 use nativelink_util::connection_manager::ConnectionManager;
 use nativelink_util::digest_hasher::{DigestHasherFunc, default_digest_hasher_func};
@@ -46,19 +48,33 @@ use nativelink_util::proto_stream_utils::{
 };
 use nativelink_util::resource_info::ResourceInfo;
 use nativelink_util::retry::{Retrier, RetryResult};
-use nativelink_util::store_trait::{RemoveItemCallback, StoreDriver, StoreKey, UploadSizeInfo};
+use nativelink_util::store_trait::{
+    IS_WORKER_REQUEST, ItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo,
+};
 use nativelink_util::{default_health_status_indicator, tls_utils};
 use opentelemetry::context::Context;
 use parking_lot::Mutex;
 use prost::Message;
 use tokio::time::sleep;
 use tonic::{Code, IntoRequest, Request, Response, Status, Streaming};
-use tracing::{error, trace, warn};
+use tracing::{error, info, trace, warn};
 use uuid::Uuid;

 // This store is usually a pass-through store, but can also be used as a CAS store. Using it as an
 // AC store has one major side-effect... The has() function may not give the proper size of the
 // underlying data. This might cause issues if embedded in certain stores.

+/// Maximum gRPC message decoding size. Must be larger than the biggest
+/// possible response (e.g. batch_read_blobs, get_tree, or a single
+/// ByteStream ReadResponse chunk). 256 MiB is generous while still
+/// providing an OOM safety net.
+const MAX_GRPC_DECODING_SIZE: usize = 256 * 1024 * 1024;
+
+struct PendingBatchEntry {
+    digest: DigestInfo,
+    data: Bytes,
+    result_tx: tokio::sync::oneshot::Sender<Result<(), Error>>,
+}
+
 #[derive(Debug, MetricsComponent)]
 pub struct GrpcStore {
     #[metric(help = "Instance name for the store")]
@@ -68,6 +84,12 @@ pub struct GrpcStore {
     connection_manager: ConnectionManager,
     /// Per-RPC timeout. Duration::ZERO means disabled.
     rpc_timeout: Duration,
+    /// Blobs at or below this size use BatchUpdateBlobs instead of
+    /// ByteStream.Write. 0 means disabled.
+    batch_update_threshold: u64,
+    /// Sender for coalescing batch entries. None when coalescing is
+    /// disabled (delay_ms == 0 or threshold == 0).
+    batch_tx: Option<tokio::sync::mpsc::UnboundedSender<PendingBatchEntry>>,
 }

 impl GrpcStore {
@@ -96,7 +118,18 @@
             Duration::from_secs(120)
         };

-        Ok(Arc::new(Self {
+        let batch_update_threshold = spec.batch_update_threshold_bytes;
+        let coalesce_delay_ms = spec.batch_coalesce_delay_ms;
+
+        let (batch_tx, batch_rx) =
+            if batch_update_threshold > 0 && coalesce_delay_ms > 0 {
+                let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
+                (Some(tx), Some(rx))
+            } else {
+                (None, None)
+            };
+
+        let store = Arc::new(Self {
             instance_name: spec.instance_name.clone(),
             store_type: spec.store_type,
             retrier: Retrier::new(
@@ -112,7 +145,183 @@
                 jitter_fn,
             ),
             rpc_timeout,
-        }))
+            batch_update_threshold,
+            batch_tx,
+        });
+
+        if let Some(rx) = batch_rx {
+            let weak = Arc::downgrade(&store);
+            let delay = Duration::from_millis(coalesce_delay_ms);
+            tokio::spawn(Self::batch_flush_loop(weak, rx, delay));
+            info!(
+                batch_update_threshold,
+                coalesce_delay_ms,
+                "GrpcStore: BatchUpdateBlobs coalescing enabled",
+            );
+        } else if batch_update_threshold > 0 {
+            info!(
+                batch_update_threshold,
+                "GrpcStore: BatchUpdateBlobs enabled (no coalescing)",
+            );
+        }
+
+        Ok(store)
+    }
+
+    /// Maximum total payload size for a single BatchUpdateBlobs RPC.
+    /// The RE API spec recommends servers support at least 4 MiB.
+    const MAX_BATCH_TOTAL_SIZE: usize = 4 * 1024 * 1024;
+
+    /// Send one or more blobs via a single BatchUpdateBlobs RPC.
+    /// Returns per-entry results keyed by digest. The RE API does not
+    /// guarantee response ordering, so we match by digest, not index.
+    async fn do_batch_update(
+        &self,
+        digests: &[DigestInfo],
+        entries: Vec<(DigestInfo, Bytes)>,
+    ) -> HashMap<DigestInfo, Result<(), Error>> {
+        let digest_function = Context::current()
+            .get::<DigestHasherFunc>()
+            .map_or_else(default_digest_hasher_func, |v| *v)
+            .proto_digest_func()
+            .into();
+
+        // Deduplicate entries by digest — multiple callers may submit the
+        // same blob in the same batch (e.g., identical stdout/stderr).
+        let deduped: HashMap<DigestInfo, Bytes> = entries.into_iter().collect();
+        let requests: Vec<_> = deduped
+            .into_iter()
+            .map(|(digest, data)| batch_update_blobs_request::Request {
+                digest: Some(digest.into()),
+                data,
+                compressor: compressor::Value::Identity.into(),
+            })
+            .collect();
+
+        let response = match self
+            .batch_update_blobs(Request::new(BatchUpdateBlobsRequest {
+                instance_name: String::new(), // Overwritten by batch_update_blobs()
+                requests,
+                digest_function,
+            }))
+            .await
+        {
+            Ok(resp) => resp,
+            Err(e) => {
+                let err = e.append("In GrpcStore::do_batch_update");
+                return digests
+                    .iter()
+                    .map(|d| (*d, Err(err.clone())))
+                    .collect();
+            }
+        };
+
+        // Build result map keyed by digest (RE API does not guarantee ordering).
+        let mut results: HashMap<DigestInfo, Result<(), Error>> = response
+            .into_inner()
+            .responses
+            .into_iter()
+            .filter_map(|resp| {
+                let digest = DigestInfo::try_from(resp.digest?).ok()?;
+                let result = match &resp.status {
+                    Some(status) if status.code != 0 => Err(make_input_err!(
+                        "BatchUpdateBlobs failed: code={}, message={}",
+                        status.code,
+                        status.message
+                    )),
+                    _ => Ok(()),
+                };
+                Some((digest, result))
+            })
+            .collect();
+
+        // Fill in missing responses as errors.
+        for d in digests {
+            results
+                .entry(*d)
+                .or_insert_with(|| Err(make_input_err!("BatchUpdateBlobs: no response for digest")));
+        }
+        results
+    }
+
+    /// Background task that accumulates small blob uploads and flushes
+    /// them as batched RPCs.
+    async fn batch_flush_loop(
+        weak: Weak<GrpcStore>,
+        mut rx: tokio::sync::mpsc::UnboundedReceiver<PendingBatchEntry>,
+        delay: Duration,
+    ) {
+        // An entry that didn't fit in the previous batch, carried forward.
+        let mut held_entry: Option<PendingBatchEntry> = None;
+
+        loop {
+            // Use held entry from previous iteration, or wait for a new one.
+            let first = if let Some(entry) = held_entry.take() {
+                entry
+            } else {
+                match rx.recv().await {
+                    Some(entry) => entry,
+                    None => return, // Channel closed
+                }
+            };
+
+            let mut batch = vec![first];
+            let mut total_size = batch[0].data.len();
+
+            // Collect more entries within the delay window, up to size limit.
+            let deadline = tokio::time::Instant::now() + delay;
+            loop {
+                let remaining =
+                    deadline.saturating_duration_since(tokio::time::Instant::now());
+                if remaining.is_zero() {
+                    break;
+                }
+                match tokio::time::timeout(remaining, rx.recv()).await {
+                    Ok(Some(entry)) => {
+                        let new_total = total_size + entry.data.len();
+                        if new_total > Self::MAX_BATCH_TOTAL_SIZE && !batch.is_empty()
+                        {
+                            // Would exceed limit — hold for next batch.
+                            held_entry = Some(entry);
+                            break;
+                        }
+                        total_size = new_total;
+                        batch.push(entry);
+                    }
+                    _ => break, // Timeout or channel closed
+                }
+            }
+
+            let store = match weak.upgrade() {
+                Some(s) => s,
+                None => return, // GrpcStore dropped
+            };
+
+            let num = batch.len();
+            trace!(
+                count = num,
+                total_size,
+                "GrpcStore: flushing coalesced batch",
+            );
+
+            let digests: Vec<_> = batch.iter().map(|e| e.digest).collect();
+            let (senders_with_digests, entries): (Vec<_>, Vec<_>) = batch
+                .into_iter()
+                .map(|e| ((e.digest, e.result_tx), (e.digest, e.data)))
+                .unzip();
+
+            let results = store.do_batch_update(&digests, entries).await;
+
+            for (digest, sender) in senders_with_digests {
+                // Use .get().cloned() instead of .remove() because multiple
+                // senders may reference the same digest (e.g., stdout and stderr
+                // with identical content in the same batch).
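// Aside, not part of the patch: why the lookup below clones instead of
// removing. Two queued entries can share one digest (identical bytes), so
// the result map holds one entry while two senders wait on it; a .remove()
// on the first hit would leave the second sender without a result.
use std::collections::HashMap;
use std::sync::mpsc::Sender;

fn distribute_results(
    senders: Vec<([u8; 32], Sender<Result<(), String>>)>,
    results: &HashMap<[u8; 32], Result<(), String>>,
) {
    for (digest, sender) in senders {
        // get().cloned() tolerates the same digest being looked up twice.
        let result = results
            .get(&digest)
            .cloned()
            .unwrap_or_else(|| Err("missing result for digest".to_string()));
        let _ = sender.send(result);
    }
}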
+ let result = results.get(&digest).cloned().unwrap_or_else(|| { + Err(make_input_err!("BatchUpdateBlobs: missing result for {digest:?}")) + }); + drop(sender.send(result)); + } + } } async fn perform_request(&self, input: I, mut request: F) -> Result @@ -153,6 +362,7 @@ impl GrpcStore { .await .err_tip(|| "in find_missing_blobs")?; ContentAddressableStorageClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .find_missing_blobs(Request::new(request)) .await .err_tip(|| "in GrpcStore::find_missing_blobs") @@ -178,6 +388,7 @@ impl GrpcStore { .await .err_tip(|| "in batch_update_blobs")?; ContentAddressableStorageClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .batch_update_blobs(Request::new(request)) .await .err_tip(|| "in GrpcStore::batch_update_blobs") @@ -196,14 +407,23 @@ impl GrpcStore { let mut request = grpc_request.into_inner(); request.instance_name.clone_from(&self.instance_name); + let is_worker = IS_WORKER_REQUEST.try_with(|v| *v).unwrap_or(false); self.perform_request(request, |request| async move { let channel = self .connection_manager .connection() .await .err_tip(|| "in batch_read_blobs")?; + let mut grpc_request = Request::new(request); + if is_worker { + grpc_request.metadata_mut().insert( + "x-nativelink-worker", + tonic::metadata::MetadataValue::from_static("true"), + ); + } ContentAddressableStorageClient::new(channel) - .batch_read_blobs(Request::new(request)) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) + .batch_read_blobs(grpc_request) .await .err_tip(|| "in GrpcStore::batch_read_blobs") }) @@ -228,6 +448,7 @@ impl GrpcStore { .await .err_tip(|| "in get_tree")?; ContentAddressableStorageClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .get_tree(Request::new(request)) .await .err_tip(|| "in GrpcStore::get_tree") @@ -254,8 +475,16 @@ impl GrpcStore { .connection() .await .err_tip(|| "in read_internal")?; + let mut grpc_request = Request::new(request); + if IS_WORKER_REQUEST.try_with(|v| *v).unwrap_or(false) { + grpc_request.metadata_mut().insert( + "x-nativelink-worker", + tonic::metadata::MetadataValue::from_static("true"), + ); + } let mut response = ByteStreamClient::new(channel) - .read(Request::new(request)) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) + .read(grpc_request) .await .err_tip(|| "in GrpcStore::read")? 
.into_inner(); @@ -343,6 +572,7 @@ impl GrpcStore { let local_state_for_rpc = local_state.clone(); async move { let res = ByteStreamClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .write(WriteStateWrapper::new(local_state_for_rpc)) .await .err_tip(|| "in GrpcStore::write"); @@ -452,6 +682,7 @@ impl GrpcStore { .await .err_tip(|| "in query_write_status")?; ByteStreamClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .query_write_status(Request::new(request)) .await .err_tip(|| "in GrpcStore::query_write_status") @@ -472,6 +703,7 @@ impl GrpcStore { .await .err_tip(|| "in get_action_result")?; ActionCacheClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .get_action_result(Request::new(request)) .await .err_tip(|| "in GrpcStore::get_action_result") @@ -492,6 +724,7 @@ impl GrpcStore { .await .err_tip(|| "in update_action_result")?; ActionCacheClient::new(channel) + .max_decoding_message_size(MAX_GRPC_DECODING_SIZE) .update_action_result(Request::new(request)) .await .err_tip(|| "in GrpcStore::update_action_result") @@ -736,6 +969,74 @@ impl StoreDriver for GrpcStore { Ok(()) } + async fn update_oneshot( + self: Pin<&Self>, + key: StoreKey<'_>, + data: Bytes, + ) -> Result<(), Error> { + // Route small CAS blobs through BatchUpdateBlobs. + if !matches!(self.store_type, nativelink_config::stores::StoreType::Ac) + && self.batch_update_threshold > 0 + && (data.len() as u64) <= self.batch_update_threshold + { + let digest = key.into_digest(); + + if let Some(tx) = &self.batch_tx { + // Approach B: coalescing — queue for the background flush loop. + let (result_tx, result_rx) = tokio::sync::oneshot::channel(); + tx.send(PendingBatchEntry { + digest, + data, + result_tx, + }) + .map_err(|_| make_input_err!("Batch coalescer channel closed"))?; + return result_rx + .await + .map_err(|_| make_input_err!("Batch coalescer dropped"))?; + } + + // Approach A: immediate single-element BatchUpdateBlobs. + let digests = [digest]; + let mut results = + self.do_batch_update(&digests, vec![(digest, data)]).await; + return results.remove(&digest).unwrap_or_else(|| { + Err(make_input_err!("BatchUpdateBlobs: no response for digest")) + }); + } + + // Fallback: standard ByteStream.Write via channel pair. + let (mut tx, rx) = make_buf_channel_pair(); + let data_len = + u64::try_from(data.len()).err_tip(|| "Could not convert data.len() to u64")?; + let send_fut = async move { + if !data.is_empty() { + tx.send(data) + .await + .err_tip(|| "Failed to write data in update_oneshot")?; + } + tx.send_eof() + .err_tip(|| "Failed to write EOF in update_oneshot")?; + Ok(()) + }; + future::try_join( + send_fut, + self.update(key, rx, UploadSizeInfo::ExactSize(data_len)), + ) + .await?; + Ok(()) + } + + fn optimized_for(&self, optimization: StoreOptimizations) -> bool { + if optimization == StoreOptimizations::LazyExistenceOnSync + && !matches!(self.store_type, nativelink_config::stores::StoreType::Ac) + { + return true; + } + optimization == StoreOptimizations::SubscribesToUpdateOneshot + && self.batch_update_threshold > 0 + && !matches!(self.store_type, nativelink_config::stores::StoreType::Ac) + } + async fn get_part( self: Pin<&Self>, key: StoreKey<'_>, @@ -809,7 +1110,7 @@ impl StoreDriver for GrpcStore { loop { let data = match stream.next().await { // Create an empty response to represent EOF. 
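// Aside, not part of the patch: the small-blob routing decision from
// update_oneshot above as a pure function; the threshold and the AC/CAS
// distinction are its only inputs.
enum UploadRoute {
    BatchUpdateBlobs, // one (possibly coalesced) unary RPC
    ByteStreamWrite,  // streaming upload
}

fn pick_route(is_ac_store: bool, batch_threshold: u64, blob_len: u64) -> UploadRoute {
    // AC entries never batch (BatchUpdateBlobs is a CAS API), and a
    // threshold of 0 disables batching entirely.
    if !is_ac_store && batch_threshold > 0 && blob_len <= batch_threshold {
        UploadRoute::BatchUpdateBlobs
    } else {
        UploadRoute::ByteStreamWrite
    }
}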
- None => bytes::Bytes::new(), + None => Bytes::new(), Some(Ok(message)) => message.data, Some(Err(status)) => { return Some(( @@ -858,9 +1159,9 @@ impl StoreDriver for GrpcStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { Err(Error::new( Code::Internal, diff --git a/nativelink-store/src/lib.rs b/nativelink-store/src/lib.rs index 72b7f46d6..21d531a6f 100644 --- a/nativelink-store/src/lib.rs +++ b/nativelink-store/src/lib.rs @@ -39,3 +39,4 @@ pub mod shard_store; pub mod size_partitioning_store; pub mod store_manager; pub mod verify_store; +pub mod worker_proxy_store; diff --git a/nativelink-store/src/memory_store.rs b/nativelink-store/src/memory_store.rs index 22391596f..fb5f30725 100644 --- a/nativelink-store/src/memory_store.rs +++ b/nativelink-store/src/memory_store.rs @@ -31,10 +31,10 @@ use nativelink_util::health_utils::{ HealthRegistryBuilder, HealthStatusIndicator, default_health_status_indicator, }; use nativelink_util::store_trait::{ - RemoveItemCallback, StoreDriver, StoreKey, StoreKeyBorrow, StoreOptimizations, UploadSizeInfo, + ItemCallback, StoreDriver, StoreKey, StoreKeyBorrow, StoreOptimizations, UploadSizeInfo, }; -use crate::callback_utils::RemoveItemCallbackHolder; +use crate::callback_utils::ItemCallbackHolder; use crate::cas_utils::is_zero_digest; #[derive(Clone)] @@ -66,7 +66,7 @@ pub struct MemoryStore { StoreKey<'static>, BytesWrapper, SystemTime, - RemoveItemCallbackHolder, + ItemCallbackHolder, >, } @@ -81,8 +81,8 @@ impl MemoryStore { /// Returns the number of key-value pairs that are currently in the the cache. /// Function is not for production code paths. - pub fn len_for_test(&self) -> usize { - self.evicting_map.len_for_test() + pub async fn len_for_test(&self) -> usize { + self.evicting_map.len_for_test().await } pub async fn remove_entry(&self, key: StoreKey<'_>) -> bool { @@ -126,7 +126,8 @@ impl StoreDriver for MemoryStore { ); let iterations = self .evicting_map - .range(range, move |key, _value| handler(key.borrow())); + .range(range, move |key, _value| handler(key.borrow())) + .await; Ok(iterations) } @@ -136,17 +137,12 @@ impl StoreDriver for MemoryStore { mut reader: DropCloserReadHalf, _size_info: UploadSizeInfo, ) -> Result<(), Error> { - // Internally Bytes might hold a reference to more data than just our data. To prevent - // this potential case, we make a full copy of our data for long-term storage. - let final_buffer = { - let buffer = reader - .consume(None) - .await - .err_tip(|| "Failed to collect all bytes from reader in memory_store::update")?; - let mut new_buffer = BytesMut::with_capacity(buffer.len()); - new_buffer.extend_from_slice(&buffer[..]); - new_buffer.freeze() - }; + // consume() returns a standalone Bytes from a frozen BytesMut inside + // buf_channel — no shared parent buffer, so no need to copy. 
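// Aside, not part of the patch: the Bytes ownership point being made
// here. Slicing a Bytes shares (and keeps alive) the entire parent
// allocation, while freezing a BytesMut you filled yourself yields a
// buffer that owns exactly its own bytes, which is why consume()'s output
// needs no defensive copy.
use bytes::{Bytes, BytesMut};

fn shared_vs_standalone() {
    let parent = Bytes::from(vec![0u8; 1024 * 1024]);
    // Zero-copy slice: 4 bytes visible, ~1 MiB pinned until `small` drops.
    let small = parent.slice(0..4);
    assert_eq!(small.len(), 4);

    // A frozen BytesMut owns only what was written into it.
    let mut own = BytesMut::with_capacity(4);
    own.extend_from_slice(&[1, 2, 3, 4]);
    let standalone: Bytes = own.freeze();
    assert_eq!(standalone.len(), 4);
}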
+ let final_buffer = reader + .consume(None) + .await + .err_tip(|| "Failed to collect all bytes from reader in memory_store::update")?; self.evicting_map .insert(key.into_owned().into(), BytesWrapper(final_buffer)) @@ -232,12 +228,12 @@ impl StoreDriver for MemoryStore { registry.register_indicator(self); } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { self.evicting_map - .add_remove_callback(RemoveItemCallbackHolder::new(callback)); + .add_item_callback(ItemCallbackHolder::new(callback)); Ok(()) } } diff --git a/nativelink-store/src/mongo_store.rs b/nativelink-store/src/mongo_store.rs index b85e1ec3b..76ea69e7e 100644 --- a/nativelink-store/src/mongo_store.rs +++ b/nativelink-store/src/mongo_store.rs @@ -32,7 +32,7 @@ use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use nativelink_util::health_utils::{HealthRegistryBuilder, HealthStatus, HealthStatusIndicator}; use nativelink_util::spawn; use nativelink_util::store_trait::{ - BoolValue, RemoveItemCallback, SchedulerCurrentVersionProvider, SchedulerIndexProvider, + BoolValue, ItemCallback, SchedulerCurrentVersionProvider, SchedulerIndexProvider, SchedulerStore, SchedulerStoreDataProvider, SchedulerStoreDecodeTo, SchedulerStoreKeyProvider, SchedulerSubscription, SchedulerSubscriptionManager, StoreDriver, StoreKey, UploadSizeInfo, }; @@ -577,9 +577,9 @@ impl StoreDriver for ExperimentalMongoStore { registry.register_indicator(self); } - fn register_remove_callback( + fn register_item_callback( self: Arc, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { // drop because we don't remove anything from Mongo Ok(()) diff --git a/nativelink-store/src/noop_store.rs b/nativelink-store/src/noop_store.rs index 9c749750b..c283eee52 100644 --- a/nativelink-store/src/noop_store.rs +++ b/nativelink-store/src/noop_store.rs @@ -23,7 +23,7 @@ use nativelink_metric::{ use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo, + ItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo, }; #[derive(Debug, Default, Clone, Copy)] @@ -97,9 +97,9 @@ impl StoreDriver for NoopStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { // does nothing, so drop Ok(()) diff --git a/nativelink-store/src/ontap_s3_existence_cache_store.rs b/nativelink-store/src/ontap_s3_existence_cache_store.rs index a78d2d35a..59c88ad65 100644 --- a/nativelink-store/src/ontap_s3_existence_cache_store.rs +++ b/nativelink-store/src/ontap_s3_existence_cache_store.rs @@ -36,7 +36,7 @@ use nativelink_util::instant_wrapper::InstantWrapper; use nativelink_util::metrics_utils::CounterWithTime; use nativelink_util::spawn; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use serde::{Deserialize, Serialize}; use tokio::fs; @@ -97,7 +97,7 @@ where } } -impl RemoveItemCallback for OntapS3CacheCallback +impl ItemCallback for OntapS3CacheCallback where I: InstantWrapper, NowFn: Fn() -> I + Send + Sync + Unpin + Clone + 'static, @@ -368,7 +368,7 @@ where let other_ref = Arc::downgrade(&cache); cache .inner_store - 
.register_remove_callback(Arc::new(OntapS3CacheCallback { cache: other_ref }))?; + .register_item_callback(Arc::new(OntapS3CacheCallback { cache: other_ref }))?; // Try to load existing cache file if let Ok(contents) = fs::read_to_string(&spec.index_path).await { @@ -429,7 +429,7 @@ async fn create_s3_client(spec: &ExperimentalOntapS3Spec) -> Result, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.inner_store.register_remove_callback(callback) + self.inner_store.register_item_callback(callback) } } -impl RemoveItemCallback for OntapS3ExistenceCache +impl ItemCallback for OntapS3ExistenceCache where I: InstantWrapper, NowFn: Fn() -> I + Send + Sync + Unpin + Clone + 'static, diff --git a/nativelink-store/src/ontap_s3_store.rs b/nativelink-store/src/ontap_s3_store.rs index ecec6bd55..e39769bf9 100644 --- a/nativelink-store/src/ontap_s3_store.rs +++ b/nativelink-store/src/ontap_s3_store.rs @@ -47,7 +47,7 @@ use nativelink_util::buf_channel::{ use nativelink_util::health_utils::{HealthStatus, HealthStatusIndicator}; use nativelink_util::instant_wrapper::InstantWrapper; use nativelink_util::retry::{Retrier, RetryResult}; -use nativelink_util::store_trait::{RemoveItemCallback, StoreDriver, StoreKey, UploadSizeInfo}; +use nativelink_util::store_trait::{ItemCallback, StoreDriver, StoreKey, UploadSizeInfo}; use parking_lot::Mutex; use rustls::{ClientConfig, RootCertStore}; use rustls_pki_types::CertificateDer; @@ -74,7 +74,7 @@ const DEFAULT_MAX_RETRY_BUFFER_PER_REQUEST: usize = 20 * 1024 * 1024; // 20MB // Default limit for concurrent part uploads per multipart upload const DEFAULT_MULTIPART_MAX_CONCURRENT_UPLOADS: usize = 10; -type RemoveCallback = Arc; +type ItemCb = Arc; #[derive(Debug, MetricsComponent)] pub struct OntapS3Store { @@ -92,7 +92,7 @@ pub struct OntapS3Store { #[metric(help = "The number of concurrent uploads allowed for multipart uploads")] multipart_max_concurrent_uploads: usize, - remove_callbacks: Mutex>, + item_callbacks: Mutex>, } pub fn load_custom_certs(cert_path: &str) -> Result, Error> { @@ -167,7 +167,7 @@ where .app_name(aws_config::AppName::new("nativelink").expect("valid app name")) .http_client(http_client) .force_path_style(true) - .behavior_version(BehaviorVersion::v2025_08_07()) + .behavior_version(BehaviorVersion::v2026_01_12()) .timeout_config( aws_config::timeout::TimeoutConfig::builder() .connect_timeout(Duration::from_secs(30)) @@ -216,7 +216,7 @@ where .common .multipart_max_concurrent_uploads .unwrap_or(DEFAULT_MULTIPART_MAX_CONCURRENT_UPLOADS), - remove_callbacks: Mutex::new(vec![]), + item_callbacks: Mutex::new(vec![]), })) } @@ -245,8 +245,8 @@ where let now_s = (self.now_fn)().unix_timestamp() as i64; if last_modified.secs() + self.consider_expired_after_s <= now_s { - let remove_callbacks = self.remove_callbacks.lock().clone(); - let mut callbacks: FuturesUnordered<_> = remove_callbacks + let item_callbacks = self.item_callbacks.lock().clone(); + let mut callbacks: FuturesUnordered<_> = item_callbacks .into_iter() .map(|callback| { let store_key = local_digest.borrow(); @@ -767,11 +767,11 @@ where self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.remove_callbacks.lock().push(callback); + self.item_callbacks.lock().push(callback); Ok(()) } } diff --git a/nativelink-store/src/redis_store.rs b/nativelink-store/src/redis_store.rs index 590605429..a82183a3a 100644 --- a/nativelink-store/src/redis_store.rs +++ b/nativelink-store/src/redis_store.rs 
@@ -36,7 +36,7 @@ use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use nativelink_util::health_utils::{HealthRegistryBuilder, HealthStatus, HealthStatusIndicator}; use nativelink_util::spawn; use nativelink_util::store_trait::{ - BoolValue, RemoveItemCallback, SchedulerCurrentVersionProvider, SchedulerIndexProvider, + BoolValue, ItemCallback, SchedulerCurrentVersionProvider, SchedulerIndexProvider, SchedulerStore, SchedulerStoreDataProvider, SchedulerStoreDecodeTo, SchedulerStoreKeyProvider, SchedulerSubscription, SchedulerSubscriptionManager, StoreDriver, StoreKey, UploadSizeInfo, }; @@ -855,9 +855,9 @@ impl, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { // As redis doesn't drop stuff, we can just ignore this Ok(()) diff --git a/nativelink-store/src/ref_store.rs b/nativelink-store/src/ref_store.rs index d432553f0..2f89380fa 100644 --- a/nativelink-store/src/ref_store.rs +++ b/nativelink-store/src/ref_store.rs @@ -23,7 +23,7 @@ use nativelink_metric::MetricsComponent; use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use parking_lot::Mutex; use tracing::error; @@ -48,7 +48,7 @@ pub struct RefStore { name: String, store_manager: Weak, inner: StoreReference, - remove_callbacks: Mutex>>, + item_callbacks: Mutex>>, } impl RefStore { @@ -60,7 +60,7 @@ impl RefStore { mux: Mutex::new(()), cell: AlignedStoreCell(UnsafeCell::new(None)), }, - remove_callbacks: Mutex::new(vec![]), + item_callbacks: Mutex::new(vec![]), }) } @@ -87,9 +87,9 @@ impl RefStore { .upgrade() .err_tip(|| "Store manager is gone")?; if let Some(store) = store_manager.get_store(&self.name) { - let remove_callbacks = self.remove_callbacks.lock().clone(); - for callback in remove_callbacks { - store.register_remove_callback(callback)?; + let item_callbacks = self.item_callbacks.lock().clone(); + for callback in item_callbacks { + store.register_item_callback(callback)?; } unsafe { *ref_store = Some(store); @@ -152,15 +152,15 @@ impl StoreDriver for RefStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.remove_callbacks.lock().push(callback.clone()); + self.item_callbacks.lock().push(callback.clone()); let ref_store = self.inner.cell.0.get(); unsafe { if let Some(ref store) = *ref_store { - store.register_remove_callback(callback)?; + store.register_item_callback(callback)?; } } Ok(()) diff --git a/nativelink-store/src/s3_store.rs b/nativelink-store/src/s3_store.rs index a175a0b54..0a2f5420d 100644 --- a/nativelink-store/src/s3_store.rs +++ b/nativelink-store/src/s3_store.rs @@ -47,7 +47,7 @@ use nativelink_util::health_utils::{HealthRegistryBuilder, HealthStatus, HealthS use nativelink_util::instant_wrapper::InstantWrapper; use nativelink_util::retry::{Retrier, RetryResult}; use nativelink_util::store_trait::{ - RemoveItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo, + ItemCallback, StoreDriver, StoreKey, StoreOptimizations, UploadSizeInfo, }; use parking_lot::Mutex; use tokio::sync::mpsc; @@ -93,7 +93,7 @@ pub struct S3Store { #[metric(help = "The number of concurrent uploads allowed for multipart uploads")] multipart_max_concurrent_uploads: usize, - 
remove_callbacks: Mutex>>, + item_callbacks: Mutex>>, } impl S3Store @@ -115,7 +115,7 @@ where .build() .await; - let config = aws_config::defaults(BehaviorVersion::v2025_08_07()) + let config = aws_config::defaults(BehaviorVersion::v2026_01_12()) .credentials_provider(credential_provider) .app_name(AppName::new("nativelink").expect("valid app name")) .timeout_config( @@ -163,7 +163,7 @@ where .common .multipart_max_concurrent_uploads .map_or(DEFAULT_MULTIPART_MAX_CONCURRENT_UPLOADS, |v| v), - remove_callbacks: Mutex::new(Vec::new()), + item_callbacks: Mutex::new(Vec::new()), })) } @@ -192,8 +192,8 @@ where let now_s = (self.now_fn)().unix_timestamp() as i64; if last_modified.secs() + self.consider_expired_after_s <= now_s { - let remove_callbacks = self.remove_callbacks.lock().clone(); - let mut callbacks: FuturesUnordered<_> = remove_callbacks + let item_callbacks = self.item_callbacks.lock().clone(); + let mut callbacks: FuturesUnordered<_> = item_callbacks .iter() .map(|callback| { callback.callback(local_digest.borrow()) @@ -653,11 +653,11 @@ where registry.register_indicator(self); } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.remove_callbacks.lock().push(callback); + self.item_callbacks.lock().push(callback); Ok(()) } } diff --git a/nativelink-store/src/shard_store.rs b/nativelink-store/src/shard_store.rs index e59a05845..1ba722666 100644 --- a/nativelink-store/src/shard_store.rs +++ b/nativelink-store/src/shard_store.rs @@ -12,10 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use core::hash::Hasher; use core::ops::BitXor; use core::pin::Pin; -use std::hash::DefaultHasher; use std::sync::Arc; use async_trait::async_trait; @@ -26,7 +24,7 @@ use nativelink_metric::MetricsComponent; use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; #[derive(Debug, MetricsComponent)] @@ -127,10 +125,9 @@ impl ShardStore { .bitxor(u32::from_le_bytes(size_bytes[4..8].try_into().unwrap())) } StoreKey::Str(s) => { - let mut hasher = DefaultHasher::new(); - hasher.write(s.as_bytes()); - let key_u64 = hasher.finish(); - (key_u64 >> 32) as u32 // We only need the top 32 bits. 
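+                // blake3 produces a stable, well-distributed key here: std's
+                // DefaultHasher explicitly reserves the right to change its
+                // algorithm across releases, which would silently reshuffle
+                // shard assignment between binaries.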
+ let hash = blake3::hash(s.as_bytes()); + let hash_bytes = hash.as_bytes(); + u32::from_le_bytes([hash_bytes[0], hash_bytes[1], hash_bytes[2], hash_bytes[3]]) } }; self.weights_and_stores @@ -244,12 +241,12 @@ impl StoreDriver for ShardStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { for store in &self.weights_and_stores { - store.store.register_remove_callback(callback.clone())?; + store.store.register_item_callback(callback.clone())?; } Ok(()) } diff --git a/nativelink-store/src/size_partitioning_store.rs b/nativelink-store/src/size_partitioning_store.rs index a959244b5..399785b7b 100644 --- a/nativelink-store/src/size_partitioning_store.rs +++ b/nativelink-store/src/size_partitioning_store.rs @@ -22,7 +22,7 @@ use nativelink_metric::MetricsComponent; use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use tokio::join; @@ -162,13 +162,13 @@ impl StoreDriver for SizePartitioningStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { self.lower_store - .register_remove_callback(callback.clone())?; - self.upper_store.register_remove_callback(callback)?; + .register_item_callback(callback.clone())?; + self.upper_store.register_item_callback(callback)?; Ok(()) } } diff --git a/nativelink-store/src/verify_store.rs b/nativelink-store/src/verify_store.rs index 04ba3a02f..bc71df2ae 100644 --- a/nativelink-store/src/verify_store.rs +++ b/nativelink-store/src/verify_store.rs @@ -27,7 +27,7 @@ use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc, default_dig use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; use nativelink_util::metrics_utils::CounterWithTime; use nativelink_util::store_trait::{ - RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, + ItemCallback, Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo, }; use opentelemetry::context::Context; @@ -231,11 +231,11 @@ impl StoreDriver for VerifyStore { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.inner_store.register_remove_callback(callback) + self.inner_store.register_item_callback(callback) } } diff --git a/nativelink-store/src/worker_proxy_store.rs b/nativelink-store/src/worker_proxy_store.rs new file mode 100644 index 000000000..38e333d27 --- /dev/null +++ b/nativelink-store/src/worker_proxy_store.rs @@ -0,0 +1,1176 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
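+
+//! Worker-to-worker CAS proxying (see `WorkerProxyStore` below).
+//!
+//! A minimal wiring sketch; the call site is hypothetical and lives in
+//! worker/server startup code, not in this module:
+//!
+//! ```ignore
+//! let locality_map = new_shared_blob_locality_map();
+//! let mut proxy = WorkerProxyStore::new(inner_store, locality_map.clone());
+//! // Workers only: race peer fetches against the server.
+//! Arc::get_mut(&mut proxy).unwrap().enable_race_peers();
+//! proxy.add_worker_endpoint("grpc://peer:50071").await;
+//! ```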
+
+use core::pin::Pin;
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use nativelink_config::stores::{GrpcEndpoint, GrpcSpec, Retry, StoreType};
+use nativelink_error::{Code, Error, ResultExt, make_err};
+use nativelink_metric::MetricsComponent;
+use nativelink_util::blob_locality_map::SharedBlobLocalityMap;
+use nativelink_util::buf_channel::{
+    DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair,
+};
+use nativelink_util::health_utils::{HealthStatus, HealthStatusIndicator};
+use nativelink_util::store_trait::{
+    IS_WORKER_REQUEST, ItemCallback, REDIRECT_PREFIX, Store, StoreDriver, StoreKey, StoreLike,
+    StoreOptimizations, UploadSizeInfo,
+};
+use parking_lot::RwLock;
+use tokio::task::JoinHandle;
+use tracing::{debug, trace, warn};
+
+use crate::grpc_store::GrpcStore;
+
+/// A store wrapper that transparently proxies CAS reads from workers when
+/// the inner store returns NotFound. This enables worker-to-worker blob sharing.
+///
+/// Behavior:
+/// - `get_part()`: Try the inner store first. If NotFound, consult the locality
+///   map for workers that have the digest and try reading from one of them.
+/// - `has()` / `has_with_results()`: ONLY check the inner store. Never consult
+///   the locality map. (Prevents stale-positive issues with FindMissingBlobs.)
+/// - `update()`: Pass through to the inner store.
+#[derive(MetricsComponent)]
+pub struct WorkerProxyStore {
+    #[metric(group = "inner_store")]
+    inner: Store,
+    /// Blob locality map — digest → worker endpoints.
+    locality_map: SharedBlobLocalityMap,
+    /// Cached GrpcStore connections to worker endpoints.
+    worker_connections: RwLock<HashMap<Arc<str>, Store>>,
+    /// When true, race peer fetches against server fetches in get_part.
+    /// Only workers should enable this — servers should use the sequential
+    /// path, which generates redirects for workers.
+    race_peers: bool,
+}
+
+impl core::fmt::Debug for WorkerProxyStore {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("WorkerProxyStore")
+            .field("inner", &self.inner)
+            .field("worker_connections", &self.worker_connections.read().len())
+            .finish()
+    }
+}
+
+/// Returns true if the error code indicates a connection-level failure,
+/// meaning the cached connection should be removed.
+fn is_connection_error(e: &Error) -> bool {
+    matches!(e.code, Code::Unavailable | Code::Unknown)
+}
+
+impl WorkerProxyStore {
+    pub fn new(inner: Store, locality_map: SharedBlobLocalityMap) -> Arc<Self> {
+        Arc::new(Self {
+            inner,
+            locality_map,
+            worker_connections: RwLock::new(HashMap::new()),
+            race_peers: false,
+        })
+    }
+
+    /// Enable racing peer fetches against server fetches.
+    /// Only workers should call this — servers should leave it disabled.
+    pub fn enable_race_peers(&mut self) {
+        self.race_peers = true;
+    }
+
+    /// Add a worker endpoint to the connection pool.
+    pub async fn add_worker_endpoint(&self, endpoint: &str) {
+        if self.get_worker_connection(endpoint).is_some() {
+            return;
+        }
+        self.get_or_create_connection(endpoint).await;
+    }
+
+    /// Returns the inner (server) store.
+    pub fn inner_store(&self) -> &Store {
+        &self.inner
+    }
+
+    /// Returns the locality map for looking up which peers have which digests.
+    pub fn locality_map(&self) -> &SharedBlobLocalityMap {
+        &self.locality_map
+    }
+
+    /// Returns all currently-connected peer stores.
+    pub fn peer_stores(&self) -> HashMap<Arc<str>, Store> {
+        self.worker_connections.read().clone()
+    }
+
+    /// Remove a worker endpoint from the connection pool.
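+    /// The read paths below call this when `is_connection_error()` reports a
+    /// connection-level failure, so a dead cached connection is dropped and
+    /// re-established on the next lookup.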
+    pub fn remove_worker_endpoint(&self, endpoint: &str) {
+        let mut conns = self.worker_connections.write();
+        if conns.remove(endpoint).is_some() {
+            debug!(endpoint, "WorkerProxyStore: removed worker connection");
+        }
+    }
+
+    /// Inject a pre-built Store as a worker connection for the given endpoint.
+    /// This is primarily useful for testing, where you want to use a MemoryStore
+    /// instead of a real GrpcStore.
+    pub fn inject_worker_connection(&self, endpoint: &str, store: Store) {
+        self.worker_connections
+            .write()
+            .insert(Arc::from(endpoint), store);
+    }
+
+    /// Get a cached connection to a worker endpoint, or None.
+    fn get_worker_connection(&self, endpoint: &str) -> Option<Store> {
+        self.worker_connections.read().get(endpoint).cloned()
+    }
+
+    /// Get or create a connection to a worker endpoint.
+    /// Returns None if the connection could not be created.
+    async fn get_or_create_connection(&self, endpoint: &str) -> Option<Store> {
+        if let Some(store) = self.get_worker_connection(endpoint) {
+            return Some(store);
+        }
+        match Self::create_worker_connection(endpoint).await {
+            Ok(store) => {
+                self.worker_connections
+                    .write()
+                    .entry(Arc::from(endpoint))
+                    .or_insert_with(|| store.clone());
+                Some(store)
+            }
+            Err(e) => {
+                trace!(endpoint, ?e, "WorkerProxyStore: failed to connect to peer");
+                None
+            }
+        }
+    }
+
+    /// Create a minimal GrpcStore connection to a worker endpoint.
+    async fn create_worker_connection(endpoint: &str) -> Result<Store, Error> {
+        let spec = GrpcSpec {
+            instance_name: String::new(),
+            endpoints: vec![GrpcEndpoint {
+                address: endpoint.to_string(),
+                tls_config: None,
+                concurrency_limit: None,
+                connect_timeout_s: 5,
+                tcp_keepalive_s: 30,
+                http2_keepalive_interval_s: 30,
+                http2_keepalive_timeout_s: 20,
+                tcp_nodelay: true,
+            }],
+            store_type: StoreType::Cas,
+            retry: Retry::default(),
+            max_concurrent_requests: 0,
+            connections_per_endpoint: 64,
+            rpc_timeout_s: 120,
+            batch_update_threshold_bytes: 0, // Not uploading via this store.
+            batch_coalesce_delay_ms: 0,
+        };
+        let store = GrpcStore::new(&spec)
+            .await
+            .err_tip(|| format!("Creating worker proxy connection to {endpoint}"))?;
+        Ok(Store::new(store))
+    }
+
+    /// Try to read a blob from a specific list of peer endpoints (e.g. from
+    /// a redirect response). Same logic as `try_read_from_worker` but uses
+    /// the caller-provided endpoints instead of consulting the locality map.
+    async fn try_read_from_endpoints(
+        &self,
+        key: StoreKey<'_>,
+        writer: &mut DropCloserWriteHalf,
+        offset: u64,
+        length: Option<u64>,
+        endpoints: &[String],
+    ) -> Result<bool, Error> {
+        let digest = key.borrow().into_digest();
+        debug!(
+            ?digest,
+            endpoint_count = endpoints.len(),
+            "WorkerProxyStore: following redirect to peer endpoints"
+        );
+
+        for endpoint in endpoints {
+            let Some(store) = self.get_or_create_connection(endpoint).await else {
+                continue;
+            };
+
+            match store
+                .get_part(key.borrow(), &mut *writer, offset, length)
+                .await
+            {
+                Ok(()) => {
+                    debug!(
+                        ?digest,
+                        endpoint = endpoint.as_str(),
+                        "WorkerProxyStore: successfully read blob from redirected peer"
+                    );
+                    return Ok(true);
+                }
+                Err(e) => {
+                    if is_connection_error(&e) {
+                        self.remove_worker_endpoint(endpoint);
+                    }
+                    warn!(
+                        ?digest,
+                        endpoint = endpoint.as_str(),
+                        ?e,
+                        "WorkerProxyStore: read from redirected peer failed, trying next"
+                    );
+                    continue;
+                }
+            }
+        }
+
+        Ok(false)
+    }
+
+    /// Try to read a blob from a worker that has it, according to the locality map.
+    ///
+    /// Streams directly from the peer to the caller's writer via `get_part()` —
+    /// no buffering.
+    /// If a peer fails mid-stream, we resume from the next peer
+    /// at the byte offset where the previous one left off (content-addressed
+    /// blobs are identical across peers).
+    async fn try_read_from_worker(
+        &self,
+        key: StoreKey<'_>,
+        writer: &mut DropCloserWriteHalf,
+        offset: u64,
+        length: Option<u64>,
+    ) -> Result<bool, Error> {
+        let digest = key.borrow().into_digest();
+        let workers = self.locality_map.read().lookup_workers(&digest);
+
+        if workers.is_empty() {
+            return Ok(false);
+        }
+
+        debug!(
+            ?digest,
+            worker_count = workers.len(),
+            "WorkerProxyStore: attempting to proxy blob from workers"
+        );
+
+        // Track how many bytes have been written so we can resume from the
+        // correct offset if a streaming peer fails mid-transfer.
+        let bytes_before_proxy = writer.get_bytes_written();
+        let mut current_offset = offset;
+        let mut remaining_length = length;
+
+        for endpoint in &workers {
+            let Some(store) = self.get_or_create_connection(endpoint).await else {
+                continue;
+            };
+
+            // Stream directly from the peer — no buffering.
+            // On failure, compute how many bytes were written and resume
+            // from the next peer at the correct offset.
+            match store
+                .get_part(key.borrow(), &mut *writer, current_offset, remaining_length)
+                .await
+            {
+                Ok(()) => {
+                    debug!(
+                        ?digest,
+                        endpoint = %endpoint,
+                        "WorkerProxyStore: successfully proxied blob from worker"
+                    );
+                    return Ok(true);
+                }
+                Err(e) => {
+                    if is_connection_error(&e) {
+                        self.remove_worker_endpoint(endpoint);
+                    }
+                    let bytes_written_total =
+                        writer.get_bytes_written() - bytes_before_proxy;
+                    warn!(
+                        ?digest,
+                        endpoint = %endpoint,
+                        bytes_written_total,
+                        ?e,
+                        "WorkerProxyStore: streaming get_part from peer failed, \
+                         will resume from next peer at offset {}",
+                        offset + bytes_written_total,
+                    );
+                    // Advance the offset so the next peer picks up where this one left off.
+                    current_offset = offset + bytes_written_total;
+                    // Recompute from the originally requested length so that
+                    // repeated failures don't subtract the cumulative byte
+                    // count from an already-reduced remainder.
+                    remaining_length =
+                        length.map(|len| len.saturating_sub(bytes_written_total));
+                    continue;
+                }
+            }
+        }
+
+        Ok(false)
+    }
+
+    /// The original sequential get_part logic: try the inner store, then parse
+    /// redirects, then fall back to locality map / peer proxying.
+    /// This is used as the fallback when no peers are known for racing.
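+    ///
+    /// Redirect wire format, as parsed below: a `FailedPrecondition` error
+    /// whose message embeds `"{REDIRECT_PREFIX}endpoint1,endpoint2|"`; text
+    /// after the first `|` is ignored and empty comma segments are dropped.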
+    async fn get_part_sequential(
+        &self,
+        key: StoreKey<'_>,
+        writer: &mut DropCloserWriteHalf,
+        offset: u64,
+        length: Option<u64>,
+    ) -> Result<(), Error> {
+        let mut redirect_endpoints: Option<Vec<String>> = None;
+        match IS_WORKER_REQUEST
+            .scope(
+                true,
+                self.inner.get_part(key.borrow(), &mut *writer, offset, length),
+            )
+            .await
+        {
+            Ok(()) => return Ok(()),
+            Err(e) if e.code == Code::NotFound => {
+                trace!(
+                    key = ?key.borrow().into_digest(),
+                    "WorkerProxyStore: inner store miss (NotFound), consulting locality map"
+                );
+            }
+            Err(e) if e.code == Code::FailedPrecondition => {
+                let msg = e.message_string();
+                if let Some(start) = msg.find(REDIRECT_PREFIX) {
+                    let endpoints_str = &msg[start + REDIRECT_PREFIX.len()..];
+                    let endpoints_str = endpoints_str
+                        .split('|')
+                        .next()
+                        .unwrap_or(endpoints_str);
+                    let endpoints: Vec<String> = endpoints_str
+                        .split(',')
+                        .filter(|s| !s.is_empty())
+                        .map(String::from)
+                        .collect();
+                    if !endpoints.is_empty() {
+                        debug!(
+                            key = ?key.borrow().into_digest(),
+                            ?endpoints,
+                            "WorkerProxyStore: received redirect from inner store"
+                        );
+                        redirect_endpoints = Some(endpoints);
+                    }
+                }
+                if redirect_endpoints.is_none() {
+                    return Err(e);
+                }
+            }
+            Err(e) => return Err(e),
+        }
+
+        if let Some(endpoints) = redirect_endpoints {
+            if self
+                .try_read_from_endpoints(key.borrow(), writer, offset, length, &endpoints)
+                .await?
+            {
+                return Ok(());
+            }
+        }
+
+        let is_worker = IS_WORKER_REQUEST.try_with(|v| *v).unwrap_or(false);
+
+        if is_worker {
+            let digest = key.borrow().into_digest();
+            let workers = self.locality_map.read().lookup_workers(&digest);
+            if workers.is_empty() {
+                return Err(make_err!(
+                    Code::NotFound,
+                    "Blob {digest:?} not found in inner store or locality map"
+                ));
+            }
+            let endpoints = workers.join(",");
+            debug!(
+                ?digest,
+                endpoints,
+                "WorkerProxyStore: redirecting worker to peer endpoints"
+            );
+            return Err(make_err!(
+                Code::FailedPrecondition,
+                "{REDIRECT_PREFIX}{endpoints}|"
+            ));
+        }
+
+        if self
+            .try_read_from_worker(key.borrow(), writer, offset, length)
+            .await?
+        {
+            return Ok(());
+        }
+
+        Err(make_err!(
+            Code::NotFound,
+            "Blob {:?} not found in inner store or any worker",
+            key.borrow().into_digest()
+        ))
+    }
+
+    /// Forward remaining data from a racer's read half to the caller's writer,
+    /// then wait for the spawned task to complete.
+    async fn forward_racer(
+        winner_name: &str,
+        writer: &mut DropCloserWriteHalf,
+        rx: &mut DropCloserReadHalf,
+        handle: JoinHandle<Result<(), Error>>,
+    ) -> Result<(), Error> {
+        // Forward all remaining chunks from the racer's channel to the
+        // caller's writer. bind_buffered handles EOF propagation.
+        writer
+            .bind_buffered(rx)
+            .await
+            .err_tip(|| format!("WorkerProxyStore: {winner_name} racer bind_buffered"))?;
+
+        // Wait for the spawned get_part to confirm it finished successfully.
+        // If the task was already done (sent EOF), this returns immediately.
+        handle
+            .await
+            .map_err(|e| make_err!(Code::Internal, "WorkerProxyStore: {winner_name} task join error: {e}"))?
+            .err_tip(|| format!("WorkerProxyStore: {winner_name} get_part failed after winning race"))
+    }
+}
+
+#[async_trait]
+impl StoreDriver for WorkerProxyStore {
+    async fn has_with_results(
+        self: Pin<&Self>,
+        digests: &[StoreKey<'_>],
+        results: &mut [Option<u64>],
+    ) -> Result<(), Error> {
+        // ONLY check inner store. Never consult the locality map for has().
+        // This prevents stale-positive issues with FindMissingBlobs.
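+        // A stale locality-map entry would otherwise let FindMissingBlobs
+        // claim a blob exists when no live peer can actually serve it.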
+        self.inner.has_with_results(digests, results).await
+    }
+
+    async fn update(
+        self: Pin<&Self>,
+        key: StoreKey<'_>,
+        reader: DropCloserReadHalf,
+        upload_size: UploadSizeInfo,
+    ) -> Result<(), Error> {
+        // Pass through to inner store.
+        self.inner.update(key, reader, upload_size).await
+    }
+
+    fn optimized_for(&self, optimization: StoreOptimizations) -> bool {
+        // Report LazyExistenceOnSync so that FastSlowStore skips the has()
+        // check before get_part(). Our has() only checks the inner store
+        // (to avoid stale-positive FindMissingBlobs), but get_part() also
+        // consults the locality map and peer workers. Without this, blobs
+        // that exist only on peer workers would never be found by
+        // FastSlowStore because has() returns None.
+        if optimization == StoreOptimizations::LazyExistenceOnSync {
+            return true;
+        }
+        self.inner
+            .inner_store(None::<StoreKey<'_>>)
+            .optimized_for(optimization)
+    }
+
+    async fn get_part(
+        self: Pin<&Self>,
+        key: StoreKey<'_>,
+        writer: &mut DropCloserWriteHalf,
+        offset: u64,
+        length: Option<u64>,
+    ) -> Result<(), Error> {
+        // Only race when explicitly enabled (worker side). Server-side
+        // WorkerProxyStore uses the sequential path, which generates
+        // redirects for workers and proxies for non-worker callers.
+        let digest = key.borrow().into_digest();
+        let peers = if self.race_peers {
+            self.locality_map.read().lookup_workers(&digest)
+        } else {
+            Vec::new()
+        };
+
+        if peers.is_empty() {
+            // No peers known (or server side) — use the sequential path.
+            return self
+                .get_part_sequential(key, writer, offset, length)
+                .await;
+        }
+
+        // Try to get a connection to the first peer.
+        let peer_store = match self.get_or_create_connection(&peers[0]).await {
+            Some(store) => store,
+            None => {
+                return self
+                    .get_part_sequential(key, writer, offset, length)
+                    .await;
+            }
+        };
+        let peer_endpoint: Arc<str> = peers[0].clone();
+
+        // Create buf_channel pairs for each racer. Each spawned task writes
+        // into its own tx; we read from the rx to see who produces data first.
+        let (mut server_tx, mut server_rx) = make_buf_channel_pair();
+        let (mut peer_tx, mut peer_rx) = make_buf_channel_pair();
+
+        // We need owned keys for the spawned tasks.
+        let server_key = key.borrow().into_owned();
+        let peer_key = key.borrow().into_owned();
+
+        // Clone inner store for the server task.
+        let inner = self.inner.clone();
+
+        // Spawn server fetch. Do NOT set IS_WORKER_REQUEST — we want the
+        // server to actually serve the blob data, not return a redirect.
+        let server_handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
+            inner
+                .get_part(server_key.borrow(), &mut server_tx, offset, length)
+                .await
+        });
+
+        // Spawn peer fetch.
+        let peer_handle: JoinHandle<Result<(), Error>> = tokio::spawn(async move {
+            peer_store
+                .get_part(peer_key.borrow(), &mut peer_tx, offset, length)
+                .await
+        });
+
+        // Race: wait for the first racer to produce a data chunk (or error).
+        tokio::select! {
+            server_result = server_rx.recv() => {
+                match server_result {
+                    Ok(chunk) if !chunk.is_empty() => {
+                        // Server produced data first — it wins.
+                        peer_handle.abort();
+                        debug!(
+                            ?digest,
+                            "WorkerProxyStore: server won race against peer"
+                        );
+                        writer.send(chunk).await
+                            .err_tip(|| "WorkerProxyStore: sending server winner chunk")?;
+                        Self::forward_racer("server", writer, &mut server_rx, server_handle).await
+                    }
+                    Ok(_empty) => {
+                        // Server returned EOF immediately (zero-length blob).
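+                        // (By buf_channel convention, an empty chunk from
+                        // recv() signals EOF with no data.)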
+ peer_handle.abort(); + debug!( + ?digest, + "WorkerProxyStore: server won race (empty blob)" + ); + writer.send_eof() + .err_tip(|| "WorkerProxyStore: sending EOF for empty blob")?; + server_handle.await + .map_err(|e| make_err!(Code::Internal, "server task join: {e}"))? + } + Err(_server_err) => { + // Server racer failed — wait for peer. + warn!( + ?digest, + "WorkerProxyStore: server racer failed, waiting for peer" + ); + let peer_chunk = peer_rx.recv().await + .err_tip(|| "WorkerProxyStore: peer recv after server failure")?; + if peer_chunk.is_empty() { + writer.send_eof() + .err_tip(|| "WorkerProxyStore: peer EOF after server failure")?; + return peer_handle.await + .map_err(|e| make_err!(Code::Internal, "peer task join: {e}"))?; + } + debug!( + ?digest, + endpoint = %peer_endpoint, + "WorkerProxyStore: peer won race (server failed)" + ); + writer.send(peer_chunk).await + .err_tip(|| "WorkerProxyStore: sending peer fallback chunk")?; + Self::forward_racer("peer", writer, &mut peer_rx, peer_handle).await + } + } + } + peer_result = peer_rx.recv() => { + match peer_result { + Ok(chunk) if !chunk.is_empty() => { + // Peer produced data first — it wins. + server_handle.abort(); + debug!( + ?digest, + endpoint = %peer_endpoint, + "WorkerProxyStore: peer won race against server" + ); + writer.send(chunk).await + .err_tip(|| "WorkerProxyStore: sending peer winner chunk")?; + Self::forward_racer("peer", writer, &mut peer_rx, peer_handle).await + } + Ok(_empty) => { + // Peer returned EOF immediately (zero-length blob). + server_handle.abort(); + debug!( + ?digest, + endpoint = %peer_endpoint, + "WorkerProxyStore: peer won race (empty blob)" + ); + writer.send_eof() + .err_tip(|| "WorkerProxyStore: sending EOF for empty blob from peer")?; + peer_handle.await + .map_err(|e| make_err!(Code::Internal, "peer task join: {e}"))? + } + Err(_peer_err) => { + // Peer racer failed — wait for server. + warn!( + ?digest, + endpoint = %peer_endpoint, + "WorkerProxyStore: peer racer failed, waiting for server" + ); + let server_chunk = server_rx.recv().await + .err_tip(|| "WorkerProxyStore: server recv after peer failure")?; + if server_chunk.is_empty() { + writer.send_eof() + .err_tip(|| "WorkerProxyStore: server EOF after peer failure")?; + return server_handle.await + .map_err(|e| make_err!(Code::Internal, "server task join: {e}"))?; + } + debug!( + ?digest, + "WorkerProxyStore: server won race (peer failed)" + ); + writer.send(server_chunk).await + .err_tip(|| "WorkerProxyStore: sending server fallback chunk")?; + Self::forward_racer("server", writer, &mut server_rx, server_handle).await + } + } + } + } + } + + fn inner_store(&self, key: Option) -> &dyn StoreDriver { + // Delegate to inner store so that callers can downcast through + // the chain (e.g. worker finding FastSlowStore via downcast_ref). + // WorkerProxyStore's optimized_for override is independent of this. 
+        self.inner.inner_store(key)
+    }
+
+    fn as_any<'a>(&'a self) -> &'a (dyn core::any::Any + Sync + Send + 'static) {
+        self
+    }
+
+    fn as_any_arc(self: Arc<Self>) -> Arc<dyn core::any::Any + Sync + Send + 'static> {
+        self
+    }
+
+    fn register_item_callback(
+        self: Arc<Self>,
+        callback: Arc<dyn ItemCallback>,
+    ) -> Result<(), Error> {
+        self.inner.register_item_callback(callback)
+    }
+}
+
+#[async_trait]
+impl HealthStatusIndicator for WorkerProxyStore {
+    fn get_name(&self) -> &'static str {
+        "WorkerProxyStore"
+    }
+
+    async fn check_health(
+        &self,
+        namespace: Cow<'static, str>,
+    ) -> HealthStatus {
+        self.inner.check_health(namespace).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use bytes::Bytes;
+    use nativelink_config::stores::MemorySpec;
+    use nativelink_error::{Code, Error, make_err};
+    use nativelink_macro::nativelink_test;
+    use nativelink_util::blob_locality_map::new_shared_blob_locality_map;
+    use nativelink_util::common::DigestInfo;
+    use nativelink_util::store_trait::{
+        IS_WORKER_REQUEST, REDIRECT_PREFIX, StoreKey, StoreLike, StoreOptimizations,
+    };
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+    use crate::memory_store::MemoryStore;
+
+    const VALID_HASH1: &str =
+        "0123456789abcdef000000000000000000010000000000000123456789abcdef";
+    const VALID_HASH2: &str =
+        "0123456789abcdef000000000000000000020000000000000123456789abcdef";
+
+    /// Helper: create a WorkerProxyStore backed by a fresh MemoryStore.
+    fn make_proxy_store() -> (Store, SharedBlobLocalityMap) {
+        let inner = Store::new(MemoryStore::new(&MemorySpec::default()));
+        let locality_map = new_shared_blob_locality_map();
+        let proxy = WorkerProxyStore::new(inner, locality_map.clone());
+        (Store::new(proxy), locality_map)
+    }
+
+    // ---------------------------------------------------------------
+    // 1. Inner store hit returns data without consulting locality map.
+    // ---------------------------------------------------------------
+    #[nativelink_test]
+    async fn test_inner_store_hit_skips_locality() -> Result<(), Error> {
+        let (store, locality_map) = make_proxy_store();
+
+        let value = b"hello world";
+        let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+
+        // Write the blob into the inner store via the proxy.
+        store
+            .update_oneshot(digest, Bytes::from_static(value))
+            .await?;
+
+        // Register a fake worker in the locality map so we can verify
+        // it is NOT contacted when the inner store already has the blob.
+        locality_map
+            .write()
+            .register_blobs("fake-worker:50081", &[digest]);
+
+        // Read the blob back — should succeed from the inner store.
+        let result = store
+            .get_part_unchunked(digest, 0, None)
+            .await?;
+        assert_eq!(result.as_ref(), value);
+
+        Ok(())
+    }
+
+    // ---------------------------------------------------------------
+    // 2. Inner store miss + empty locality map => NotFound.
+    // ---------------------------------------------------------------
+    #[nativelink_test]
+    async fn test_inner_store_miss_no_peers_returns_not_found() -> Result<(), Error> {
+        let (store, _locality_map) = make_proxy_store();
+
+        let digest = DigestInfo::try_new(VALID_HASH1, 100)?;
+
+        // The inner store is empty and the locality map has no entries.
+        let result = store.get_part_unchunked(digest, 0, None).await;
+
+        assert!(result.is_err(), "Expected NotFound error");
+        let err = result.unwrap_err();
+        assert_eq!(
+            err.code,
+            Code::NotFound,
+            "Expected NotFound code, got: {err:?}"
+        );
+
+        Ok(())
+    }
+
+    // ---------------------------------------------------------------
+    // 3. Inner store miss + locality has peers but no gRPC connections
+    //    => falls through gracefully and returns NotFound.
+    // ---------------------------------------------------------------
+    #[nativelink_test]
+    async fn test_inner_store_miss_locality_has_peers_but_no_connections()
+    -> Result<(), Error> {
+        let (store, locality_map) = make_proxy_store();
+
+        let digest = DigestInfo::try_new(VALID_HASH1, 100)?;
+
+        // Use an invalid URI that fails during GrpcStore::new(). The
+        // space character is illegal in URIs, so Uri::try_from() fails
+        // and create_worker_connection returns Err. try_read_from_worker
+        // will `continue` past this endpoint and return Ok(false),
+        // resulting in the final NotFound error.
+        locality_map
+            .write()
+            .register_blobs("not a valid uri", &[digest]);
+
+        let result = store.get_part_unchunked(digest, 0, None).await;
+
+        assert!(result.is_err(), "Expected NotFound error");
+        let err = result.unwrap_err();
+        assert_eq!(
+            err.code,
+            Code::NotFound,
+            "Expected NotFound, got: {err:?}"
+        );
+
+        Ok(())
+    }
+
+    // ---------------------------------------------------------------
+    // 4. has_with_results passes through to inner store (no proxy).
+    // ---------------------------------------------------------------
+    #[nativelink_test]
+    async fn test_has_with_results_passes_through() -> Result<(), Error> {
+        let (store, locality_map) = make_proxy_store();
+
+        let value = b"test data";
+        let d1 = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+        let d2 = DigestInfo::try_new(VALID_HASH2, 999)?;
+
+        // Only d1 is in the inner store.
+        store
+            .update_oneshot(d1, Bytes::from_static(value))
+            .await?;
+
+        // Register d2 on a worker so we can prove has() does NOT
+        // consult the locality map.
+        locality_map
+            .write()
+            .register_blobs("worker-a:50081", &[d2]);
+
+        let keys: Vec<StoreKey<'static>> = vec![d1.into(), d2.into()];
+        let mut results = vec![None; 2];
+        store.has_with_results(&keys, &mut results).await?;
+
+        // d1 should be found with correct size.
+        assert_eq!(
+            results[0],
+            Some(value.len() as u64),
+            "d1 should be present in inner store"
+        );
+        // d2 should NOT be found (locality map is never consulted for has).
+        assert_eq!(
+            results[1], None,
+            "d2 should NOT be found — has() must not consult locality map"
+        );
+
+        Ok(())
+    }
+
+    // ---------------------------------------------------------------
+    // 5. update() passes through to inner store.
+    // ---------------------------------------------------------------
+    #[nativelink_test]
+    async fn test_update_passes_through() -> Result<(), Error> {
+        let (store, _locality_map) = make_proxy_store();
+
+        let value = b"upload me";
+        let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+
+        // Upload via the proxy store.
+        store
+            .update_oneshot(digest, Bytes::from_static(value))
+            .await?;
+
+        // Verify the blob is retrievable (proving it went into the inner store).
+        let data = store.get_part_unchunked(digest, 0, None).await?;
+        assert_eq!(data.as_ref(), value);
+
+        // Also verify via has().
+        let size = store.has(digest).await?;
+        assert_eq!(size, Some(value.len() as u64));
+
+        Ok(())
+    }
+
+    // ---------------------------------------------------------------
+    // 6. get_part with offset and length returns correct subset.
+ // --------------------------------------------------------------- + #[nativelink_test] + async fn test_get_part_with_offset_and_length() -> Result<(), Error> { + let (store, _locality_map) = make_proxy_store(); + + let value = b"0123456789abcdefghij"; // 20 bytes + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + // Read bytes [5..15) — 10 bytes starting at offset 5. + let data = store + .get_part_unchunked(digest, 5, Some(10)) + .await?; + assert_eq!( + data.as_ref(), + b"56789abcde", + "Expected subset at offset=5, length=10" + ); + + // Read from offset 15 to end (no length limit). + let data = store.get_part_unchunked(digest, 15, None).await?; + assert_eq!( + data.as_ref(), + b"fghij", + "Expected tail from offset=15" + ); + + // Read 0 bytes from offset 0 with length 0. + let data = store + .get_part_unchunked(digest, 0, Some(0)) + .await?; + assert_eq!(data.as_ref(), b"", "Expected empty result for length=0"); + + Ok(()) + } + + // --------------------------------------------------------------- + // 7. Redirect parsing: well-formed redirect error. + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_redirect_well_formed() -> Result<(), Error> { + let err = make_err!( + Code::FailedPrecondition, + "{REDIRECT_PREFIX}grpc://w1:50071,grpc://w2:50071|" + ); + let msg = err.message_string(); + let start = msg.find(REDIRECT_PREFIX).expect("prefix missing"); + let endpoints_str = &msg[start + REDIRECT_PREFIX.len()..]; + let endpoints_str = endpoints_str.split('|').next().unwrap_or(endpoints_str); + let endpoints: Vec = endpoints_str + .split(',') + .filter(|s| !s.is_empty()) + .map(String::from) + .collect(); + assert_eq!(endpoints.len(), 2); + assert_eq!(endpoints[0], "grpc://w1:50071"); + assert_eq!(endpoints[1], "grpc://w2:50071"); + Ok(()) + } + + // --------------------------------------------------------------- + // 8. Redirect parsing: trailing noise after pipe is ignored. + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_redirect_trailing_noise_after_pipe() -> Result<(), Error> { + let err = make_err!( + Code::FailedPrecondition, + "{REDIRECT_PREFIX}grpc://w1:50071|some extra noise" + ); + let msg = err.message_string(); + let start = msg.find(REDIRECT_PREFIX).expect("prefix missing"); + let endpoints_str = &msg[start + REDIRECT_PREFIX.len()..]; + let endpoints_str = endpoints_str.split('|').next().unwrap_or(endpoints_str); + let endpoints: Vec = endpoints_str + .split(',') + .filter(|s| !s.is_empty()) + .map(String::from) + .collect(); + assert_eq!(endpoints.len(), 1); + assert_eq!(endpoints[0], "grpc://w1:50071"); + Ok(()) + } + + // --------------------------------------------------------------- + // 9. Redirect parsing: empty segments filtered out. 
+ // --------------------------------------------------------------- + #[nativelink_test] + async fn test_redirect_empty_segments_filtered() -> Result<(), Error> { + let err = make_err!( + Code::FailedPrecondition, + "{REDIRECT_PREFIX}a,,b,|" + ); + let msg = err.message_string(); + let start = msg.find(REDIRECT_PREFIX).expect("prefix missing"); + let endpoints_str = &msg[start + REDIRECT_PREFIX.len()..]; + let endpoints_str = endpoints_str.split('|').next().unwrap_or(endpoints_str); + let endpoints: Vec = endpoints_str + .split(',') + .filter(|s| !s.is_empty()) + .map(String::from) + .collect(); + assert_eq!(endpoints, vec!["a", "b"]); + Ok(()) + } + + // --------------------------------------------------------------- + // 10. IS_WORKER_REQUEST=true gets redirect with peer endpoints. + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_worker_request_gets_redirect() -> Result<(), Error> { + let (store, locality_map) = make_proxy_store(); + + let digest = DigestInfo::try_new(VALID_HASH1, 100)?; + let peer_endpoint = "grpc://peer-worker:50071"; + + locality_map + .write() + .register_blobs(peer_endpoint, &[digest]); + + let result = IS_WORKER_REQUEST + .scope(true, store.get_part_unchunked(digest, 0, None)) + .await; + + assert!(result.is_err(), "Expected redirect error"); + let err = result.unwrap_err(); + assert_eq!( + err.code, + Code::FailedPrecondition, + "Redirect should use FailedPrecondition, got: {err:?}" + ); + let msg = err.message_string(); + assert!( + msg.contains(REDIRECT_PREFIX), + "Error should contain redirect prefix: {msg}" + ); + assert!( + msg.contains(peer_endpoint), + "Error should contain peer endpoint: {msg}" + ); + + Ok(()) + } + + // --------------------------------------------------------------- + // 11. IS_WORKER_REQUEST=false gets NotFound (no proxy to invalid peer). + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_non_worker_request_gets_not_found() -> Result<(), Error> { + let (store, locality_map) = make_proxy_store(); + + let digest = DigestInfo::try_new(VALID_HASH1, 100)?; + + // Use an invalid URI so the proxy attempt fails gracefully. + locality_map + .write() + .register_blobs("not a valid uri", &[digest]); + + let result = IS_WORKER_REQUEST + .scope(false, store.get_part_unchunked(digest, 0, None)) + .await; + + assert!(result.is_err(), "Expected NotFound error"); + let err = result.unwrap_err(); + assert_eq!( + err.code, + Code::NotFound, + "Non-worker should get NotFound, got: {err:?}" + ); + + Ok(()) + } + + // --------------------------------------------------------------- + // 12. optimized_for(LazyExistenceOnSync) returns true. + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_optimized_for_lazy_existence() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let proxy = WorkerProxyStore::new(inner, locality_map); + + assert!( + StoreDriver::optimized_for(&*proxy, StoreOptimizations::LazyExistenceOnSync), + "WorkerProxyStore should report LazyExistenceOnSync" + ); + + Ok(()) + } + + // --------------------------------------------------------------- + // 13. optimized_for(other) delegates to inner store. 
+ // --------------------------------------------------------------- + #[nativelink_test] + async fn test_optimized_for_other_delegates_to_inner() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let proxy = WorkerProxyStore::new(inner, locality_map); + + assert!( + !StoreDriver::optimized_for(&*proxy, StoreOptimizations::NoopUpdates), + "Should delegate non-LazyExistence optimizations to inner store" + ); + + Ok(()) + } + + // --------------------------------------------------------------- + // 14. Race: inner store has blob, peer registered — server wins race. + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_race_server_wins_when_inner_has_blob() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let mut proxy = WorkerProxyStore::new(inner.clone(), locality_map.clone()); + Arc::get_mut(&mut proxy).unwrap().enable_race_peers(); + let store = Store::new(proxy.clone()); + + let value = b"race test data"; + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + // Put blob in inner store. + inner + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + // Inject a peer that also has the blob (MemoryStore with same data). + let peer_store = Store::new(MemoryStore::new(&MemorySpec::default())); + peer_store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + proxy.inject_worker_connection("grpc://peer:50071", peer_store); + + locality_map + .write() + .register_blobs("grpc://peer:50071", &[digest]); + + // NOT in IS_WORKER_REQUEST scope, so racing path is taken. + let result = store.get_part_unchunked(digest, 0, None).await?; + assert_eq!(result.as_ref(), value); + + Ok(()) + } + + // --------------------------------------------------------------- + // 15. Race: inner store miss, peer has blob — peer wins race. + // --------------------------------------------------------------- + #[nativelink_test] + async fn test_race_peer_wins_when_inner_misses() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let mut proxy = WorkerProxyStore::new(inner, locality_map.clone()); + Arc::get_mut(&mut proxy).unwrap().enable_race_peers(); + let store = Store::new(proxy.clone()); + + let value = b"peer only data"; + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + // Inner store is empty. Peer has the blob. + let peer_store = Store::new(MemoryStore::new(&MemorySpec::default())); + peer_store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + proxy.inject_worker_connection("grpc://peer:50071", peer_store); + + locality_map + .write() + .register_blobs("grpc://peer:50071", &[digest]); + + let result = store.get_part_unchunked(digest, 0, None).await?; + assert_eq!(result.as_ref(), value); + + Ok(()) + } + + // --------------------------------------------------------------- + // 16. Race: both inner and peer miss — returns error. 
+ // --------------------------------------------------------------- + #[nativelink_test] + async fn test_race_both_miss_returns_error() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let mut proxy = WorkerProxyStore::new(inner, locality_map.clone()); + Arc::get_mut(&mut proxy).unwrap().enable_race_peers(); + let store = Store::new(proxy.clone()); + + let digest = DigestInfo::try_new(VALID_HASH1, 100)?; + + // Both inner and peer are empty. + let peer_store = Store::new(MemoryStore::new(&MemorySpec::default())); + proxy.inject_worker_connection("grpc://peer:50071", peer_store); + + locality_map + .write() + .register_blobs("grpc://peer:50071", &[digest]); + + let result = store.get_part_unchunked(digest, 0, None).await; + assert!(result.is_err(), "Expected error when both miss"); + + Ok(()) + } +} diff --git a/nativelink-store/tests/ac_utils_test.rs b/nativelink-store/tests/ac_utils_test.rs index f9cd4ac9f..d1270483b 100644 --- a/nativelink-store/tests/ac_utils_test.rs +++ b/nativelink-store/tests/ac_utils_test.rs @@ -62,10 +62,9 @@ async fn upload_file_to_store_with_large_file() -> Result<(), Error> { } { // Upload our file. - let file = fs::open_file(&filepath, 0, u64::MAX) + let file = fs::open_file(&filepath, 0) .await - .unwrap() - .into_inner(); + .unwrap(); store .update_with_whole_file( digest, diff --git a/nativelink-store/tests/existence_store_test.rs b/nativelink-store/tests/existence_store_test.rs index 5bba22256..9560140b8 100644 --- a/nativelink-store/tests/existence_store_test.rs +++ b/nativelink-store/tests/existence_store_test.rs @@ -26,6 +26,7 @@ use nativelink_util::common::DigestInfo; use nativelink_util::instant_wrapper::MockInstantWrapped; use nativelink_util::store_trait::{Store, StoreLike}; use pretty_assertions::assert_eq; +use tokio::time::sleep; const VALID_HASH1: &str = "0123456789abcdef000000000000000000010000000000000123456789abcdef"; @@ -144,11 +145,12 @@ async fn ensure_has_requests_do_let_evictions_happen() -> Result<(), Error> { assert_eq!(store.has(digest).await, Ok(Some(VALUE.len() as u64))); MockClock::advance(Duration::from_secs(3)); - // Now that our existence cache has been populated, remove - // it from the inner store. + // Remove from the inner store. inner_store.remove_entry(digest.into()).await; - // It should be immediately evicted from the existence cache. + // Allow background eviction callbacks to propagate to the existence cache. + sleep(Duration::from_millis(10)).await; + // has() reflects the removal once the background callback clears the cache. assert_eq!(store.has(digest).await, Ok(None)); Ok(()) @@ -175,6 +177,8 @@ async fn copes_with_dropped_items() -> Result<(), Error> { .await .err_tip(|| "Failed to update store")?; + // Allow background eviction callbacks to propagate to the existence cache. 
+ sleep(Duration::from_millis(10)).await; let inner_store_item = inner_store.has(digest).await; assert!( inner_store_item.is_ok(), diff --git a/nativelink-store/tests/fast_slow_store_test.rs b/nativelink-store/tests/fast_slow_store_test.rs index 53dd12387..04a82d870 100644 --- a/nativelink-store/tests/fast_slow_store_test.rs +++ b/nativelink-store/tests/fast_slow_store_test.rs @@ -28,7 +28,7 @@ use nativelink_store::noop_store::NoopStore; use nativelink_util::buf_channel::make_buf_channel_pair; use nativelink_util::common::DigestInfo; use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; -use nativelink_util::store_trait::{RemoveItemCallback, Store, StoreDriver, StoreKey, StoreLike}; +use nativelink_util::store_trait::{ItemCallback, Store, StoreDriver, StoreKey, StoreLike}; use pretty_assertions::assert_eq; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; @@ -310,9 +310,9 @@ async fn drop_on_eof_completes_store_futures() -> Result<(), Error> { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { Ok(()) } @@ -634,9 +634,9 @@ fn make_stores_with_lazy_slow() -> (Store, Store, Store) { self } - fn register_remove_callback( + fn register_item_callback( self: Arc, - _callback: Arc, + _callback: Arc, ) -> Result<(), Error> { Ok(()) } diff --git a/nativelink-store/tests/filesystem_store_test.rs b/nativelink-store/tests/filesystem_store_test.rs index 7655de0c1..cc441a80e 100644 --- a/nativelink-store/tests/filesystem_store_test.rs +++ b/nativelink-store/tests/filesystem_store_test.rs @@ -44,7 +44,6 @@ use pretty_assertions::assert_eq; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; use sha2::{Digest, Sha256}; -use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, Take}; use tokio::sync::{Barrier, Semaphore}; use tokio::time::sleep; use tokio_stream::StreamExt; @@ -124,11 +123,11 @@ impl FileEntry for TestFileEntry< self.inner.as_ref().unwrap().get_encoded_file_path() } - async fn read_file_part(&self, offset: u64, length: u64) -> Result, Error> { + async fn read_file_part(&self, offset: u64) -> Result { self.inner .as_ref() .unwrap() - .read_file_part(offset, length) + .read_file_part(offset) .await } @@ -211,14 +210,7 @@ fn make_temp_path(data: &str) -> String { } async fn read_file_contents(file_name: &OsStr) -> Result, Error> { - let mut file = fs::open_file(file_name, 0, u64::MAX) - .await - .err_tip(|| format!("Failed to open file: {}", file_name.display()))?; - let mut data = vec![]; - file.read_to_end(&mut data) - .await - .err_tip(|| "Error reading file to end")?; - Ok(data) + fs::read(Path::new(file_name)).await } async fn wait_for_no_open_files() -> Result<(), Error> { @@ -406,7 +398,13 @@ async fn file_continues_to_stream_on_content_replace_test() -> Result<(), Error> } } - let digest1 = DigestInfo::try_new(HASH1, VALUE1.len())?; + // Use a large value so the producer is still blocked mid-stream when we + // check the temp directory. With read_buffer_size=1 and channel capacity 64, + // the producer sends 1-byte chunks. It needs well over 64 bytes to ensure + // it can't finish before the test inspects temp_path. 
+ let large_value1: String = "abcdefghij".repeat(10); // 100 bytes + let large_value2: String = "ABCDEFGHIJ".repeat(10); // 100 bytes + let digest1 = DigestInfo::try_new(HASH1, large_value1.len())?; let content_path = make_temp_path("content_path"); let temp_path = make_temp_path("temp_path"); @@ -426,7 +424,9 @@ async fn file_continues_to_stream_on_content_replace_test() -> Result<(), Error> ); // Insert data into store. - store.update_oneshot(digest1, VALUE1.into()).await?; + store + .update_oneshot(digest1, large_value1.clone().into()) + .await?; let (writer, mut reader) = make_buf_channel_pair(); let store_clone = store.clone(); @@ -444,13 +444,15 @@ async fn file_continues_to_stream_on_content_replace_test() -> Result<(), Error> .err_tip(|| "Error reading first byte")?; assert_eq!( first_byte[0], - VALUE1.as_bytes()[0], + large_value1.as_bytes()[0], "Expected first byte to match" ); } // Replace content. - store.update_oneshot(digest1, VALUE2.into()).await?; + store + .update_oneshot(digest1, large_value2.into()) + .await?; // Ensure we let any background tasks finish. tokio::task::yield_now().await; @@ -469,7 +471,7 @@ async fn file_continues_to_stream_on_content_replace_test() -> Result<(), Error> let data = read_file_contents(path.as_os_str()).await?; assert_eq!( &data[..], - VALUE1.as_bytes(), + large_value1.as_bytes(), "Expected file content to match" ); } @@ -486,7 +488,7 @@ async fn file_continues_to_stream_on_content_replace_test() -> Result<(), Error> assert_eq!( &remaining_file_data, - &VALUE1.as_bytes()[1..], + &large_value1.as_bytes()[1..], "Expected file content to match" ); @@ -514,8 +516,17 @@ async fn file_gets_cleans_up_on_cache_eviction() -> Result<(), Error> { } } - let digest1 = DigestInfo::try_new(HASH1, VALUE1.len())?; - let digest2 = DigestInfo::try_new(HASH2, VALUE2.len())?; + // Use a large value so the producer is still blocked mid-stream when we + // check the temp directory. With read_buffer_size=1 and channel capacity 64, + // the producer sends 1-byte chunks. It needs well over 64 bytes to ensure + // it can't finish before the test inspects temp_path. With a small value + // (e.g. 10 bytes), all chunks fit in the channel buffer, the get task + // completes immediately, and the background delete can race ahead of the + // temp directory inspection. + let large_value1: String = "abcdefghij".repeat(10); // 100 bytes + let large_value2: String = "ABCDEFGHIJ".repeat(10); // 100 bytes + let digest1 = DigestInfo::try_new(HASH1, large_value1.len())?; + let digest2 = DigestInfo::try_new(HASH2, large_value2.len())?; let content_path = make_temp_path("content_path"); let temp_path = make_temp_path("temp_path"); @@ -535,23 +546,36 @@ async fn file_gets_cleans_up_on_cache_eviction() -> Result<(), Error> { ); // Insert data into store. - store.update_oneshot(digest1, VALUE1.into()).await.unwrap(); - - let mut reader = { - let (writer, reader) = make_buf_channel_pair(); - let store_clone = store.clone(); - background_spawn!( - "file_gets_cleans_up_on_cache_eviction_store_get", - async move { store_clone.get(digest1, writer).await.unwrap() }, + store + .update_oneshot(digest1, large_value1.clone().into()) + .await + .unwrap(); + + let (writer, mut reader) = make_buf_channel_pair(); + let store_clone = store.clone(); + background_spawn!( + "file_gets_cleans_up_on_cache_eviction_store_get", + async move { store_clone.get(digest1, writer).await.unwrap() }, + ); + + { + // Check to ensure our first byte has been received. 
The future should be stalled + // here because the large value exceeds the channel capacity with read_buffer_size=1. + let first_byte = reader + .consume(Some(1)) + .await + .err_tip(|| "Error reading first byte")?; + assert_eq!( + first_byte[0], + large_value1.as_bytes()[0], + "Expected first byte to match" ); - reader - }; - // Ensure we have received 1 byte in our buffer. This will ensure we have a reference to - // our file open. - assert!(reader.peek().await.is_ok(), "Could not peek into reader"); + } // Insert new content. This will evict the old item. - store.update_oneshot(digest2, VALUE2.into()).await?; + store + .update_oneshot(digest2, large_value2.into()) + .await?; // Ensure we let any background tasks finish. tokio::task::yield_now().await; @@ -570,7 +594,7 @@ async fn file_gets_cleans_up_on_cache_eviction() -> Result<(), Error> { let data = read_file_contents(path.as_os_str()).await?; assert_eq!( &data[..], - VALUE1.as_bytes(), + large_value1.as_bytes(), "Expected file content to match" ); } @@ -580,12 +604,16 @@ async fn file_gets_cleans_up_on_cache_eviction() -> Result<(), Error> { ); } - let reader_data = reader + let remaining_file_data = reader .consume(Some(1024)) .await .err_tip(|| "Error reading remaining bytes")?; - assert_eq!(&reader_data, VALUE1, "Expected file content to match"); + assert_eq!( + &remaining_file_data, + &large_value1.as_bytes()[1..], + "Expected file content to match" + ); loop { if DELETES_FINISHED.load(Ordering::Relaxed) == 1 { @@ -619,9 +647,9 @@ async fn digest_contents_replaced_continues_using_old_data() -> Result<(), Error let file_entry = store.get_file_entry_for_digest(&digest).await?; { // The file contents should equal our initial data. - let mut reader = file_entry.read_file_part(0, u64::MAX).await?; + let mut reader = file_entry.read_file_part(0).await?; let mut file_contents = String::new(); - reader.read_to_string(&mut file_contents).await?; + std::io::Read::read_to_string(reader.as_std_mut(), &mut file_contents)?; assert_eq!(file_contents, VALUE1); } @@ -630,9 +658,9 @@ async fn digest_contents_replaced_continues_using_old_data() -> Result<(), Error { // The file contents still equal our old data. - let mut reader = file_entry.read_file_part(0, u64::MAX).await?; + let mut reader = file_entry.read_file_part(0).await?; let mut file_contents = String::new(); - reader.read_to_string(&mut file_contents).await?; + std::io::Read::read_to_string(reader.as_std_mut(), &mut file_contents)?; assert_eq!(file_contents, VALUE1); } @@ -723,11 +751,11 @@ async fn rename_on_insert_fails_due_to_filesystem_error_proper_cleanup_happens() let dir_entry = dir_entry?; { // Some filesystems won't sync automatically, so force it. - let file_handle = fs::open_file(dir_entry.path().into_os_string(), 0, u64::MAX) + let file_handle = fs::open_file(dir_entry.path().into_os_string(), 0) .await .err_tip(|| "Failed to open temp file")?; // We don't care if it fails, this is only best attempt. - drop(file_handle.get_ref().as_ref().sync_all().await); + drop(file_handle.as_std().sync_all()); } // Ensure we have written to the file too. This ensures we have an open file handle. 
// Failing to do this may result in the file existing, but the `update_fut` not actually @@ -983,7 +1011,7 @@ async fn update_whole_file_with_zero_digest() -> Result<(), Error> { let temp_file_path = Path::new(&temp_file_dir).join("zero-length-file"); std::fs::write(&temp_file_path, b"") .err_tip(|| format!("Writing to {temp_file_path:?}"))?; - let file_slot = fs::open_file(&temp_file_path, 0, 0).await?.into_inner(); + let file_slot = fs::open_file(&temp_file_path, 0).await?; store .update_with_whole_file( digest, @@ -1244,9 +1272,13 @@ async fn update_with_whole_file_closes_file() -> Result<(), Error> { let file_path = OsString::from(format!("{temp_path}/dummy_file")); let mut file = fs::create_file(&file_path).await?; { - file.write_all(value.as_bytes()).await?; - file.as_mut().sync_all().await?; - file.seek(tokio::io::SeekFrom::Start(0)).await?; + use std::io::{Seek, Write}; + file.as_std_mut().write_all(value.as_bytes()) + .err_tip(|| "Could not write to file")?; + file.as_std().sync_all() + .err_tip(|| "Could not sync file")?; + file.as_std_mut().seek(std::io::SeekFrom::Start(0)) + .err_tip(|| "Could not seek file")?; } store @@ -1288,7 +1320,8 @@ async fn update_with_whole_file_uses_same_inode() -> Result<(), Error> { let file_path = OsString::from(format!("{temp_path}/dummy_file")); let original_inode = { let file = fs::create_file(&file_path).await?; - let original_inode = file.as_ref().metadata().await?.ino(); + let original_inode = file.as_std().metadata() + .err_tip(|| "Could not get metadata")?.ino(); let result = store .update_with_whole_file( @@ -1305,14 +1338,8 @@ async fn update_with_whole_file_uses_same_inode() -> Result<(), Error> { original_inode }; - let expected_file_name = OsString::from(format!("{content_path}/{DIGEST_FOLDER}/{digest}")); - let new_inode = fs::create_file(expected_file_name) - .await - .unwrap() - .as_ref() - .metadata() - .await? 
- .ino(); + let expected_file_name = format!("{content_path}/{DIGEST_FOLDER}/{digest}"); + let new_inode = tokio::fs::metadata(&expected_file_name).await?.ino(); assert_eq!( original_inode, new_inode, "Expected the same inode for the file" @@ -1457,6 +1484,7 @@ async fn safe_small_safe_eviction() -> Result<(), Error> { messages: vec![format!( "{VALID_HASH}-{bytes} not found in filesystem store here" )], + details: vec![], }), "Expected data to not exist in store, because eviction" ); diff --git a/nativelink-store/tests/redis_store_test.rs b/nativelink-store/tests/redis_store_test.rs index 4d558b416..12cf8cb1c 100644 --- a/nativelink-store/tests/redis_store_test.rs +++ b/nativelink-store/tests/redis_store_test.rs @@ -639,7 +639,8 @@ fn test_connection_errors() { messages: vec![ "deadline has elapsed".into(), format!("While connecting to redis with url: redis://nativelink.com:6379/") - ] + ], + details: vec![], }, err ); @@ -738,7 +739,8 @@ async fn test_sentinel_connect_with_bad_master() { messages: vec![ "MasterNameNotFoundBySentinel: Master with given name not found in sentinel - MasterNameNotFoundBySentinel".into(), format!("While connecting to redis with url: redis+sentinel://127.0.0.1:{port}/") - ] + ], + details: vec![], }, RedisStore::new_standard(spec).await.unwrap_err() ); @@ -778,7 +780,8 @@ async fn test_redis_connect_timeout() { messages: vec![ "deadline has elapsed".into(), format!("While connecting to redis with url: redis://127.0.0.1:{port}/") - ] + ], + details: vec![], }, RedisStore::new_standard(spec).await.unwrap_err() ); diff --git a/nativelink-store/tests/shard_store_test.rs b/nativelink-store/tests/shard_store_test.rs index f8753849a..ac6b22988 100644 --- a/nativelink-store/tests/shard_store_test.rs +++ b/nativelink-store/tests/shard_store_test.rs @@ -81,7 +81,7 @@ async fn verify_weights( } for (index, (store, expected_hit)) in stores.iter().zip(expected_hits.iter()).enumerate() { - let total_hits = store.len_for_test(); + let total_hits = store.len_for_test().await; #[expect(clippy::print_stdout, reason = "improves debugging")] if print_results { println!("expected_hit: {expected_hit} - total_hits: {total_hits}"); diff --git a/nativelink-store/tests/worker_proxy_store_test.rs b/nativelink-store/tests/worker_proxy_store_test.rs new file mode 100644 index 000000000..641b335f0 --- /dev/null +++ b/nativelink-store/tests/worker_proxy_store_test.rs @@ -0,0 +1,839 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use core::pin::Pin; +use std::sync::Arc; + +use async_trait::async_trait; +use bytes::Bytes; +use nativelink_config::stores::MemorySpec; +use nativelink_error::{Code, Error, make_err}; +use nativelink_macro::nativelink_test; +use nativelink_metric::MetricsComponent; +use nativelink_store::memory_store::MemoryStore; +use nativelink_store::worker_proxy_store::WorkerProxyStore; +use nativelink_util::blob_locality_map::{SharedBlobLocalityMap, new_shared_blob_locality_map}; +use nativelink_util::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; +use nativelink_util::common::DigestInfo; +use nativelink_util::health_utils::{HealthStatusIndicator, default_health_status_indicator}; +use nativelink_util::store_trait::{ + IS_WORKER_REQUEST, ItemCallback, REDIRECT_PREFIX, Store, StoreDriver, StoreKey, StoreLike, + StoreOptimizations, UploadSizeInfo, +}; +use pretty_assertions::assert_eq; + +const VALID_HASH1: &str = "0123456789abcdef000000000000000000010000000000000123456789abcdef"; +const VALID_HASH2: &str = "0123456789abcdef000000000000000000020000000000000123456789abcdef"; +const VALID_HASH3: &str = "0123456789abcdef000000000000000000030000000000000123456789abcdef"; + +/// Helper: create a WorkerProxyStore backed by a fresh MemoryStore. +/// Returns (proxy_store_as_Store, inner_memory_store, locality_map). +fn make_proxy_store() -> (Store, Store, SharedBlobLocalityMap) { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let proxy = WorkerProxyStore::new(inner.clone(), locality_map.clone()); + (Store::new(proxy), inner, locality_map) +} + +// ------------------------------------------------------------------- +// 1. get_part delegates to inner store on hit +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_returns_data_from_inner_store_on_hit() -> Result<(), Error> { + let (proxy, _inner, locality_map) = make_proxy_store(); + + let value = b"hello from inner store"; + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + // Write directly through the proxy (which delegates update to inner). + proxy + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + // Register a fake worker in the locality map. If get_part were to + // consult it, it would try to connect and potentially fail or return + // different data. We verify the inner store data is returned instead. + locality_map + .write() + .register_blobs("fake-worker:9999", &[digest]); + + let result = proxy.get_part_unchunked(digest, 0, None).await?; + assert_eq!( + result.as_ref(), + value, + "Expected data from inner store, not from worker" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 2. get_part returns NotFound when inner misses and no peers +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_returns_not_found_when_inner_misses_and_no_peers() -> Result<(), Error> { + let (proxy, _inner, _locality_map) = make_proxy_store(); + + let digest = DigestInfo::try_new(VALID_HASH1, 42)?; + + let result = proxy.get_part_unchunked(digest, 0, None).await; + assert!(result.is_err(), "Expected an error for missing blob"); + + let err = result.unwrap_err(); + assert_eq!( + err.code, + Code::NotFound, + "Expected NotFound error code, got: {err:?}" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 3. 
has delegates to inner store (returns Some on hit)
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn has_returns_size_when_inner_has_blob() -> Result<(), Error> {
+    let (proxy, _inner, _locality_map) = make_proxy_store();
+
+    let value = b"test data for has";
+    let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+
+    proxy
+        .update_oneshot(digest, Bytes::from_static(value))
+        .await?;
+
+    let size = proxy.has(digest).await?;
+    assert_eq!(
+        size,
+        Some(value.len() as u64),
+        "has() should return the blob size from inner store"
+    );
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 4. has returns None when inner does not have blob
+//    (locality map is never consulted for has)
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn has_returns_none_when_inner_missing_even_if_locality_has_peers() -> Result<(), Error> {
+    let (proxy, _inner, locality_map) = make_proxy_store();
+
+    let digest = DigestInfo::try_new(VALID_HASH1, 100)?;
+
+    // Register the digest on a worker endpoint.
+    locality_map
+        .write()
+        .register_blobs("worker-a:50081", &[digest]);
+
+    // has() must NOT consult the locality map.
+    let size = proxy.has(digest).await?;
+    assert_eq!(
+        size, None,
+        "has() should return None even though locality map has the digest"
+    );
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 5. has_with_results delegates to inner store (pass-through)
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn has_with_results_delegates_to_inner_store() -> Result<(), Error> {
+    let (proxy, _inner, locality_map) = make_proxy_store();
+
+    let value = b"test data";
+    let d1 = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+    let d2 = DigestInfo::try_new(VALID_HASH2, 999)?;
+    let d3 = DigestInfo::try_new(VALID_HASH3, 50)?;
+
+    // Only d1 is in the inner store.
+    proxy
+        .update_oneshot(d1, Bytes::from_static(value))
+        .await?;
+
+    // Register d2 and d3 on workers — should NOT affect has_with_results.
+    {
+        let mut map = locality_map.write();
+        map.register_blobs("worker-a:50081", &[d2]);
+        map.register_blobs("worker-b:50081", &[d3]);
+    }
+
+    let keys: Vec<StoreKey> = vec![d1.into(), d2.into(), d3.into()];
+    let mut results = vec![None; 3];
+    proxy.has_with_results(&keys, &mut results).await?;
+
+    assert_eq!(
+        results[0],
+        Some(value.len() as u64),
+        "d1 should be found in inner store"
+    );
+    assert_eq!(
+        results[1], None,
+        "d2 should NOT be found — has_with_results must not consult locality map"
+    );
+    assert_eq!(
+        results[2], None,
+        "d3 should NOT be found — has_with_results must not consult locality map"
+    );
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 6. has_with_results on empty digest list succeeds
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn has_with_results_empty_digests_succeeds() -> Result<(), Error> {
+    let (proxy, _inner, _locality_map) = make_proxy_store();
+
+    let keys: Vec<StoreKey> = vec![];
+    let mut results: Vec<Option<u64>> = vec![];
+    proxy.has_with_results(&keys, &mut results).await?;
+
+    // No assertions needed beyond not panicking.
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 7.
update_oneshot delegates to inner store +// ------------------------------------------------------------------- +#[nativelink_test] +async fn update_oneshot_stores_in_inner() -> Result<(), Error> { + let (proxy, inner, _locality_map) = make_proxy_store(); + + let value = b"upload via proxy"; + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + proxy + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + // Verify the blob landed in the inner store directly. + let inner_data = inner.get_part_unchunked(digest, 0, None).await?; + assert_eq!( + inner_data.as_ref(), + value, + "Data should be present in the inner store after update_oneshot" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 8. get_part with offset and length on inner hit +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_with_offset_and_length_from_inner() -> Result<(), Error> { + let (proxy, _inner, _locality_map) = make_proxy_store(); + + let value = b"0123456789abcdefghij"; // 20 bytes + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + proxy + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + // Read bytes [5..15) — 10 bytes at offset 5. + let data = proxy.get_part_unchunked(digest, 5, Some(10)).await?; + assert_eq!( + data.as_ref(), + b"56789abcde", + "Expected subset at offset=5, length=10" + ); + + // Read from offset 15 to end. + let data = proxy.get_part_unchunked(digest, 15, None).await?; + assert_eq!(data.as_ref(), b"fghij", "Expected tail from offset=15"); + + // Read 0 bytes. + let data = proxy.get_part_unchunked(digest, 0, Some(0)).await?; + assert_eq!(data.as_ref(), b"", "Expected empty result for length=0"); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 9. Inner miss + locality has peers for a DIFFERENT digest +// => the queried digest is still NotFound (locality map miss) +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_inner_miss_locality_has_different_digest_returns_not_found() -> Result<(), Error> { + let (proxy, _inner, locality_map) = make_proxy_store(); + + let d1 = DigestInfo::try_new(VALID_HASH1, 100)?; + let d2 = DigestInfo::try_new(VALID_HASH2, 200)?; + + // Register d2 on a worker, but NOT d1. + locality_map + .write() + .register_blobs("worker-a:50081", &[d2]); + + // Query d1 — not in inner store, not in locality map. + let result = proxy.get_part_unchunked(d1, 0, None).await; + assert!(result.is_err(), "Expected NotFound for d1"); + + let err = result.unwrap_err(); + assert_eq!( + err.code, + Code::NotFound, + "Expected NotFound since d1 has no locality entries, got: {err:?}" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 10. Locality map returns empty workers list after eviction +// => NotFound (no peers to try) +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_inner_miss_locality_evicted_returns_not_found() -> Result<(), Error> { + let (proxy, _inner, locality_map) = make_proxy_store(); + + let digest = DigestInfo::try_new(VALID_HASH1, 100)?; + + // Register then evict the digest. + { + let mut map = locality_map.write(); + map.register_blobs("worker-a:50081", &[digest]); + map.evict_blobs("worker-a:50081", &[digest]); + } + + // Now there are no workers for this digest. 
+    let result = proxy.get_part_unchunked(digest, 0, None).await;
+    assert!(result.is_err(), "Expected NotFound after eviction");
+
+    let err = result.unwrap_err();
+    assert_eq!(
+        err.code,
+        Code::NotFound,
+        "Expected NotFound since locality was evicted, got: {err:?}"
+    );
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 11. update followed by get_part roundtrip
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn update_then_get_roundtrip() -> Result<(), Error> {
+    let (proxy, _inner, _locality_map) = make_proxy_store();
+
+    let value = b"roundtrip data payload";
+    let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+
+    // Upload via proxy.
+    proxy
+        .update_oneshot(digest, Bytes::from_static(value))
+        .await?;
+
+    // Verify has() works.
+    let size = proxy.has(digest).await?;
+    assert_eq!(size, Some(value.len() as u64));
+
+    // Verify get_part returns the correct data.
+    let data = proxy.get_part_unchunked(digest, 0, None).await?;
+    assert_eq!(data.as_ref(), value);
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 12. Multiple blobs: has_with_results shows correct presence
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn has_with_results_multiple_blobs_mixed() -> Result<(), Error> {
+    let (proxy, _inner, _locality_map) = make_proxy_store();
+
+    let v1 = b"first blob";
+    let v3 = b"third blob";
+    let d1 = DigestInfo::try_new(VALID_HASH1, v1.len() as u64)?;
+    let d2 = DigestInfo::try_new(VALID_HASH2, 999)?; // not stored
+    let d3 = DigestInfo::try_new(VALID_HASH3, v3.len() as u64)?;
+
+    proxy
+        .update_oneshot(d1, Bytes::from_static(v1))
+        .await?;
+    proxy
+        .update_oneshot(d3, Bytes::from_static(v3))
+        .await?;
+
+    let keys: Vec<StoreKey> = vec![d1.into(), d2.into(), d3.into()];
+    let mut results = vec![None; 3];
+    proxy.has_with_results(&keys, &mut results).await?;
+
+    assert_eq!(results[0], Some(v1.len() as u64), "d1 should be found");
+    assert_eq!(results[1], None, "d2 should not be found");
+    assert_eq!(results[2], Some(v3.len() as u64), "d3 should be found");
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 13. get_part for a blob that was never stored and has no locality
+//     entries returns NotFound (different digest, not in map at all)
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn get_part_completely_unknown_digest_returns_not_found() -> Result<(), Error> {
+    let (proxy, _inner, locality_map) = make_proxy_store();
+
+    // Register a DIFFERENT digest on a worker (not the one we query).
+    let other_digest = DigestInfo::try_new(VALID_HASH2, 50)?;
+    locality_map
+        .write()
+        .register_blobs("worker-x:50081", &[other_digest]);
+
+    // Query a digest that is not in the inner store and not in the
+    // locality map at all.
+    let query_digest = DigestInfo::try_new(VALID_HASH1, 100)?;
+    let result = proxy.get_part_unchunked(query_digest, 0, None).await;
+
+    assert!(result.is_err());
+    assert_eq!(result.unwrap_err().code, Code::NotFound);
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 14. Overwrite a blob via update and verify new data is returned
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn update_overwrites_existing_blob() -> Result<(), Error> {
+    let (proxy, _inner, _locality_map) = make_proxy_store();
+
+    let digest = DigestInfo::try_new(VALID_HASH1, 5)?;
+
+    proxy
+        .update_oneshot(digest, Bytes::from_static(b"first"))
+        .await?;
+
+    let data = proxy.get_part_unchunked(digest, 0, None).await?;
+    assert_eq!(data.as_ref(), b"first");
+
+    // Overwrite with new data (same digest key, different content for
+    // MemoryStore which doesn't validate content hash).
+    proxy
+        .update_oneshot(digest, Bytes::from_static(b"secnd"))
+        .await?;
+
+    let data = proxy.get_part_unchunked(digest, 0, None).await?;
+    assert_eq!(data.as_ref(), b"secnd");
+
+    Ok(())
+}
+
+// -------------------------------------------------------------------
+// 15. Non-NotFound errors from inner store propagate directly
+//     (no locality map fallback)
+// -------------------------------------------------------------------
+// Note: This is difficult to test without a custom mock store that
+// returns a non-NotFound error. The inline tests cover this via the
+// match arm in get_part(). We verify the pattern indirectly: a
+// successful inner read never consults the locality map (test 1),
+// and NotFound triggers the locality path (tests 2, 9, 10).
+
+// -------------------------------------------------------------------
+// 16. Large blob roundtrip through the proxy
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn large_blob_roundtrip() -> Result<(), Error> {
+    let (proxy, _inner, _locality_map) = make_proxy_store();
+
+    // 1 MiB of repeated bytes
+    let size: usize = 1024 * 1024;
+    let value: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+    let digest = DigestInfo::try_new(VALID_HASH1, size as u64)?;
+
+    proxy
+        .update_oneshot(digest, Bytes::from(value.clone()))
+        .await?;
+
+    let data = proxy.get_part_unchunked(digest, 0, None).await?;
+    assert_eq!(data.len(), size, "Returned blob size should match");
+    assert_eq!(data.as_ref(), value.as_slice());
+
+    Ok(())
+}
+
+// ===================================================================
+// Gap 1: Successful peer proxy read — inject a MemoryStore as a peer
+// ===================================================================
+
+/// Helper: create a WorkerProxyStore and return the underlying Arc so we
+/// can call inject_worker_connection().
+fn make_proxy_store_with_arc() -> (Arc<WorkerProxyStore>, Store, SharedBlobLocalityMap) {
+    let inner = Store::new(MemoryStore::new(&MemorySpec::default()));
+    let locality_map = new_shared_blob_locality_map();
+    let proxy_arc = WorkerProxyStore::new(inner.clone(), locality_map.clone());
+    (proxy_arc, inner, locality_map)
+}
+
+// -------------------------------------------------------------------
+// 17. Successful peer proxy read: inner miss, peer has the blob
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn get_part_proxies_from_injected_peer() -> Result<(), Error> {
+    let (proxy_arc, _inner, locality_map) = make_proxy_store_with_arc();
+    let proxy = Store::new(proxy_arc.clone());
+
+    let value = b"data from the peer worker";
+    let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+
+    // Create a "peer" MemoryStore and populate it with the blob.
+ let peer_store = Store::new(MemoryStore::new(&MemorySpec::default())); + peer_store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + // Inject the peer store as a worker connection. + let peer_endpoint = "grpc://peer-worker:50081"; + proxy_arc.inject_worker_connection(peer_endpoint, peer_store); + + // Register the digest on the peer in the locality map. + locality_map + .write() + .register_blobs(peer_endpoint, &[digest]); + + // The inner store is empty, so get_part should proxy from the peer. + let result = proxy.get_part_unchunked(digest, 0, None).await?; + assert_eq!( + result.as_ref(), + value, + "Expected blob data from the injected peer store" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 18. Peer proxy read with offset and length +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_proxies_from_peer_with_offset() -> Result<(), Error> { + let (proxy_arc, _inner, locality_map) = make_proxy_store_with_arc(); + let proxy = Store::new(proxy_arc.clone()); + + let value = b"0123456789abcdef"; // 16 bytes + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + let peer_store = Store::new(MemoryStore::new(&MemorySpec::default())); + peer_store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + + let peer_endpoint = "grpc://peer-worker:50081"; + proxy_arc.inject_worker_connection(peer_endpoint, peer_store); + locality_map + .write() + .register_blobs(peer_endpoint, &[digest]); + + // Read bytes [4..12) from the peer. + let result = proxy.get_part_unchunked(digest, 4, Some(8)).await?; + assert_eq!( + result.as_ref(), + b"456789ab", + "Expected subset from peer at offset=4, length=8" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 19. Peer proxy: first peer doesn't have blob, second peer does +// ------------------------------------------------------------------- +#[nativelink_test] +async fn get_part_skips_peer_without_blob_and_reads_from_next() -> Result<(), Error> { + let (proxy_arc, _inner, locality_map) = make_proxy_store_with_arc(); + let proxy = Store::new(proxy_arc.clone()); + + let value = b"only on peer-b"; + let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?; + + // Peer A: empty store (has() returns None). + let peer_a_store = Store::new(MemoryStore::new(&MemorySpec::default())); + let peer_a_endpoint = "grpc://peer-a:50081"; + proxy_arc.inject_worker_connection(peer_a_endpoint, peer_a_store); + + // Peer B: has the blob. + let peer_b_store = Store::new(MemoryStore::new(&MemorySpec::default())); + peer_b_store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + let peer_b_endpoint = "grpc://peer-b:50081"; + proxy_arc.inject_worker_connection(peer_b_endpoint, peer_b_store); + + // Register the digest on both peers. 
+    {
+        let mut map = locality_map.write();
+        map.register_blobs(peer_a_endpoint, &[digest]);
+        map.register_blobs(peer_b_endpoint, &[digest]);
+    }
+
+    let result = proxy.get_part_unchunked(digest, 0, None).await?;
+    assert_eq!(
+        result.as_ref(),
+        value,
+        "Expected data from peer-b after peer-a returned None for has()"
+    );
+
+    Ok(())
+}
+
+// ===================================================================
+// Gap 2: Resume-from-offset — PartialFailStore + next peer
+// ===================================================================
+
+/// A store wrapper that delegates to an inner store but fails `get_part`
+/// after writing a configured number of bytes. Used to test streaming
+/// resume logic in WorkerProxyStore.
+#[derive(Debug, MetricsComponent)]
+struct PartialFailStore {
+    inner: Store,
+    /// Number of bytes to successfully write before returning an error.
+    fail_after_bytes: u64,
+}
+
+default_health_status_indicator!(PartialFailStore);
+
+#[async_trait]
+impl StoreDriver for PartialFailStore {
+    async fn has_with_results(
+        self: Pin<&Self>,
+        digests: &[StoreKey<'_>],
+        results: &mut [Option<u64>],
+    ) -> Result<(), Error> {
+        self.inner.has_with_results(digests, results).await
+    }
+
+    async fn update(
+        self: Pin<&Self>,
+        key: StoreKey<'_>,
+        reader: DropCloserReadHalf,
+        upload_size: UploadSizeInfo,
+    ) -> Result<(), Error> {
+        self.inner.update(key, reader, upload_size).await
+    }
+
+    async fn get_part(
+        self: Pin<&Self>,
+        key: StoreKey<'_>,
+        writer: &mut DropCloserWriteHalf,
+        offset: u64,
+        length: Option<u64>,
+    ) -> Result<(), Error> {
+        // Read the full blob from the inner store.
+        let data = self.inner.get_part_unchunked(key.borrow(), offset, length).await?;
+
+        // Write up to `fail_after_bytes` bytes, then return an error.
+        let write_len = core::cmp::min(data.len() as u64, self.fail_after_bytes) as usize;
+        if write_len > 0 {
+            writer
+                .send(data.slice(..write_len))
+                .await
+                .map_err(|e| make_err!(Code::Internal, "PartialFailStore write error: {e:?}"))?;
+        }
+
+        Err(make_err!(
+            Code::Internal,
+            "PartialFailStore: simulated failure after {} bytes",
+            write_len
+        ))
+    }
+
+    fn inner_store(&self, _key: Option<StoreKey>) -> &dyn StoreDriver {
+        self
+    }
+
+    fn as_any<'a>(&'a self) -> &'a (dyn core::any::Any + Sync + Send + 'static) {
+        self
+    }
+
+    fn as_any_arc(self: Arc<Self>) -> Arc<dyn core::any::Any + Sync + Send + 'static> {
+        self
+    }
+
+    fn register_item_callback(
+        self: Arc<Self>,
+        _callback: Arc<dyn ItemCallback>,
+    ) -> Result<(), Error> {
+        Ok(())
+    }
+}
+
+// -------------------------------------------------------------------
+// 20. Resume from offset: first peer fails mid-stream, second succeeds
+// -------------------------------------------------------------------
+#[nativelink_test]
+async fn get_part_resumes_from_next_peer_after_mid_stream_failure() -> Result<(), Error> {
+    let (proxy_arc, _inner, locality_map) = make_proxy_store_with_arc();
+    let proxy = Store::new(proxy_arc.clone());
+
+    let value = b"0123456789abcdef"; // 16 bytes
+    let digest = DigestInfo::try_new(VALID_HASH1, value.len() as u64)?;
+
+    // Peer A: a PartialFailStore that writes 5 bytes then fails.
+    let peer_a_inner = Store::new(MemoryStore::new(&MemorySpec::default()));
+    peer_a_inner
+        .update_oneshot(digest, Bytes::from_static(value))
+        .await?;
+    let peer_a_store = Store::new(Arc::new(PartialFailStore {
+        inner: peer_a_inner,
+        fail_after_bytes: 5,
+    }));
+    let peer_a_endpoint = "grpc://peer-a:50081";
+    proxy_arc.inject_worker_connection(peer_a_endpoint, peer_a_store);
+
+    // Peer B: has the full blob (normal MemoryStore).
+ let peer_b_store = Store::new(MemoryStore::new(&MemorySpec::default())); + peer_b_store + .update_oneshot(digest, Bytes::from_static(value)) + .await?; + let peer_b_endpoint = "grpc://peer-b:50081"; + proxy_arc.inject_worker_connection(peer_b_endpoint, peer_b_store); + + // Register the digest on both peers. The order in the locality map + // determines which peer is tried first. We register A first. + { + let mut map = locality_map.write(); + map.register_blobs(peer_a_endpoint, &[digest]); + map.register_blobs(peer_b_endpoint, &[digest]); + } + + // The proxy should: try peer A, get 5 bytes, fail, then resume from + // peer B at offset 5. The final result should be the complete blob. + let result = proxy.get_part_unchunked(digest, 0, None).await?; + assert_eq!( + result.as_ref(), + value, + "Expected complete blob after resume from second peer" + ); + + Ok(()) +} + +// =================================================================== +// Gap 3: IS_WORKER_REQUEST branching tests +// =================================================================== + +// ------------------------------------------------------------------- +// 21. IS_WORKER_REQUEST=true: inner miss + locality has peer +// => FailedPrecondition redirect with peer endpoint +// ------------------------------------------------------------------- +#[nativelink_test] +async fn worker_request_returns_redirect_with_peer_endpoints() -> Result<(), Error> { + let (proxy, _inner, locality_map) = make_proxy_store(); + + let digest = DigestInfo::try_new(VALID_HASH1, 100)?; + let peer_endpoint = "grpc://peer-worker:50071"; + + locality_map + .write() + .register_blobs(peer_endpoint, &[digest]); + + let result = IS_WORKER_REQUEST + .scope(true, proxy.get_part_unchunked(digest, 0, None)) + .await; + + assert!(result.is_err(), "Expected redirect error for worker request"); + let err = result.unwrap_err(); + assert_eq!( + err.code, + Code::FailedPrecondition, + "Redirect should use FailedPrecondition, got: {err:?}" + ); + let msg = err.message_string(); + assert!( + msg.contains(REDIRECT_PREFIX), + "Error message should contain redirect prefix: {msg}" + ); + assert!( + msg.contains(peer_endpoint), + "Error message should contain peer endpoint: {msg}" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 22. IS_WORKER_REQUEST=false: inner miss + locality has peer with +// invalid URI => NotFound (proxy attempt fails gracefully) +// ------------------------------------------------------------------- +#[nativelink_test] +async fn non_worker_request_returns_not_found_when_peer_unreachable() -> Result<(), Error> { + let (proxy, _inner, locality_map) = make_proxy_store(); + + let digest = DigestInfo::try_new(VALID_HASH1, 100)?; + + // Invalid URI fails during create_worker_connection. + locality_map + .write() + .register_blobs("not a valid uri", &[digest]); + + let result = IS_WORKER_REQUEST + .scope(false, proxy.get_part_unchunked(digest, 0, None)) + .await; + + assert!(result.is_err(), "Expected NotFound error"); + let err = result.unwrap_err(); + assert_eq!( + err.code, + Code::NotFound, + "Non-worker request should get NotFound, got: {err:?}" + ); + + Ok(()) +} + +// =================================================================== +// Gap 4: optimized_for tests +// =================================================================== + +// ------------------------------------------------------------------- +// 23. 
optimized_for(LazyExistenceOnSync) returns true +// ------------------------------------------------------------------- +#[nativelink_test] +async fn optimized_for_lazy_existence_returns_true() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let proxy = WorkerProxyStore::new(inner, locality_map); + + assert!( + StoreDriver::optimized_for(&*proxy, StoreOptimizations::LazyExistenceOnSync), + "WorkerProxyStore should report LazyExistenceOnSync" + ); + + Ok(()) +} + +// ------------------------------------------------------------------- +// 24. optimized_for(other) delegates to inner store +// ------------------------------------------------------------------- +#[nativelink_test] +async fn optimized_for_other_delegates_to_inner() -> Result<(), Error> { + let inner = Store::new(MemoryStore::new(&MemorySpec::default())); + let locality_map = new_shared_blob_locality_map(); + let proxy = WorkerProxyStore::new(inner, locality_map); + + assert!( + !StoreDriver::optimized_for(&*proxy, StoreOptimizations::NoopUpdates), + "Should delegate non-LazyExistence optimizations to inner store" + ); + + Ok(()) +} diff --git a/nativelink-util/Cargo.toml b/nativelink-util/Cargo.toml index 7001cd075..12566b090 100644 --- a/nativelink-util/Cargo.toml +++ b/nativelink-util/Cargo.toml @@ -15,7 +15,7 @@ nativelink-proto = { path = "../nativelink-proto" } async-trait = { version = "0.1.88", default-features = false } base64 = { version = "0.22.1", default-features = false, features = ["std"] } bitflags = { version = "2.9.0", default-features = false } -blake3 = { version = "1.8.0", features = ["mmap"], default-features = false } +blake3 = { version = "1.8.0", features = ["mmap", "rayon"], default-features = false } bytes = { version = "1.10.1", default-features = false } futures = { version = "0.3.31", features = [ "async-await", @@ -27,37 +27,38 @@ hyper-util = { version = "0.1.11", default-features = false } libc = { version = "0.2.177", default-features = false } lru = { version = "0.16.0", default-features = false } mock_instant = { version = "0.5.3", default-features = false } -opentelemetry = { version = "0.29.0", default-features = false } -opentelemetry-appender-tracing = { version = "0.29.1", default-features = false } -opentelemetry-http = { version = "0.29.0", default-features = false } -opentelemetry-otlp = { version = "0.29.0", default-features = false, features = [ +opentelemetry = { version = "0.31.0", default-features = false } +opentelemetry-appender-tracing = { version = "0.31.1", default-features = false } +opentelemetry-http = { version = "0.31.0", default-features = false } +opentelemetry-otlp = { version = "0.31.0", default-features = false, features = [ "grpc-tonic", "logs", "metrics", "trace", "zstd-tonic", ] } -opentelemetry-semantic-conventions = { version = "0.29.0", default-features = false, features = [ +opentelemetry-semantic-conventions = { version = "0.31.0", default-features = false, features = [ "default", "semconv_experimental", ] } -opentelemetry_sdk = { version = "0.29.0", default-features = false } +opentelemetry_sdk = { version = "0.31.0", default-features = false } parking_lot = { version = "0.12.3", features = [ "arc_lock", "send_guard", ], default-features = false } pin-project = { version = "1.1.10", default-features = false } pin-project-lite = { version = "0.2.16", default-features = false } -prost = { version = "0.13.5", default-features = false } -prost-types = { version = 
"0.13.5", default-features = false, features = [ +prost = { version = "0.14.3", default-features = false } +prost-types = { version = "0.14.3", default-features = false, features = [ "std", ] } rand = { version = "0.9.0", default-features = false, features = [ "thread_rng", ] } +rayon = { version = "1.10.0", default-features = false } rlimit = { version = "0.10.2", default-features = false } serde = { version = "1.0.219", default-features = false } -sha2 = { version = "0.10.8", default-features = false } +sha2 = { version = "0.10.8", default-features = false, features = ["asm"] } tempfile = { version = "3.20.0", default-features = false } tokio = { version = "1.44.1", features = [ "fs", @@ -69,14 +70,14 @@ tokio-stream = { version = "0.1.17", features = [ "fs", ], default-features = false } tokio-util = { version = "0.7.14", default-features = false } -tonic = { version = "0.13.0", features = [ +tonic = { version = "0.14.5", features = [ "tls-native-roots", - "tls-ring", + "tls-aws-lc", "transport", ], default-features = false } tower = { version = "0.5.2", default-features = false } tracing = { version = "0.1.41", default-features = false } -tracing-opentelemetry = { version = "0.30.0", default-features = false, features = [ +tracing-opentelemetry = { version = "0.32.1", default-features = false, features = [ "metrics", ] } tracing-subscriber = { version = "0.3.19", features = [ diff --git a/nativelink-util/src/blob_locality_map.rs b/nativelink-util/src/blob_locality_map.rs new file mode 100644 index 000000000..16a28a454 --- /dev/null +++ b/nativelink-util/src/blob_locality_map.rs @@ -0,0 +1,483 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::SystemTime; + +use crate::common::DigestInfo; +use parking_lot::RwLock; + +/// Tracks which worker endpoints have which blobs, enabling peer-to-peer +/// blob fetching between workers. +/// +/// The map is bidirectional: +/// - `blobs`: digest → { endpoint → last_registered_timestamp } +/// - `endpoint_blobs`: endpoint → set of digests (for fast cleanup on disconnect) +/// +/// Cleanup relies entirely on explicit eviction notifications and worker +/// disconnect (no TTL — EvictingMap's `max_seconds_since_last_access` defaults +/// to unlimited). +#[derive(Debug)] +pub struct BlobLocalityMap { + /// digest → { endpoint → timestamp } + blobs: HashMap, SystemTime>>, + /// endpoint → set of digests (for fast cleanup on disconnect) + endpoint_blobs: HashMap, HashSet>, +} + +impl BlobLocalityMap { + pub fn new() -> Self { + Self { + blobs: HashMap::new(), + endpoint_blobs: HashMap::new(), + } + } + + /// Register that the given digests are available on the given endpoint. 
+    /// Register that the given digests are available on the given endpoint.
+    pub fn register_blobs(&mut self, endpoint: &str, digests: &[DigestInfo]) {
+        let now = SystemTime::now();
+        self.register_blobs_with_timestamps(
+            endpoint,
+            &digests.iter().map(|d| (*d, now)).collect::<Vec<_>>(),
+        );
+    }
+
+    /// Register digests with explicit timestamps (e.g. from BlobDigestInfo).
+    pub fn register_blobs_with_timestamps(
+        &mut self,
+        endpoint: &str,
+        digests_with_ts: &[(DigestInfo, SystemTime)],
+    ) {
+        // Allocate the endpoint Arc once; clones are O(1) atomic increments
+        // instead of O(N) String allocations per digest.
+        let ep: Arc<str> = endpoint.into();
+        let digest_set = self
+            .endpoint_blobs
+            .entry(ep.clone())
+            .or_default();
+
+        for (digest, ts) in digests_with_ts {
+            digest_set.insert(*digest);
+            self.blobs
+                .entry(*digest)
+                .or_default()
+                .insert(ep.clone(), *ts);
+        }
+    }
+
+    /// Remove specific digests from the given endpoint (eviction notification).
+    pub fn evict_blobs(&mut self, endpoint: &str, digests: &[DigestInfo]) {
+        if let Some(digest_set) = self.endpoint_blobs.get_mut(endpoint) {
+            for digest in digests {
+                digest_set.remove(digest);
+                if let Some(endpoints) = self.blobs.get_mut(digest) {
+                    endpoints.remove(endpoint);
+                    if endpoints.is_empty() {
+                        self.blobs.remove(digest);
+                    }
+                }
+            }
+            if digest_set.is_empty() {
+                self.endpoint_blobs.remove(endpoint);
+            }
+        }
+    }
+
+    /// Remove ALL entries for an endpoint (worker disconnect).
+    pub fn remove_endpoint(&mut self, endpoint: &str) {
+        if let Some(digests) = self.endpoint_blobs.remove(endpoint) {
+            for digest in &digests {
+                if let Some(endpoints) = self.blobs.get_mut(digest) {
+                    endpoints.remove(endpoint);
+                    if endpoints.is_empty() {
+                        self.blobs.remove(digest);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Look up which worker endpoints have the given digest.
+    /// Returns all endpoints that have registered this digest.
+    ///
+    /// Workers refresh their timestamps on every BlobsAvailable update
+    /// (typically every ~500ms), so stale entries are only possible if
+    /// a worker disconnects without cleanup. Disconnects are handled
+    /// via `remove_endpoint`, so we can simply return all endpoints.
+    pub fn lookup_workers(&self, digest: &DigestInfo) -> Vec<Arc<str>> {
+        let Some(endpoints) = self.blobs.get(digest) else {
+            return Vec::new();
+        };
+
+        endpoints.keys().cloned().collect()
+    }
+
+    /// Look up which worker endpoints have the given digest, including the
+    /// timestamp of when the blob was last registered/refreshed on each endpoint.
+    /// Useful for preferring workers with more recently-refreshed locality data.
+    pub fn lookup_workers_with_timestamps(&self, digest: &DigestInfo) -> Vec<(Arc<str>, SystemTime)> {
+        let Some(endpoints) = self.blobs.get(digest) else {
+            return Vec::new();
+        };
+
+        endpoints
+            .iter()
+            .map(|(endpoint, ts)| (endpoint.clone(), *ts))
+            .collect()
+    }
+
+    /// Returns the set of all known endpoints.
+    pub fn all_endpoints(&self) -> Vec<Arc<str>> {
+        self.endpoint_blobs.keys().cloned().collect()
+    }
+
+    /// Returns the number of tracked digests.
+    pub fn digest_count(&self) -> usize {
+        self.blobs.len()
+    }
+
+    /// Returns the number of tracked endpoints.
+    pub fn endpoint_count(&self) -> usize {
+        self.endpoint_blobs.len()
+    }
+
+    /// Raw access to the blobs map for bulk scoring.
+    /// Caller must hold the read lock.
+    pub fn blobs_map(&self) -> &HashMap<DigestInfo, HashMap<Arc<str>, SystemTime>> {
+        &self.blobs
+    }
+}
+
+impl Default for BlobLocalityMap {
+    fn default() -> Self {
+        Self::new()
+    }
+}
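+/// Illustrative usage sketch (not part of the patch; the endpoint string is
+/// made up): register, look up, then clean up on worker disconnect.
+#[allow(dead_code)]
+fn example_locality_flow() {
+    let mut map = BlobLocalityMap::new();
+    let digest = DigestInfo::new([0u8; 32], 64);
+
+    map.register_blobs("grpc://worker-a:50081", &[digest]);
+    assert_eq!(map.lookup_workers(&digest).len(), 1);
+
+    // A disconnect drops every entry for that endpoint in one call.
+    map.remove_endpoint("grpc://worker-a:50081");
+    assert!(map.lookup_workers(&digest).is_empty());
+}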
+/// Thread-safe shared handle to a `BlobLocalityMap`.
+pub type SharedBlobLocalityMap = Arc<RwLock<BlobLocalityMap>>;
+
+/// Create a new shared blob locality map.
+pub fn new_shared_blob_locality_map() -> SharedBlobLocalityMap {
+    Arc::new(RwLock::new(BlobLocalityMap::new()))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_register_and_lookup() {
+        let mut map = BlobLocalityMap::new();
+        let d1 = DigestInfo::new([1u8; 32], 100);
+        let d2 = DigestInfo::new([2u8; 32], 200);
+
+        map.register_blobs("worker-a:50081", &[d1, d2]);
+        map.register_blobs("worker-b:50081", &[d1]);
+
+        let workers = map.lookup_workers(&d1);
+        assert_eq!(workers.len(), 2);
+        assert!(workers.contains(&Arc::from("worker-a:50081")));
+        assert!(workers.contains(&Arc::from("worker-b:50081")));
+
+        let workers = map.lookup_workers(&d2);
+        assert_eq!(workers.len(), 1);
+        assert!(workers.contains(&Arc::from("worker-a:50081")));
+    }
+
+    #[test]
+    fn test_evict_blobs() {
+        let mut map = BlobLocalityMap::new();
+        let d1 = DigestInfo::new([1u8; 32], 100);
+        let d2 = DigestInfo::new([2u8; 32], 200);
+
+        map.register_blobs("worker-a:50081", &[d1, d2]);
+        map.evict_blobs("worker-a:50081", &[d1]);
+
+        assert!(map.lookup_workers(&d1).is_empty());
+        assert_eq!(map.lookup_workers(&d2).len(), 1);
+    }
+
+    #[test]
+    fn test_remove_endpoint() {
+        let mut map = BlobLocalityMap::new();
+        let d1 = DigestInfo::new([1u8; 32], 100);
+        let d2 = DigestInfo::new([2u8; 32], 200);
+
+        map.register_blobs("worker-a:50081", &[d1, d2]);
+        map.register_blobs("worker-b:50081", &[d1]);
+
+        map.remove_endpoint("worker-a:50081");
+
+        // d1 still available on worker-b
+        let workers = map.lookup_workers(&d1);
+        assert_eq!(workers.len(), 1);
+        assert!(workers.contains(&Arc::from("worker-b:50081")));
+
+        // d2 no longer available anywhere
+        assert!(map.lookup_workers(&d2).is_empty());
+    }
+
+    #[test]
+    fn test_lookup_unknown_digest() {
+        let map = BlobLocalityMap::new();
+        let d1 = DigestInfo::new([1u8; 32], 100);
+        assert!(map.lookup_workers(&d1).is_empty());
+    }
+
+    #[test]
+    fn test_blobs_map_accessor() {
+        let mut map = BlobLocalityMap::new();
+        let d1 = DigestInfo::new([1u8; 32], 100);
+        let d2 = DigestInfo::new([2u8; 32], 200);
+
+        map.register_blobs("worker-a:50081", &[d1, d2]);
+        map.register_blobs("worker-b:50081", &[d1]);
+
+        let blobs = map.blobs_map();
+        assert_eq!(blobs.len(), 2);
+
+        // d1 has two endpoints
+        let d1_endpoints = blobs.get(&d1).unwrap();
+        assert_eq!(d1_endpoints.len(), 2);
+        assert!(d1_endpoints.contains_key("worker-a:50081"));
+        assert!(d1_endpoints.contains_key("worker-b:50081"));
+
+        // d2 has one endpoint
+        let d2_endpoints = blobs.get(&d2).unwrap();
+        assert_eq!(d2_endpoints.len(), 1);
+        assert!(d2_endpoints.contains_key("worker-a:50081"));
+    }
+
+    #[test]
+    fn test_re_registration_updates_timestamp() {
+        let mut map = BlobLocalityMap::new();
+        let d1 = DigestInfo::new([1u8; 32], 100);
+
+        map.register_blobs("worker-a", &[d1]);
+        let ts1 = *map
+            .blobs_map()
+            .get(&d1)
+            .unwrap()
+            .get("worker-a")
+            .unwrap();
+
+        // Spin until the clock advances (SystemTime resolution varies by OS).
+ loop { + if SystemTime::now() > ts1 { + break; + } + } + + map.register_blobs("worker-a", &[d1]); + let ts2 = *map + .blobs_map() + .get(&d1) + .unwrap() + .get("worker-a") + .unwrap(); + + assert!( + ts2 > ts1, + "Expected re-registration to update timestamp: ts1={ts1:?}, ts2={ts2:?}" + ); + } + + #[test] + fn test_evict_all_blobs_removes_endpoint() { + let mut map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + + map.register_blobs("worker-a", &[d1, d2]); + assert_eq!(map.endpoint_count(), 1); + + map.evict_blobs("worker-a", &[d1, d2]); + + assert_eq!(map.endpoint_count(), 0); + assert_eq!(map.digest_count(), 0); + assert!(map.lookup_workers(&d1).is_empty()); + assert!(map.lookup_workers(&d2).is_empty()); + // endpoint_blobs should be fully cleaned up + assert!(map.all_endpoints().is_empty()); + } + + #[test] + fn test_partial_eviction_preserves_remaining() { + let mut map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + let d3 = DigestInfo::new([3u8; 32], 300); + + map.register_blobs("worker-a", &[d1, d2, d3]); + assert_eq!(map.digest_count(), 3); + assert_eq!(map.endpoint_count(), 1); + + map.evict_blobs("worker-a", &[d1]); + + assert!(map.lookup_workers(&d1).is_empty()); + assert_eq!(map.lookup_workers(&d2), vec![Arc::from("worker-a")]); + assert_eq!(map.lookup_workers(&d3), vec![Arc::from("worker-a")]); + assert_eq!(map.digest_count(), 2); + assert_eq!(map.endpoint_count(), 1); + } + + #[test] + fn test_evict_unknown_digest_is_noop() { + let mut map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + + map.register_blobs("worker-a", &[d1]); + + // Evict a digest that was never registered — should not panic. 
+ map.evict_blobs("worker-a", &[d2]); + + assert_eq!(map.lookup_workers(&d1), vec![Arc::from("worker-a")]); + assert_eq!(map.endpoint_count(), 1); + assert_eq!(map.digest_count(), 1); + } + + #[test] + fn test_complex_multi_endpoint_topology() { + let mut map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + let d3 = DigestInfo::new([3u8; 32], 300); + let d4 = DigestInfo::new([4u8; 32], 400); + let d5 = DigestInfo::new([5u8; 32], 500); + + map.register_blobs("worker-a", &[d1, d2, d3]); + map.register_blobs("worker-b", &[d2, d3, d4]); + map.register_blobs("worker-c", &[d4, d5]); + + assert_eq!(map.digest_count(), 5); + assert_eq!(map.endpoint_count(), 3); + + // D2 on both worker-a and worker-b + let d2_workers = map.lookup_workers(&d2); + assert_eq!(d2_workers.len(), 2); + assert!(d2_workers.contains(&Arc::from("worker-a"))); + assert!(d2_workers.contains(&Arc::from("worker-b"))); + + // Remove worker-b + map.remove_endpoint("worker-b"); + + assert_eq!(map.endpoint_count(), 2); + + // D2 still on worker-a + let d2_workers = map.lookup_workers(&d2); + assert_eq!(d2_workers.len(), 1); + assert!(d2_workers.contains(&Arc::from("worker-a"))); + + // D4 still on worker-c + let d4_workers = map.lookup_workers(&d4); + assert_eq!(d4_workers.len(), 1); + assert!(d4_workers.contains(&Arc::from("worker-c"))); + + // D3 only on worker-a now + let d3_workers = map.lookup_workers(&d3); + assert_eq!(d3_workers.len(), 1); + assert!(d3_workers.contains(&Arc::from("worker-a"))); + + // D1 still on worker-a, D5 still on worker-c + assert_eq!(map.lookup_workers(&d1).len(), 1); + assert_eq!(map.lookup_workers(&d5).len(), 1); + assert_eq!(map.digest_count(), 5); + } + + #[test] + fn test_digest_count_and_endpoint_count_consistency() { + let mut map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + let d2 = DigestInfo::new([2u8; 32], 200); + let d3 = DigestInfo::new([3u8; 32], 300); + + // Step 1: Empty map. + assert_eq!(map.digest_count(), 0); + assert_eq!(map.endpoint_count(), 0); + + // Step 2: Register d1, d2 on worker-a. + map.register_blobs("worker-a", &[d1, d2]); + assert_eq!(map.digest_count(), 2); + assert_eq!(map.endpoint_count(), 1); + + // Step 3: Register d2, d3 on worker-b (d2 shared). + map.register_blobs("worker-b", &[d2, d3]); + assert_eq!(map.digest_count(), 3); + assert_eq!(map.endpoint_count(), 2); + + // Step 4: Evict d1 from worker-a (d1 disappears entirely). + map.evict_blobs("worker-a", &[d1]); + assert_eq!(map.digest_count(), 2); + assert_eq!(map.endpoint_count(), 2); + + // Step 5: Evict d2 from worker-a (d2 still on worker-b). + map.evict_blobs("worker-a", &[d2]); + assert_eq!(map.digest_count(), 2); // d2 and d3 remain + assert_eq!(map.endpoint_count(), 1); // worker-a removed (empty) + + // Step 6: Remove worker-b entirely. + map.remove_endpoint("worker-b"); + assert_eq!(map.digest_count(), 0); + assert_eq!(map.endpoint_count(), 0); + } + + #[test] + fn test_lookup_workers_with_timestamps() { + let mut map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + + map.register_blobs("worker-a:50081", &[d1]); + map.register_blobs("worker-b:50081", &[d1]); + + let workers_with_ts = map.lookup_workers_with_timestamps(&d1); + assert_eq!( + workers_with_ts.len(), + 2, + "Expected 2 endpoints with timestamps" + ); + + // Both timestamps should be non-UNIX_EPOCH (i.e., set to SystemTime::now()). 
+ for (endpoint, ts) in &workers_with_ts { + assert!( + *ts > std::time::UNIX_EPOCH, + "Expected valid timestamp for {endpoint}, got {ts:?}" + ); + } + + // Verify endpoint names match. + let endpoints: Vec<&str> = workers_with_ts.iter().map(|(e, _)| &**e).collect(); + assert!( + endpoints.contains(&"worker-a:50081"), + "Expected worker-a:50081 in results" + ); + assert!( + endpoints.contains(&"worker-b:50081"), + "Expected worker-b:50081 in results" + ); + } + + #[test] + fn test_lookup_workers_with_timestamps_unknown_digest() { + let map = BlobLocalityMap::new(); + let d1 = DigestInfo::new([1u8; 32], 100); + let result = map.lookup_workers_with_timestamps(&d1); + assert!( + result.is_empty(), + "Expected empty result for unknown digest" + ); + } +} diff --git a/nativelink-util/src/buf_channel.rs b/nativelink-util/src/buf_channel.rs index ad3b8c288..e26a0ffdd 100644 --- a/nativelink-util/src/buf_channel.rs +++ b/nativelink-util/src/buf_channel.rs @@ -27,18 +27,37 @@ use tracing::warn; const ZERO_DATA: Bytes = Bytes::new(); +/// Default channel capacity: 64 slots. At 256KiB chunks this gives 16MiB of +/// buffered data, which is sufficient for most workloads. +const DEFAULT_BUF_CHANNEL_CAPACITY: usize = 64; + /// Create a channel pair that can be used to transport buffer objects around to /// different components. This wrapper is used because the streams give some /// utility like managing EOF in a more friendly way, ensure if no EOF is received /// it will send an error to the receiver channel before shutting down and count /// the number of bytes sent. +/// +/// Uses the default capacity of 64 slots. For high-throughput or +/// latency-sensitive paths, use [`make_buf_channel_pair_with_size`] instead. #[must_use] pub fn make_buf_channel_pair() -> (DropCloserWriteHalf, DropCloserReadHalf) { - // We allow up to 2 items in the buffer at any given time. There is no major - // reason behind this magic number other than thinking it will be nice to give - // a little time for another thread to wake up and consume data if another - // thread is pumping large amounts of data into the channel. - let (tx, rx) = mpsc::channel(2); + make_buf_channel_pair_with_size(DEFAULT_BUF_CHANNEL_CAPACITY) +} + +/// Like [`make_buf_channel_pair`], but with a caller-specified channel capacity. +/// +/// The `capacity` parameter controls how many chunks can be buffered before the +/// producer is forced to wait. At 256KiB chunks (the default `read_buffer_size`), +/// each slot represents ~256KiB of buffered data, so: +/// +/// - 64 slots = ~16MiB (default, good for most workloads) +/// - 128 slots = ~32MiB (suitable for dual-store writes in FastSlowStore) +/// - 256 slots = ~64MiB (suitable for high-throughput streaming at 10Gbps+) +#[must_use] +pub fn make_buf_channel_pair_with_size( + capacity: usize, +) -> (DropCloserWriteHalf, DropCloserReadHalf) { + let (tx, rx) = mpsc::channel(capacity); let eof_sent = Arc::new(AtomicBool::new(false)); ( DropCloserWriteHalf { @@ -368,7 +387,9 @@ impl DropCloserReadHalf { } chunk }; - let mut output = BytesMut::new(); + // If we get here, first_chunk was not enough and there is more data. + // Fall back to concatenation for multiple chunks. + let mut output = BytesMut::with_capacity(size.min(first_chunk.len() * 2)); output.extend_from_slice(&first_chunk); loop { @@ -396,20 +417,41 @@ impl DropCloserReadHalf { impl Stream for DropCloserReadHalf { type Item = Result; - // TODO(palfrey) This is not very efficient as we are creating a new future on every - // poll() call. 
It might be better to use a waker.
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        Box::pin(self.recv())
-            .as_mut()
-            .poll(cx)
-            .map(|result| match result {
+        // First drain any queued data (e.g., from try_reset_stream or peek).
+        if let Some(chunk) = self.queued_data.pop_front() {
+            // queued_data may contain empty bytes representing EOF.
+            if chunk.is_empty() {
+                return Poll::Ready(None);
+            }
+            return Poll::Ready(Some(Ok(chunk)));
+        }
+
+        // Check for previous errors.
+        if let Some(err) = &self.last_err {
+            return Poll::Ready(Some(Err(err.clone().to_std_err())));
+        }
+
+        // Poll the underlying mpsc channel directly to avoid heap allocation.
+        match self.rx.poll_recv(cx) {
+            Poll::Ready(Some(bytes)) => match self.recv_inner(bytes) {
                 Ok(bytes) => {
                     if bytes.is_empty() {
-                        return None;
+                        Poll::Ready(None) // EOF
+                    } else {
+                        Poll::Ready(Some(Ok(bytes)))
                     }
-                    Some(Ok(bytes))
                 }
-                Err(e) => Some(Err(e.to_std_err())),
-            })
+                Err(e) => Poll::Ready(Some(Err(e.to_std_err()))),
+            },
+            Poll::Ready(None) => {
+                // Channel closed — treat as EOF or error depending on the eof_sent flag.
+                match self.recv_inner(ZERO_DATA) {
+                    Ok(_) => Poll::Ready(None),
+                    Err(e) => Poll::Ready(Some(Err(e.to_std_err()))),
+                }
+            }
+            Poll::Pending => Poll::Pending,
+        }
     }
 }
diff --git a/nativelink-util/src/connection_manager.rs b/nativelink-util/src/connection_manager.rs
index 26d9f9553..762f63c63 100644
--- a/nativelink-util/src/connection_manager.rs
+++ b/nativelink-util/src/connection_manager.rs
@@ -111,7 +111,7 @@ struct ConnectionManagerWorker {
 /// The maximum number of queued requests to obtain a connection from the
 /// worker before applying back pressure to the requestor. It makes sense to
-/// keep this small since it has to wait for a response anyway.
+/// keep this bounded, since each queued request still waits for a response.
-const WORKER_BACKLOG: usize = 8; +const WORKER_BACKLOG: usize = 64; impl ConnectionManager { /// Create a connection manager that creates a balance list between a given diff --git a/nativelink-util/src/digest_hasher.rs b/nativelink-util/src/digest_hasher.rs index 61d1269c2..ed695c70a 100644 --- a/nativelink-util/src/digest_hasher.rs +++ b/nativelink-util/src/digest_hasher.rs @@ -26,10 +26,10 @@ use nativelink_proto::build::bazel::remote::execution::v2::digest_function::Valu use opentelemetry::context::Context; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt}; +use tokio::io::{AsyncRead, AsyncReadExt}; use crate::common::DigestInfo; -use crate::{fs, spawn_blocking}; +use crate::fs; static DEFAULT_DIGEST_HASHER_FUNC: OnceLock = OnceLock::new(); @@ -229,15 +229,27 @@ pub struct DigestHasherImpl { } impl DigestHasherImpl { - #[inline] async fn hash_file( - &mut self, - mut file: fs::FileSlot, + self, + file: fs::FileSlot, ) -> Result<(DigestInfo, fs::FileSlot), Error> { - let digest = self - .compute_from_reader(&mut file) - .await - .err_tip(|| "In digest_for_file")?; + let (mut hasher, file) = crate::spawn_blocking!("hash_file", move || { + let mut f = file; + let mut hasher = self; + let mut buf = vec![0u8; fs::DEFAULT_READ_BUFF_SIZE]; + loop { + let n = std::io::Read::read(f.as_std_mut(), &mut buf) + .err_tip(|| "Read error in hash_file")?; + if n == 0 { + break; + } + DigestHasher::update(&mut hasher, &buf[..n]); + } + Ok::<_, Error>((hasher, f)) + }) + .await + .map_err(|e| make_err!(Code::Internal, "hash_file spawn failed: {e:?}"))??; + let digest = hasher.finalize_digest(); Ok((digest, file)) } } @@ -264,14 +276,12 @@ impl DigestHasher for DigestHasherImpl { } async fn digest_for_file( - mut self, + self, file_path: impl AsRef, mut file: fs::FileSlot, size_hint: Option, ) -> Result<(DigestInfo, fs::FileSlot), Error> { - let file_position = file - .stream_position() - .await + let file_position = std::io::Seek::stream_position(file.as_std_mut()) .err_tip(|| "Couldn't get stream position in digest_for_file")?; if file_position != 0 { return self.hash_file(file).await; @@ -287,17 +297,26 @@ impl DigestHasher for DigestHasherImpl { match self.hash_func_impl { DigestHasherFuncImpl::Sha256(_) => self.hash_file(file).await, DigestHasherFuncImpl::Blake3(mut hasher) => { - spawn_blocking!("digest_for_file", move || { - hasher.update_mmap(file_path).map_err(|e| { - make_err!(Code::Internal, "Error in blake3's update_mmap: {e:?}") - })?; - Result::<_, Error>::Ok(( - DigestInfo::new(hasher.finalize().into(), hasher.count()), - file, - )) - }) - .await - .err_tip(|| "Could not spawn blocking task in digest_for_file")? + // Use rayon::spawn + oneshot instead of spawn_blocking so we + // don't hold a tokio blocking thread while rayon's thread pool + // does the parallel hashing work. + let (tx, rx) = tokio::sync::oneshot::channel(); + rayon::spawn(move || { + let result = match hasher.update_mmap_rayon(file_path) { + Ok(_) => Ok(( + DigestInfo::new(hasher.finalize().into(), hasher.count()), + file, + )), + Err(e) => Err(make_err!( + Code::Internal, + "Error in blake3's update_mmap_rayon: {e:?}" + )), + }; + drop(tx.send(result)); + }); + rx.await.map_err(|_| { + make_err!(Code::Internal, "Rayon task dropped in digest_for_file") + })? 
            }
        }
    }
}
diff --git a/nativelink-util/src/evicting_map.rs b/nativelink-util/src/evicting_map.rs
index e779f38b6..5e5c5aa23 100644
--- a/nativelink-util/src/evicting_map.rs
+++ b/nativelink-util/src/evicting_map.rs
@@ -23,15 +23,16 @@ use core::pin::Pin;
 use std::collections::BTreeSet;
 use std::sync::Arc;

+use parking_lot::Mutex;
 use futures::StreamExt;
 use futures::stream::FuturesUnordered;
 use lru::LruCache;
 use nativelink_config::stores::EvictionPolicy;
 use nativelink_metric::MetricsComponent;
-use parking_lot::Mutex;
 use serde::{Deserialize, Serialize};
-use tracing::{debug, info};
+use tracing::{debug, warn};

+use crate::background_spawn;
 use crate::instant_wrapper::InstantWrapper;
 use crate::metrics_utils::{Counter, CounterWithTime};

@@ -89,11 +90,13 @@ impl<T: LenEntry + Send + Sync> LenEntry for Arc<T> {
     }
 }

-// Callback to be called when the EvictingMap removes an item
-// either via eviction or direct deletion. This will be called with
-// whatever key type the EvictingMap uses.
-pub trait RemoveItemCallback<Q>: Debug + Send + Sync {
+// Callback invoked when the EvictingMap inserts or removes an item.
+pub trait ItemCallback<Q>: Debug + Send + Sync {
     fn callback(&self, store_key: &Q) -> Pin<Box<dyn Future<Output = ()> + Send>>;
+
+    /// Called synchronously when a new item is inserted.
+    /// Default is a no-op.
+    fn on_insert(&self, _store_key: &Q, _size: u64) {}
 }

 #[derive(Debug, MetricsComponent)]
@@ -101,7 +104,7 @@ struct State<
     K: Ord + Hash + Eq + Clone + Debug + Send + Borrow<Q>,
     Q: Ord + Hash + Eq + Debug,
     T: LenEntry + Debug + Send,
-    C: RemoveItemCallback<Q>,
+    C: ItemCallback<Q>,
 > {
     lru: LruCache<K, EvictionItem<T>>,
     btree: Option<BTreeSet<K>>,
@@ -120,7 +123,7 @@ struct State<
     lifetime_inserted_bytes: Counter,

     _key_type: PhantomData<Q>,
-    remove_callbacks: Vec<C>,
+    item_callbacks: Vec<C>,
 }

 type RemoveFuture = Pin<Box<dyn Future<Output = ()> + Send>>;

@@ -129,7 +132,7 @@ impl<
     K: Ord + Hash + Eq + Clone + Debug + Send + Sync + Borrow<Q>,
     Q: Ord + Hash + Eq + Debug + Sync,
     T: LenEntry + Debug + Sync + Send,
-    C: RemoveItemCallback<Q>,
+    C: ItemCallback<Q>,
 > State<K, Q, T, C>
 {
     /// Removes an item from the cache and returns the data for deferred cleanup.
@@ -157,7 +160,7 @@ impl<
         }

         let callbacks = self
-            .remove_callbacks
+            .item_callbacks
             .iter()
             .map(|callback| callback.callback(key))
             .collect();
@@ -168,6 +171,10 @@
     /// Inserts a new item into the cache. If the key already exists, the old item is returned
     /// for deferred cleanup.
+    ///
+    /// Note: This method does NOT fire `on_insert` callbacks. The caller is
+    /// responsible for collecting the key+size pairs and firing callbacks
+    /// after releasing the State mutex to avoid nested locking.
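+    ///
+    /// A sketch of that caller pattern, mirroring what `insert_with_time`
+    /// below actually does (illustrative only; the locals are assumed):
+    ///
+    /// ```ignore
+    /// let notifications = { /* lock state, put(), collect (key, size) pairs */ };
+    /// // State mutex released; re-lock briefly only to reach the callbacks.
+    /// let state = self.state.lock();
+    /// for (key, size) in &notifications {
+    ///     for cb in &state.item_callbacks {
+    ///         cb.on_insert(key.borrow(), *size);
+    ///     }
+    /// }
+    /// ```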
 #[must_use]
    fn put(&mut self, key: &K, eviction_item: EvictionItem<T>) -> Option<(T, Vec<RemoveFuture>)>
    where
        K: Borrow<Q>,
    {
@@ -183,18 +190,20 @@
             .map(|old_item| self.remove(key.borrow(), &old_item, true))
     }

-    fn add_remove_callback(&mut self, callback: C) {
-        self.remove_callbacks.push(callback);
+    fn add_item_callback(&mut self, callback: C) {
+        self.item_callbacks.push(callback);
     }
 }

 #[derive(Debug, Clone, Copy)]
-pub struct NoopRemove;
+pub struct NoopCallback;

-impl<Q> RemoveItemCallback<Q> for NoopRemove {
+impl<Q> ItemCallback<Q> for NoopCallback {
     fn callback(&self, _store_key: &Q) -> Pin<Box<dyn Future<Output = ()> + Send>> {
         Box::pin(async {})
     }
+
+    fn on_insert(&self, _store_key: &Q, _size: u64) {}
 }

 #[derive(Debug, MetricsComponent)]
@@ -203,7 +212,7 @@ pub struct EvictingMap<
     Q: Ord + Hash + Eq + Debug,
     T: LenEntry + Debug + Send,
     I: InstantWrapper,
-    C: RemoveItemCallback<Q> = NoopRemove,
+    C: ItemCallback<Q> = NoopCallback,
 > {
     #[metric]
     state: Mutex<State<K, Q, T, C>>,
@@ -224,7 +233,7 @@ where
     Q: Ord + Hash + Eq + Debug + Sync,
     T: LenEntry + Debug + Clone + Send + Sync,
     I: InstantWrapper,
-    C: RemoveItemCallback<Q>,
+    C: ItemCallback<Q>,
 {
     pub fn new(config: &EvictionPolicy, anchor_time: I) -> Self {
         Self {
@@ -240,7 +249,7 @@
                 replaced_items: CounterWithTime::default(),
                 lifetime_inserted_bytes: Counter::default(),
                 _key_type: PhantomData,
-                remove_callbacks: Vec::new(),
+                item_callbacks: Vec::new(),
             }),
             anchor_time,
             max_bytes: config.max_bytes as u64,
@@ -265,7 +274,7 @@
     /// and return the number of items that were processed.
     /// The `handler` function should return `true` to continue processing the next item
     /// or `false` to stop processing.
-    pub fn range<F>(&self, prefix_range: impl RangeBounds<K> + Send, mut handler: F) -> u64
+    pub async fn range<F>(&self, prefix_range: impl RangeBounds<K> + Send, mut handler: F) -> u64
     where
         F: FnMut(&K, &T) -> bool + Send,
         K: Ord,
@@ -291,7 +300,7 @@
     /// Returns the number of key-value pairs that are currently in the cache.
     /// Function is not for production code paths.
- pub fn len_for_test(&self) -> usize { + pub async fn len_for_test(&self) -> usize { self.state.lock().lru.len() } @@ -335,6 +344,9 @@ where self.max_bytes }; + let elapsed_seconds = + i32::try_from(self.anchor_time.elapsed().as_secs()).unwrap_or(i32::MAX); + let mut items_to_unref = Vec::new(); let mut removal_futures = Vec::new(); @@ -343,7 +355,13 @@ where .lru .pop_lru() .expect("Tried to peek() then pop() but failed"); - debug!(?key, "Evicting",); + let age_secs = elapsed_seconds.saturating_sub(eviction_item.seconds_since_anchor); + let size = eviction_item.data.len(); + if age_secs < 120 { + warn!(?key, age_secs, size, "Evicting recently-inserted item"); + } else { + debug!(?key, age_secs, size, "Evicting"); + } let (data, futures) = state.remove(key.borrow(), &eviction_item, false); items_to_unref.push(data); removal_futures.extend(futures.into_iter()); @@ -385,7 +403,16 @@ where R: Borrow + Send, { let (removal_futures, data_to_unref) = { + let lock_start = std::time::Instant::now(); let mut state = self.state.lock(); + let lock_wait = lock_start.elapsed(); + if lock_wait.as_millis() > 1 { + warn!( + lock_wait_ms = lock_wait.as_millis(), + op = "sizes_for_keys", + "EvictingMap: lock contention", + ); + } let lru_len = state.lru.len(); let mut data_to_unref = Vec::new(); @@ -404,7 +431,15 @@ where if self.should_evict(lru_len, entry, 0, u64::MAX) { *result = None; if let Some((key, eviction_item)) = state.lru.pop_entry(key.borrow()) { - info!(?key, "Item expired, evicting"); + let elapsed_seconds = + i32::try_from(self.anchor_time.elapsed().as_secs()).unwrap_or(i32::MAX); + let age_secs = elapsed_seconds.saturating_sub(eviction_item.seconds_since_anchor); + let size = eviction_item.data.len(); + if age_secs < 120 { + warn!(?key, age_secs, size, "Expired recently-inserted item"); + } else { + debug!(?key, age_secs, size, "Item expired, evicting"); + } let (data, futures) = state.remove(key.borrow(), &eviction_item, false); // Store data for later unref - we can't drop state here as we're still iterating @@ -426,50 +461,142 @@ where (removal_futures, data_to_unref) }; - // Perform the async callbacks outside of the lock - let mut callbacks: FuturesUnordered<_> = removal_futures.into_iter().collect(); - while callbacks.next().await.is_some() {} - let mut callbacks: FuturesUnordered<_> = - data_to_unref.iter().map(LenEntry::unref).collect(); - while callbacks.next().await.is_some() {} + // Fire-and-forget eviction cleanup in background. + if !removal_futures.is_empty() || !data_to_unref.is_empty() { + drop(background_spawn!("evicting_map_sizes_cleanup", async move { + let mut callbacks: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while callbacks.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = + data_to_unref.iter().map(LenEntry::unref).collect(); + while callbacks.next().await.is_some() {} + })); + } } pub async fn get(&self, key: &Q) -> Option { - // Fast path: Check if we need eviction before acquiring lock for eviction - let needs_eviction = { - let state = self.state.lock(); + let lock_start = std::time::Instant::now(); + let mut state = self.state.lock(); + let lock_wait = lock_start.elapsed(); + if lock_wait.as_millis() > 1 { + warn!( + lock_wait_ms = lock_wait.as_millis(), + op = "get", + "EvictingMap: lock contention", + ); + } + + // Perform eviction if needed, collecting items for background cleanup. 
+ let eviction_cleanup = { if let Some((_, peek_entry)) = state.lru.peek_lru() { - self.should_evict( + if self.should_evict( state.lru.len(), peek_entry, state.sum_store_size, self.max_bytes, - ) + ) { + let (items_to_unref, removal_futures) = self.evict_items(&mut *state); + if !removal_futures.is_empty() || !items_to_unref.is_empty() { + Some((items_to_unref, removal_futures)) + } else { + None + } + } else { + None + } } else { - false + None } }; - // Perform eviction if needed - if needs_eviction { - let (items_to_unref, removal_futures) = { - let mut state = self.state.lock(); - self.evict_items(&mut *state) - }; - // Unref items outside of lock - let mut callbacks: FuturesUnordered<_> = removal_futures.into_iter().collect(); - while callbacks.next().await.is_some() {} - let mut callbacks: FuturesUnordered<_> = - items_to_unref.iter().map(LenEntry::unref).collect(); - while callbacks.next().await.is_some() {} + // Get the item while still holding the lock. + let result = state.lru.get_mut(key.borrow()).map(|entry| { + entry.seconds_since_anchor = + i32::try_from(self.anchor_time.elapsed().as_secs()).unwrap_or(i32::MAX); + entry.data.clone() + }); + + drop(state); + + // Fire-and-forget eviction cleanup in background. + if let Some((items_to_unref, removal_futures)) = eviction_cleanup { + drop(background_spawn!("evicting_map_get_cleanup", async move { + let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while futures.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = + items_to_unref.iter().map(LenEntry::unref).collect(); + while callbacks.next().await.is_some() {} + })); } - // Now get the item + result + } + + /// Retrieves multiple entries in a single lock acquisition, reducing + /// contention compared to calling `get()` in a loop. + pub async fn get_many<'b, Iter>(&self, keys: Iter) -> Vec> + where + Iter: IntoIterator, + Q: 'b, + { + let lock_start = std::time::Instant::now(); let mut state = self.state.lock(); - let entry = state.lru.get_mut(key.borrow())?; - entry.seconds_since_anchor = - i32::try_from(self.anchor_time.elapsed().as_secs()).unwrap_or(i32::MAX); - Some(entry.data.clone()) + let lock_wait = lock_start.elapsed(); + if lock_wait.as_millis() > 1 { + warn!( + lock_wait_ms = lock_wait.as_millis(), + op = "get_many", + "EvictingMap: lock contention", + ); + } + + // Perform eviction if needed, collecting items for background cleanup. + let eviction_cleanup = { + if let Some((_, peek_entry)) = state.lru.peek_lru() { + if self.should_evict( + state.lru.len(), + peek_entry, + state.sum_store_size, + self.max_bytes, + ) { + let (items_to_unref, removal_futures) = self.evict_items(&mut *state); + if !removal_futures.is_empty() || !items_to_unref.is_empty() { + Some((items_to_unref, removal_futures)) + } else { + None + } + } else { + None + } + } else { + None + } + }; + + let now = i32::try_from(self.anchor_time.elapsed().as_secs()).unwrap_or(i32::MAX); + let results: Vec> = keys + .into_iter() + .map(|key: &'b Q| { + state.lru.get_mut(key.borrow()).map(|entry| { + entry.seconds_since_anchor = now; + entry.data.clone() + }) + }) + .collect(); + + drop(state); + + // Fire-and-forget eviction cleanup in background. 
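+        // Deferring this is safe: evicted entries are LRU-cold keys distinct
+        // from any entries just returned, so their unref cannot race the results.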
+ if let Some((items_to_unref, removal_futures)) = eviction_cleanup { + drop(background_spawn!("evicting_map_get_many_cleanup", async move { + let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while futures.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = + items_to_unref.iter().map(LenEntry::unref).collect(); + while callbacks.next().await.is_some() {} + })); + } + + results } /// Returns the replaced item if any. @@ -487,23 +614,58 @@ where /// Returns the replaced item if any. pub async fn insert_with_time(&self, key: K, data: T, seconds_since_anchor: i32) -> Option { - let (items_to_unref, removal_futures) = { + let (replaced_items, evicted_items, removal_futures, insert_notifications) = { + let lock_start = std::time::Instant::now(); let mut state = self.state.lock(); + let lock_wait = lock_start.elapsed(); + if lock_wait.as_millis() > 1 { + warn!( + lock_wait_ms = lock_wait.as_millis(), + op = "insert", + "EvictingMap: lock contention", + ); + } self.inner_insert_many(&mut state, [(key, data)], seconds_since_anchor) }; + // State lock released. Fire insert callbacks outside the critical section. + if !insert_notifications.is_empty() { + let state = self.state.lock(); + for (key, size) in &insert_notifications { + for cb in &state.item_callbacks { + cb.on_insert(key.borrow(), *size); + } + } + } - let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); - while futures.next().await.is_some() {} + // Replaced items share the same key (and thus content path) as the + // new insert. Their unrefs MUST complete before the caller continues + // to rename the new file into the same path. + let result = if !replaced_items.is_empty() { + let futures: FuturesUnordered<_> = replaced_items + .into_iter() + .map(|item| async move { + item.unref().await; + item + }) + .collect(); + futures.collect::>().await.into_iter().next() + } else { + None + }; - // Unref items outside of lock - let futures: FuturesUnordered<_> = items_to_unref - .into_iter() - .map(|item| async move { - item.unref().await; - item - }) - .collect(); - futures.collect::>().await.into_iter().next() + // Fire-and-forget eviction cleanup (different keys, no path conflict) + // and removal callbacks (cache invalidation, protected by stale-positive handling). + if !removal_futures.is_empty() || !evicted_items.is_empty() { + drop(background_spawn!("evicting_map_insert_cleanup", async move { + let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while futures.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = + evicted_items.iter().map(LenEntry::unref).collect(); + while callbacks.next().await.is_some() {} + })); + } + + result } /// Same as `insert()`, but optimized for multiple inserts. @@ -522,20 +684,35 @@ where return Vec::new(); } - let (items_to_unref, removal_futures) = { + let (replaced_items, evicted_items, removal_futures, insert_notifications) = { + let lock_start = std::time::Instant::now(); let mut state = self.state.lock(); + let lock_wait = lock_start.elapsed(); + if lock_wait.as_millis() > 1 { + warn!( + lock_wait_ms = lock_wait.as_millis(), + op = "insert_many", + "EvictingMap: lock contention", + ); + } self.inner_insert_many( &mut state, inserts, i32::try_from(self.anchor_time.elapsed().as_secs()).unwrap_or(i32::MAX), ) }; + // State lock released. Fire insert callbacks outside the critical section. 
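+        // (The mutex is re-acquired briefly below, but only to reach the
+        // registered callbacks — the insert itself has already committed.)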
+ if !insert_notifications.is_empty() { + let state = self.state.lock(); + for (key, size) in &insert_notifications { + for cb in &state.item_callbacks { + cb.on_insert(key.borrow(), *size); + } + } + } - let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); - while futures.next().await.is_some() {} - - // Unref items outside of lock - items_to_unref + // Replaced items share the same key/path — must await their unrefs. + let result: Vec = replaced_items .into_iter() .map(|item| async move { item.unref().await; @@ -543,15 +720,39 @@ where }) .collect::>() .collect::>() - .await + .await; + + // Fire-and-forget eviction cleanup (different keys, no path conflict). + if !removal_futures.is_empty() || !evicted_items.is_empty() { + drop(background_spawn!("evicting_map_insert_many_cleanup", async move { + let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while futures.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = + evicted_items.iter().map(LenEntry::unref).collect(); + while callbacks.next().await.is_some() {} + })); + } + + result } + /// Returns `(replaced_items, evicted_items, removal_futures, insert_notifications)`. + /// - `replaced_items`: items that were replaced by new inserts (same key). + /// - `evicted_items`: items evicted due to size/age/count limits. + /// - `removal_futures`: callbacks from item_callbacks for all removed items. + /// - `insert_notifications`: (key, size) pairs for firing on_insert callbacks + /// outside the State mutex critical section. + /// + /// Callers should fire-and-forget the eviction cleanup (evicted_items unrefs + /// + removal_futures) via `background_spawn!` to avoid blocking the caller. + /// Callers MUST fire on_insert callbacks for each insert_notification after + /// releasing the State mutex to avoid nested locking. fn inner_insert_many( &self, state: &mut State, inserts: It, seconds_since_anchor: i32, - ) -> (Vec, Vec) + ) -> (Vec, Vec, Vec, Vec<(K, u64)>) where It: IntoIterator + Send, // Note: It's not enough to have the inserts themselves be Send. 
The @@ -560,6 +761,7 @@ where { let mut replaced_items = Vec::new(); let mut removal_futures = Vec::new(); + let mut insert_notifications = Vec::new(); for (key, data) in inserts { let new_item_size = data.len(); let eviction_item = EvictionItem { @@ -573,22 +775,28 @@ where } state.sum_store_size += new_item_size; state.lifetime_inserted_bytes.add(new_item_size); + insert_notifications.push((key, new_item_size)); } // Perform eviction after all insertions - let (items_to_unref, futures) = self.evict_items(state); + let (evicted_items, futures) = self.evict_items(state); removal_futures.extend(futures); - // Note: We cannot drop the state lock here since we're borrowing it, - // but the caller will handle unreffing these items after releasing the lock - replaced_items.extend(items_to_unref); - - (replaced_items, removal_futures) + (replaced_items, evicted_items, removal_futures, insert_notifications) } pub async fn remove(&self, key: &Q) -> bool { - let (items_to_unref, removed_item, removal_futures) = { + let (evicted_items, removed_item, removal_futures) = { + let lock_start = std::time::Instant::now(); let mut state = self.state.lock(); + let lock_wait = lock_start.elapsed(); + if lock_wait.as_millis() > 1 { + warn!( + lock_wait_ms = lock_wait.as_millis(), + op = "remove", + "EvictingMap: lock contention", + ); + } // First perform eviction let (evicted_items, mut removal_futures) = self.evict_items(&mut *state); @@ -605,21 +813,25 @@ where (evicted_items, removed, removal_futures) }; - let mut callbacks: FuturesUnordered<_> = removal_futures.into_iter().collect(); - while callbacks.next().await.is_some() {} - - // Unref evicted items outside of lock - let mut callbacks: FuturesUnordered<_> = - items_to_unref.iter().map(LenEntry::unref).collect(); - while callbacks.next().await.is_some() {} - - // Unref removed item if any - if let Some(item) = removed_item { - item.unref().await; - return true; + let was_removed = removed_item.is_some(); + + // Fire-and-forget all cleanup (evicted + removed + callbacks) in background. + let has_cleanup = + !removal_futures.is_empty() || !evicted_items.is_empty() || removed_item.is_some(); + if has_cleanup { + drop(background_spawn!("evicting_map_remove_cleanup", async move { + let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while futures.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = evicted_items + .iter() + .chain(removed_item.iter()) + .map(LenEntry::unref) + .collect(); + while callbacks.next().await.is_some() {} + })); } - false + was_removed } /// Same as `remove()`, but allows for a conditional to be applied to the @@ -648,29 +860,46 @@ where (evicted_items, removal_futures, removed_item) } else { - (vec![], vec![].into_iter().collect(), None) + return false; } }; - // Perform the async callbacks outside of the lock - let mut removal_futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); - while removal_futures.next().await.is_some() {} + let was_removed = removed_item.is_some(); + + // Fire-and-forget all cleanup in background. 
+ let has_cleanup = + !removal_futures.is_empty() || !evicted_items.is_empty() || removed_item.is_some(); + if has_cleanup { + drop(background_spawn!("evicting_map_remove_if_cleanup", async move { + let mut futures: FuturesUnordered<_> = removal_futures.into_iter().collect(); + while futures.next().await.is_some() {} + let mut callbacks: FuturesUnordered<_> = evicted_items + .iter() + .chain(removed_item.iter()) + .map(LenEntry::unref) + .collect(); + while callbacks.next().await.is_some() {} + })); + } - // Unref evicted items - let mut callbacks: FuturesUnordered<_> = - evicted_items.iter().map(LenEntry::unref).collect(); - while callbacks.next().await.is_some() {} + was_removed + } - // Unref removed item if any - if let Some(item) = removed_item { - item.unref().await; - true - } else { - false - } + pub fn add_item_callback(&self, callback: C) { + self.state.lock().add_item_callback(callback); } - pub fn add_remove_callback(&self, callback: C) { - self.state.lock().add_remove_callback(callback); + /// Returns all entries in the cache with their LRU timestamps as absolute + /// seconds since UNIX epoch. Each entry is (key, unix_timestamp_secs). + /// + /// This is a peek-only operation: it does NOT promote entries in the LRU. + pub fn get_all_entries_with_timestamps(&self) -> Vec<(K, i64)> { + let anchor_epoch = self.anchor_time.unix_timestamp() as i64; + let state = self.state.lock(); + let mut result = Vec::with_capacity(state.lru.len()); + result.extend(state.lru.iter().map(|(k, v)| { + (k.clone(), anchor_epoch + v.seconds_since_anchor as i64) + })); + result } } diff --git a/nativelink-util/src/fs.rs b/nativelink-util/src/fs.rs index 284d2ca58..015a5228a 100644 --- a/nativelink-util/src/fs.rs +++ b/nativelink-util/src/fs.rs @@ -12,36 +12,48 @@ // See the License for the specific language governing permissions and // limitations under the License. -use core::pin::Pin; use core::sync::atomic::{AtomicUsize, Ordering}; -use core::task::{Context, Poll}; use std::fs::{Metadata, Permissions}; -use std::io::{IoSlice, Seek}; +use std::io::{Read, Seek, Write}; use std::path::{Path, PathBuf}; +use bytes::{Bytes, BytesMut}; use nativelink_error::{Code, Error, ResultExt, make_err}; use rlimit::increase_nofile_limit; /// We wrap all `tokio::fs` items in our own wrapper so we can limit the number of outstanding /// open files at any given time. This will greatly reduce the chance we'll hit open file limit /// issues. pub use tokio::fs::DirEntry; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncWrite, ReadBuf, SeekFrom, Take}; +use tokio::io::SeekFrom; use tokio::sync::{Semaphore, SemaphorePermit}; use tracing::{error, info, trace, warn}; +use crate::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf}; use crate::spawn_blocking; /// Default read buffer size when reading to/from disk. -pub const DEFAULT_READ_BUFF_SIZE: usize = 0x4000; +pub const DEFAULT_READ_BUFF_SIZE: usize = 64 * 1024; #[derive(Debug)] pub struct FileSlot { // We hold the permit because once it is dropped it goes back into the queue. _permit: SemaphorePermit<'static>, - inner: tokio::fs::File, + inner: std::fs::File, } impl FileSlot { + /// Returns a reference to the underlying `std::fs::File`. + #[inline] + pub fn as_std(&self) -> &std::fs::File { + &self.inner + } + + /// Returns a mutable reference to the underlying `std::fs::File`. + #[inline] + pub fn as_std_mut(&mut self) -> &mut std::fs::File { + &mut self.inner + } + /// Advise the kernel to drop page cache for this file's contents. 
/// Only available on Linux; #[cfg(target_os = "linux")] @@ -62,77 +74,25 @@ impl FileSlot { pub const fn advise_dontneed(&self) { // No-op: posix_fadvise is not available on Mac or Windows. } -} - -impl AsRef for FileSlot { - fn as_ref(&self) -> &tokio::fs::File { - &self.inner - } -} - -impl AsMut for FileSlot { - fn as_mut(&mut self) -> &mut tokio::fs::File { - &mut self.inner - } -} -impl AsyncRead for FileSlot { - fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut ReadBuf<'_>, - ) -> Poll> { - Pin::new(&mut self.inner).poll_read(cx, buf) - } -} - -impl AsyncSeek for FileSlot { - fn start_seek(mut self: Pin<&mut Self>, position: SeekFrom) -> Result<(), tokio::io::Error> { - Pin::new(&mut self.inner).start_seek(position) - } - - fn poll_complete( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - Pin::new(&mut self.inner).poll_complete(cx) - } -} - -impl AsyncWrite for FileSlot { - fn poll_write( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - Pin::new(&mut self.inner).poll_write(cx, buf) - } - - fn poll_flush( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - Pin::new(&mut self.inner).poll_flush(cx) - } - - fn poll_shutdown( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - Pin::new(&mut self.inner).poll_shutdown(cx) - } - - fn poll_write_vectored( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - bufs: &[IoSlice<'_>], - ) -> Poll> { - Pin::new(&mut self.inner).poll_write_vectored(cx, bufs) + /// Advise the kernel that this file will be read sequentially, + /// enabling more aggressive readahead (typically 2-4x default). + #[cfg(target_os = "linux")] + pub fn advise_sequential(&self) { + use std::os::unix::io::AsRawFd; + let fd = self.inner.as_raw_fd(); + let ret = unsafe { libc::posix_fadvise(fd, 0, 0, libc::POSIX_FADV_SEQUENTIAL) }; + if ret != 0 { + tracing::debug!( + fd, + ret, + "posix_fadvise(SEQUENTIAL) returned non-zero (best-effort, ignoring)", + ); + } } - fn is_write_vectored(&self) -> bool { - self.inner.is_write_vectored() - } + #[cfg(not(target_os = "linux"))] + pub const fn advise_sequential(&self) {} } // Note: If the default changes make sure you update the documentation in @@ -231,11 +191,7 @@ pub fn get_open_files_for_test() -> usize { OPEN_FILE_LIMIT.load(Ordering::Acquire) - OPEN_FILE_SEMAPHORE.available_permits() } -pub async fn open_file( - path: impl AsRef, - start: u64, - limit: u64, -) -> Result, Error> { +pub async fn open_file(path: impl AsRef, start: u64) -> Result { let path = path.as_ref().to_owned(); let (permit, os_file) = call_with_permit(move |permit| { let mut os_file = @@ -250,9 +206,8 @@ pub async fn open_file( .await?; Ok(FileSlot { _permit: permit, - inner: tokio::fs::File::from_std(os_file), - } - .take(limit)) + inner: os_file, + }) } pub async fn create_file(path: impl AsRef) -> Result { @@ -272,10 +227,111 @@ pub async fn create_file(path: impl AsRef) -> Result { .await?; Ok(FileSlot { _permit: permit, - inner: tokio::fs::File::from_std(os_file), + inner: os_file, }) } +/// Read from `file` in a blocking thread, sending chunks to `writer`. +/// Reads up to `limit` bytes starting from the current file position. +/// `read_buffer_size` controls the chunk size (typically 256 KiB). +/// Returns the `FileSlot` so the caller can reuse or drop it. 
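+///
+/// A minimal usage sketch (illustrative only; the path is hypothetical, and a
+/// consumer of `rx` must run concurrently or the bounded channel fills up):
+///
+/// ```ignore
+/// let (mut tx, rx) = make_buf_channel_pair();
+/// let file = open_file("/tmp/some_blob", 0).await?;
+/// let file = read_file_to_channel(file, &mut tx, u64::MAX, DEFAULT_READ_BUFF_SIZE).await?;
+/// tx.send_eof()?;
+/// ```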
+pub async fn read_file_to_channel(
+    file: FileSlot,
+    writer: &mut DropCloserWriteHalf,
+    limit: u64,
+    read_buffer_size: usize,
+) -> Result<FileSlot, Error> {
+    let (sync_tx, mut async_rx) = tokio::sync::mpsc::channel::<Result<Bytes, Error>>(4);
+
+    let read_task = spawn_blocking!("fs_read_file", move || {
+        let mut f = file;
+        let mut remaining = limit;
+        loop {
+            let to_read = read_buffer_size.min(remaining as usize);
+            if to_read == 0 {
+                break;
+            }
+            let mut buf = BytesMut::zeroed(to_read);
+            match f.as_std_mut().read(&mut buf[..]) {
+                Ok(0) => break,
+                Ok(n) => {
+                    buf.truncate(n);
+                    remaining -= n as u64;
+                    if sync_tx.blocking_send(Ok(buf.freeze())).is_err() {
+                        break; // reader dropped
+                    }
+                }
+                Err(e) => {
+                    drop(sync_tx.blocking_send(Err(e.into())));
+                    break;
+                }
+            }
+        }
+        f
+    });
+
+    // Receive chunks and forward to the async writer.
+    while let Some(result) = async_rx.recv().await {
+        let chunk = result?;
+        writer
+            .send(chunk)
+            .await
+            .err_tip(|| "Failed to send chunk from file reader")?;
+    }
+    // Ensure the blocking task completed successfully.
+    read_task
+        .await
+        .map_err(|e| make_err!(Code::Internal, "read task join failed: {e:?}"))
+}
+
+/// Write to `file` from a blocking thread, receiving chunks from `reader`.
+/// Returns total bytes written and the `FileSlot`.
+pub async fn write_file_from_channel(
+    file: FileSlot,
+    reader: &mut DropCloserReadHalf,
+) -> Result<(u64, FileSlot), Error> {
+    let (async_tx, mut sync_rx) = tokio::sync::mpsc::channel::<Bytes>(4);
+
+    let write_task = spawn_blocking!("fs_write_file", move || {
+        let mut f = file;
+        let mut total: u64 = 0;
+        while let Some(data) = sync_rx.blocking_recv() {
+            f.as_std_mut()
+                .write_all(&data)
+                .map_err(|e| Into::<Error>::into(e))?;
+            total += data.len() as u64;
+        }
+        Ok::<_, Error>((total, f))
+    });
+
+    // Async side: recv from channel, send to blocking writer.
+    let send_result: Result<(), Error> = async {
+        loop {
+            let data = reader
+                .recv()
+                .await
+                .err_tip(|| "Failed to recv in write_file_from_channel")?;
+            if data.is_empty() {
+                break; // EOF
+            }
+            if async_tx.send(data).await.is_err() {
+                // Writer task died — we'll get the error from write_task.
+                break;
+            }
+        }
+        Ok(())
+    }
+    .await;
+    drop(async_tx); // Signal EOF to writer.
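+
+    // Join the blocking writer first: its own write errors surface via `??`,
+    // and only then is any recv-side error from `send_result` propagated.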
+ + let (total, file) = write_task + .await + .map_err(|e| make_err!(Code::Internal, "write task join failed: {e:?}"))??; + + send_result?; + Ok((total, file)) +} + pub async fn hard_link(src: impl AsRef, dst: impl AsRef) -> Result<(), Error> { let src = src.as_ref().to_owned(); let dst = dst.as_ref().to_owned(); diff --git a/nativelink-util/src/fs_util.rs b/nativelink-util/src/fs_util.rs index c84215448..a785ec1eb 100644 --- a/nativelink-util/src/fs_util.rs +++ b/nativelink-util/src/fs_util.rs @@ -24,7 +24,7 @@ use tokio::fs; /// /// # Arguments /// * `src_dir` - Source directory path (must exist) -/// * `dst_dir` - Destination directory path (will be created) +/// * `dst_dir` - Destination directory path (will be created if it doesn't exist) /// /// # Returns /// * `Ok(())` on success @@ -37,7 +37,6 @@ use tokio::fs; /// /// # Errors /// - Source directory doesn't exist -/// - Destination already exists /// - Cross-filesystem hardlinking attempted /// - Filesystem doesn't support hardlinks /// - Permission denied @@ -48,13 +47,7 @@ pub async fn hardlink_directory_tree(src_dir: &Path, dst_dir: &Path) -> Result<( src_dir.display() ); - error_if!( - dst_dir.exists(), - "Destination directory already exists: {}", - dst_dir.display() - ); - - // Create the root destination directory + // Create the root destination directory (idempotent — ok if it already exists) fs::create_dir_all(dst_dir).await.err_tip(|| { format!( "Failed to create destination directory: {}", @@ -163,10 +156,17 @@ fn set_readonly_recursive_impl<'a>( path: &'a Path, ) -> Pin> + Send + 'a>> { Box::pin(async move { - let metadata = fs::metadata(path) + // Use symlink_metadata to avoid following symlinks (security: prevents + // changing permissions on external paths via crafted symlinks). + let metadata = fs::symlink_metadata(path) .await .err_tip(|| format!("Failed to get metadata for: {}", path.display()))?; + // Skip symlinks — do not follow them or change their target's permissions. + if metadata.is_symlink() { + return Ok(()); + } + if metadata.is_dir() { let mut entries = fs::read_dir(path) .await @@ -187,9 +187,11 @@ fn set_readonly_recursive_impl<'a>( use std::os::unix::fs::PermissionsExt; let mut perms = metadata.permissions(); - // If it's a directory, set to r-xr-xr-x (555) - // If it's a file, set to r--r--r-- (444) - let mode = if metadata.is_dir() { 0o555 } else { 0o444 }; + // Strip write bits but preserve execute bits. + // Files marked is_executable (e.g., shell scripts) are 0o555; + // stripping write keeps them at 0o555. Non-executable files + // at 0o644 become 0o444. Directories at 0o755 become 0o555. + let mode = perms.mode() & !0o222; perms.set_mode(mode); fs::set_permissions(path, perms) @@ -229,10 +231,17 @@ fn calculate_directory_size_impl<'a>( path: &'a Path, ) -> Pin> + Send + 'a>> { Box::pin(async move { - let metadata = fs::metadata(path) + // Use symlink_metadata to avoid following symlinks (security: prevents + // counting external files reachable via crafted symlinks). + let metadata = fs::symlink_metadata(path) .await .err_tip(|| format!("Failed to get metadata for: {}", path.display()))?; + // Symlinks count as 0 bytes — do not follow them. 
+ if metadata.is_symlink() { + return Ok(0); + } + if metadata.is_file() { return Ok(metadata.len()); } @@ -370,14 +379,24 @@ mod tests { } #[tokio::test] - async fn test_hardlink_existing_destination() -> Result<(), Error> { - let (_temp_dir, src_dir) = create_test_directory().await?; - let dst_dir = _temp_dir.path().join("existing"); + async fn test_hardlink_into_existing_destination() -> Result<(), Error> { + let (temp_dir, src_dir) = create_test_directory().await?; + let dst_dir = temp_dir.path().join("existing"); + // Pre-create the destination directory (simulates work_directory already existing) fs::create_dir(&dst_dir).await?; - let result = hardlink_directory_tree(&src_dir, &dst_dir).await; - assert!(result.is_err()); + // Should succeed — hardlink contents into existing directory + hardlink_directory_tree(&src_dir, &dst_dir).await?; + + // Verify structure + assert!(dst_dir.join("file1.txt").exists()); + assert!(dst_dir.join("subdir").is_dir()); + assert!(dst_dir.join("subdir/file2.txt").exists()); + + // Verify contents + let content1 = fs::read_to_string(dst_dir.join("file1.txt")).await?; + assert_eq!(content1, "Hello, World!"); Ok(()) } diff --git a/nativelink-util/src/lib.rs b/nativelink-util/src/lib.rs index 8ab85754e..5949f7f77 100644 --- a/nativelink-util/src/lib.rs +++ b/nativelink-util/src/lib.rs @@ -13,6 +13,7 @@ // limitations under the License. pub mod action_messages; +pub mod blob_locality_map; pub mod buf_channel; pub mod channel_body_for_tests; pub mod chunked_stream; @@ -24,6 +25,7 @@ pub mod fastcdc; pub mod fs; pub mod fs_util; pub mod health_utils; +pub mod log_utils; pub mod instant_wrapper; pub mod known_platform_property_provider; pub mod metrics; @@ -36,6 +38,7 @@ pub mod proto_stream_utils; pub mod resource_info; pub mod retry; pub mod shutdown_guard; +pub mod stall_detector; pub mod store_trait; pub mod task; pub mod telemetry; diff --git a/nativelink-util/src/log_utils.rs b/nativelink-util/src/log_utils.rs new file mode 100644 index 000000000..3de473391 --- /dev/null +++ b/nativelink-util/src/log_utils.rs @@ -0,0 +1,25 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use core::time::Duration; + +/// Computes throughput in megabits per second. 
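+///
+/// For example, 32 MiB (33_554_432 bytes) transferred in 250 ms works out to
+/// `33_554_432 * 8 / (0.25 * 1_000_000)` ≈ 1073.7 Mbps.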
+#[inline] +pub fn throughput_mbps(size_bytes: u64, elapsed: Duration) -> f64 { + let secs = elapsed.as_secs_f64(); + if secs == 0.0 { + return 0.0; + } + (size_bytes as f64 * 8.0) / (secs * 1_000_000.0) +} diff --git a/nativelink-util/src/platform_properties.rs b/nativelink-util/src/platform_properties.rs index 37d19b2e3..1b6e5a5f0 100644 --- a/nativelink-util/src/platform_properties.rs +++ b/nativelink-util/src/platform_properties.rs @@ -21,7 +21,7 @@ use nativelink_metric::{ use nativelink_proto::build::bazel::remote::execution::v2::Platform as ProtoPlatform; use nativelink_proto::build::bazel::remote::execution::v2::platform::Property as ProtoProperty; use serde::{Deserialize, Serialize}; -use tracing::info; +use tracing::debug; /// `PlatformProperties` helps manage the configuration of platform properties to /// keys and types. The scheduler uses these properties to decide what jobs @@ -54,12 +54,12 @@ impl PlatformProperties { if full_worker_logging { match check_value { PlatformPropertyValue::Minimum(_) => { - info!( + debug!( "Property mismatch on worker property {property}. {worker_value:?} < {check_value:?}" ); } _ => { - info!( + debug!( "Property mismatch on worker property {property}. {worker_value:?} != {check_value:?}" ); } @@ -69,7 +69,7 @@ impl PlatformProperties { } } else { if full_worker_logging { - info!("Property missing on worker property {property}"); + debug!("Property missing on worker property {property}"); } return false; } diff --git a/nativelink-util/src/stall_detector.rs b/nativelink-util/src/stall_detector.rs new file mode 100644 index 000000000..d6128bb4a --- /dev/null +++ b/nativelink-util/src/stall_detector.rs @@ -0,0 +1,217 @@ +// Copyright 2024 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Stall detection and thread dump utilities. +//! +//! When an async operation takes longer than a configured threshold, +//! [`StallGuard`] dumps all thread stacks to a file for post-mortem analysis. + +use core::time::Duration; +use std::sync::atomic::{AtomicU64, Ordering}; + +use tracing::error; + +/// Minimum interval between consecutive stack dumps (seconds). +/// Prevents flooding /tmp with dumps during a sustained stall. +const MIN_DUMP_INTERVAL_SECS: u64 = 30; + +/// Unix epoch seconds of the last dump. Used for rate-limiting. +static LAST_DUMP_EPOCH: AtomicU64 = AtomicU64::new(0); + +/// Default stall threshold for store operations. +pub const DEFAULT_STALL_THRESHOLD: Duration = Duration::from_secs(30); + +/// A guard that spawns a background task to detect stalls. When the +/// guarded operation completes (i.e., the guard is dropped), the +/// background task is cancelled. If the operation exceeds `threshold`, +/// a thread dump is written to `/tmp/nativelink-stall-.txt`. +/// +/// This relies on tokio's timer infrastructure, so it cannot detect +/// stalls caused by the tokio runtime itself being blocked. The +/// runtime-watchdog OS thread in nativelink.rs covers that case. 
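+///
+/// A typical usage sketch (illustrative; the label and guarded call are
+/// assumptions, not part of this change):
+///
+/// ```ignore
+/// let _guard = StallGuard::new(DEFAULT_STALL_THRESHOLD, "filesystem_get");
+/// let data = store.get_part_unchunked(key, 0, None).await?;
+/// // `_guard` drops here, aborting the watchdog task.
+/// ```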
+#[must_use = "StallGuard is immediately cancelled if not held in a variable"]
+#[derive(Debug)]
+pub struct StallGuard {
+    handle: tokio::task::JoinHandle<()>,
+}
+
+impl StallGuard {
+    /// Create a stall guard for an operation with the given label.
+    /// If the guard is not dropped within `threshold`, a stack dump fires.
+    pub fn new(threshold: Duration, label: &'static str) -> Self {
+        Self::new_inner(threshold, label, None)
+    }
+
+    /// Create a stall guard with additional dynamic context (e.g. digest
+    /// hash, size, operation details). The context string is included in
+    /// the stall message and thread dump header when the threshold fires.
+    pub fn with_context(threshold: Duration, label: &'static str, context: String) -> Self {
+        Self::new_inner(threshold, label, Some(context))
+    }
+
+    fn new_inner(threshold: Duration, label: &'static str, context: Option<String>) -> Self {
+        let handle = tokio::spawn(async move {
+            tokio::time::sleep(threshold).await;
+            let ctx_suffix = context
+                .as_deref()
+                .map_or_else(String::new, |c| format!(" [{c}]"));
+            let now = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_secs();
+            let prev = LAST_DUMP_EPOCH.load(Ordering::Relaxed);
+            if now.saturating_sub(prev) >= MIN_DUMP_INTERVAL_SECS
+                && LAST_DUMP_EPOCH
+                    .compare_exchange(prev, now, Ordering::SeqCst, Ordering::Relaxed)
+                    .is_ok()
+            {
+                error!(
+                    "STORE OPERATION STALL: {label}{ctx_suffix} has been running for >{threshold:.0?} — dumping thread stacks",
+                );
+                let dump_label = if ctx_suffix.is_empty() {
+                    label.to_string()
+                } else {
+                    format!("{label}{ctx_suffix}")
+                };
+                dump_thread_stacks(&dump_label);
+            } else {
+                error!(
+                    "STORE OPERATION STALL: {label}{ctx_suffix} has been running for >{threshold:.0?} (dump rate-limited)",
+                );
+            }
+        });
+        Self { handle }
+    }
+}
+
+impl Drop for StallGuard {
+    fn drop(&mut self) {
+        self.handle.abort();
+    }
+}
+
+/// Dump all thread stacks to `/tmp/nativelink-stall-<timestamp_ms>.txt`.
+///
+/// On Linux, reads `/proc/self/task/` to enumerate threads and collects
+/// thread name, wait channel, state, context switches, and kernel stack.
+///
+/// On non-Linux platforms, this is a no-op (logs a message).
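+///
+/// Besides the automatic `StallGuard` path, it can be invoked directly,
+/// e.g. from a hypothetical debug hook:
+///
+/// ```ignore
+/// dump_thread_stacks("manual-trigger");
+/// ```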
+pub fn dump_thread_stacks(label: &str) { + #[cfg(target_os = "linux")] + dump_thread_stacks_linux(label); + + #[cfg(not(target_os = "linux"))] + { + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + error!( + "Thread dump not available on this platform (trigger: {label}, ts: {timestamp})" + ); + } +} + +#[cfg(target_os = "linux")] +fn dump_thread_stacks_linux(label: &str) { + use std::fmt::Write as _; + + let timestamp_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + let path = format!("/tmp/nativelink-stall-{timestamp_ms}.txt"); + let mut output = String::new(); + + let _ = writeln!(output, "=== STORE OPERATION STALL THREAD DUMP ==="); + let _ = writeln!(output, "Trigger: {label}"); + let _ = writeln!(output, "Timestamp: {timestamp_ms}"); + let _ = writeln!(output, "PID: {}", std::process::id()); + let _ = writeln!(output); + + let task_dir = "/proc/self/task"; + let entries = match std::fs::read_dir(task_dir) { + Ok(e) => e, + Err(err) => { + error!("Failed to read {task_dir}: {err}"); + return; + } + }; + + let mut tids: Vec<_> = entries + .filter_map(|e| e.ok()) + .filter_map(|e| e.file_name().to_str().map(String::from)) + .collect(); + tids.sort(); + + let _ = writeln!(output, "Thread count: {}", tids.len()); + let _ = writeln!(output); + + for tid in &tids { + let _ = writeln!(output, "--- TID {tid} ---"); + let base = format!("{task_dir}/{tid}"); + + // Thread name + if let Ok(comm) = std::fs::read_to_string(format!("{base}/comm")) { + let _ = write!(output, " comm: {comm}"); + } + // Wait channel (kernel function the thread is sleeping in) + if let Ok(wchan) = std::fs::read_to_string(format!("{base}/wchan")) { + let _ = writeln!(output, " wchan: {wchan}"); + } + // Status (state, voluntary/involuntary context switches) + if let Ok(status) = std::fs::read_to_string(format!("{base}/status")) { + for line in status.lines() { + if line.starts_with("State:") + || line.starts_with("voluntary_ctxt_switches:") + || line.starts_with("nonvoluntary_ctxt_switches:") + { + let _ = writeln!(output, " {line}"); + } + } + } + // Kernel stack (requires CAP_SYS_PTRACE or permissive ptrace_scope) + if let Ok(stack) = std::fs::read_to_string(format!("{base}/stack")) { + if !stack.trim().is_empty() { + let _ = writeln!(output, " kernel stack:"); + for line in stack.lines() { + let _ = writeln!(output, " {line}"); + } + } + } + let _ = writeln!(output); + } + + match std::fs::write(&path, &output) { + Ok(()) => error!("Thread dump written to {path}"), + Err(err) => error!("Failed to write thread dump to {path}: {err}"), + } + + // Capture userspace backtraces via eu-stack for full Rust call stacks. 
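+    // eu-stack ships with elfutils and may be absent; the Err arm below only
+    // logs, so the kernel-stack dump above is still produced either way.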
+ let bt_path = format!("/tmp/nativelink-stall-{timestamp_ms}-bt.txt"); + let pid = std::process::id(); + match std::process::Command::new("eu-stack") + .args(["-p", &pid.to_string(), "-l"]) + .output() + { + Ok(out) => { + let combined = [&out.stdout[..], b"\n--- stderr ---\n", &out.stderr[..]].concat(); + match std::fs::write(&bt_path, &combined) { + Ok(()) => error!("Userspace backtrace written to {bt_path}"), + Err(err) => error!("Failed to write backtrace to {bt_path}: {err}"), + } + } + Err(err) => error!("Failed to run eu-stack: {err}"), + } +} diff --git a/nativelink-util/src/store_trait.rs b/nativelink-util/src/store_trait.rs index b7be933da..b838aa794 100644 --- a/nativelink-util/src/store_trait.rs +++ b/nativelink-util/src/store_trait.rs @@ -25,14 +25,26 @@ use std::ffi::OsString; use std::sync::{Arc, OnceLock}; use async_trait::async_trait; -use bytes::{Bytes, BytesMut}; +use bytes::Bytes; use futures::{Future, FutureExt, Stream, join, try_join}; use nativelink_error::{Code, Error, ResultExt, error_if, make_err}; + +tokio::task_local! { + /// Set to `true` when the current CAS request originates from a worker + /// (not a client like Bazel). `WorkerProxyStore` checks this to decide + /// between proxying blob data (for clients) and returning a redirect + /// with peer endpoints (for workers). + pub static IS_WORKER_REQUEST: bool; +} + +/// Prefix for redirect errors returned by `WorkerProxyStore` to worker callers. +/// The remainder of the message is a comma-separated list of peer gRPC endpoints +/// that have the requested blob. Example: `"NL_REDIRECT:grpc://w1:50081,grpc://w2:50081"` +pub const REDIRECT_PREFIX: &str = "NL_REDIRECT:"; use nativelink_metric::MetricsComponent; use rand::rngs::StdRng; use rand::{RngCore, SeedableRng}; use serde::{Deserialize, Serialize}; -use tokio::io::{AsyncReadExt, AsyncSeekExt}; use tracing::warn; use crate::buf_channel::{DropCloserReadHalf, DropCloserWriteHalf, make_buf_channel_pair}; @@ -81,11 +93,12 @@ pub enum UploadSizeInfo { pub async fn slow_update_store_with_file( store: Pin<&S>, digest: impl Into>, - file: &mut fs::FileSlot, + mut file: fs::FileSlot, upload_size: UploadSizeInfo, -) -> Result<(), Error> { - file.rewind() - .await +) -> Result { + use std::io::Seek; + file.as_std_mut() + .seek(std::io::SeekFrom::Start(0)) .err_tip(|| "Failed to rewind in upload_file_to_store")?; let (mut tx, rx) = make_buf_channel_pair(); @@ -93,25 +106,17 @@ pub async fn slow_update_store_with_file( .update(digest.into(), rx, upload_size) .map(|r| r.err_tip(|| "Could not upload data to store in upload_file_to_store")); let read_data_fut = async move { - loop { - let mut buf = BytesMut::with_capacity(fs::DEFAULT_READ_BUFF_SIZE); - let read = file - .read_buf(&mut buf) - .await - .err_tip(|| "Failed to read in upload_file_to_store")?; - if read == 0 { - break; - } - tx.send(buf.freeze()) - .await - .err_tip(|| "Failed to send in upload_file_to_store")?; - } + let file = fs::read_file_to_channel(file, &mut tx, u64::MAX, fs::DEFAULT_READ_BUFF_SIZE) + .await + .err_tip(|| "Failed to read in upload_file_to_store")?; tx.send_eof() - .err_tip(|| "Could not send EOF to store in upload_file_to_store") + .err_tip(|| "Could not send EOF to store in upload_file_to_store")?; + Ok::<_, Error>(file) }; - tokio::pin!(read_data_fut); let (update_res, read_res) = tokio::join!(update_fut, read_data_fut); - update_res.merge(read_res) + update_res?; + let file = read_res?; + Ok(file) } /// Optimizations that stores may want to expose to the callers. 
@@ -389,11 +394,11 @@ impl Store { } #[inline] - pub fn register_remove_callback( + pub fn register_item_callback( &self, - callback: Arc, + callback: Arc, ) -> Result<(), Error> { - self.inner.clone().register_remove_callback(callback) + self.inner.clone().register_item_callback(callback) } } @@ -661,7 +666,7 @@ pub trait StoreDriver: self: Pin<&Self>, key: StoreKey<'_>, path: OsString, - mut file: fs::FileSlot, + file: fs::FileSlot, upload_size: UploadSizeInfo, ) -> Result, Error> { let inner_store = self.inner_store(Some(key.borrow())); @@ -674,7 +679,7 @@ pub trait StoreDriver: .update_with_whole_file(key, path, file, upload_size) .await; } - slow_update_store_with_file(self, key, &mut file, upload_size).await?; + let file = slow_update_store_with_file(self, key, file, upload_size).await?; Ok(Some(file)) } @@ -843,20 +848,21 @@ pub trait StoreDriver: // Register health checks used to monitor the store. fn register_health(self: Arc, _registry: &mut HealthRegistryBuilder) {} - fn register_remove_callback( + fn register_item_callback( self: Arc, - callback: Arc, + callback: Arc, ) -> Result<(), Error>; } -// Callback to be called when a store deletes an item. This is used so -// compound stores can remove items from their internal state when their -// underlying stores remove items e.g. caches -pub trait RemoveItemCallback: Debug + Send + Sync { +// Callback invoked when a store inserts or deletes an item. +pub trait ItemCallback: Debug + Send + Sync { fn callback<'a>( &'a self, store_key: StoreKey<'a>, ) -> Pin + Send + 'a>>; + + /// Called synchronously when a new item is inserted. + fn on_insert(&self, _store_key: StoreKey<'_>, _size: u64) {} } /// The instructions on how to decode a value from a Bytes & version into diff --git a/nativelink-util/src/tls_utils.rs b/nativelink-util/src/tls_utils.rs index 15f685861..71f198be0 100644 --- a/nativelink-util/src/tls_utils.rs +++ b/nativelink-util/src/tls_utils.rs @@ -120,6 +120,19 @@ pub fn endpoint_from( tonic::transport::Endpoint::from(endpoint) }; + // Always enable TCP_NODELAY to reduce latency on gRPC connections. + // Nagle's algorithm delays small writes (up to 40ms), which is + // harmful for gRPC's many small HTTP/2 frames. + let endpoint_transport = endpoint_transport.tcp_nodelay(true); + + // Set HTTP/2 flow-control windows to match the server defaults (16 MiB + // stream, 32 MiB connection). Tonic/h2 defaults to 64 KiB for both, + // which caps aggregate throughput per connection to ~128 MB/s at 0.5 ms + // RTT — far below 10 GbE capacity when many streams share a connection. 
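+    // (Derivation of the figure above: 64 KiB / 0.5 ms = 65_536 B / 0.0005 s
+    // ≈ 131 MB/s of payload per flow-control window, i.e. roughly 128 MB/s.)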
+    // Set HTTP/2 flow-control windows to match the server defaults (16 MiB
+    // stream, 32 MiB connection). Tonic/h2 defaults to 64 KiB for both,
+    // which caps aggregate throughput per connection to ~128 MB/s at 0.5 ms
+    // RTT — far below 10 GbE capacity when many streams share a connection.
+    let endpoint_transport = endpoint_transport
+        .initial_stream_window_size(16 * 1024 * 1024)
+        .initial_connection_window_size(32 * 1024 * 1024);
+
     Ok(endpoint_transport)
 }
 
@@ -162,10 +175,16 @@ pub fn endpoint(endpoint_config: &GrpcEndpoint) -> Result<tonic::transport::Endpoint, Error>
diff --git a/nativelink-util/tests/evicting_map_test.rs b/nativelink-util/tests/evicting_map_test.rs
@@ ... @@ ) -> Result<(), Error> {
     evicting_map.range(range, |k, v: &BytesWrapper| {
         found_values.push((k.clone(), v.0.clone()));
         true
-    });
+    }).await;
     found_values
 }
diff --git a/nativelink-worker/BUILD.bazel b/nativelink-worker/BUILD.bazel
index 5fcffff20..18166d1d3 100644
--- a/nativelink-worker/BUILD.bazel
+++ b/nativelink-worker/BUILD.bazel
@@ -26,12 +26,14 @@ rust_library(
         "//nativelink-error",
         "//nativelink-metric",
         "//nativelink-proto",
+        "//nativelink-service",
         "//nativelink-store",
         "//nativelink-util",
         "@crates//:bytes",
         "@crates//:filetime",
         "@crates//:formatx",
         "@crates//:futures",
+        "@crates//:hostname",
         "@crates//:opentelemetry",
         "@crates//:parking_lot",
         "@crates//:prost",
diff --git a/nativelink-worker/Cargo.toml b/nativelink-worker/Cargo.toml
index 500ab104e..c72dfe4e9 100644
--- a/nativelink-worker/Cargo.toml
+++ b/nativelink-worker/Cargo.toml
@@ -14,6 +14,7 @@ nativelink-config = { path = "../nativelink-config" }
 nativelink-error = { path = "../nativelink-error" }
 nativelink-metric = { path = "../nativelink-metric" }
 nativelink-proto = { path = "../nativelink-proto" }
+nativelink-service = { path = "../nativelink-service" }
 nativelink-store = { path = "../nativelink-store" }
 nativelink-util = { path = "../nativelink-util" }
 
@@ -22,9 +23,12 @@ bytes = { version = "1.10.1", default-features = false }
 filetime = { version = "0.2.25", default-features = false }
 formatx = { version = "0.2.3", default-features = false }
 futures = { version = "0.3.31", default-features = false }
-opentelemetry = { version = "0.29.1", default-features = false }
+hostname = { version = "0.4.0", default-features = false }
+libc = { version = "0.2", default-features = false }
+opentelemetry = { version = "0.31.0", default-features = false }
 parking_lot = { version = "0.12.3", default-features = false }
-prost = { version = "0.13.5", default-features = false }
+prost = { version = "0.14.3", default-features = false }
+prost-types = { version = "0.14.3", default-features = false }
 relative-path = { version = "2.0.0", default-features = false, features = [
     "alloc",
     "std",
@@ -43,9 +47,9 @@ tokio = { version = "1.44.1", features = [
 tokio-stream = { version = "0.1.17", default-features = false, features = [
     "fs",
 ] }
-tonic = { version = "0.13.0", features = [
+tonic = { version = "0.14.5", features = [
     "gzip",
-    "tls-ring",
+    "tls-aws-lc",
     "transport",
 ], default-features = false }
 tracing = { version = "0.1.41", default-features = false }
@@ -61,7 +65,6 @@ hyper = { version = "1.6.0", default-features = false }
 pretty_assertions = { version = "1.4.1", features = [
     "std",
 ], default-features = false }
-prost-types = { version = "0.13.5", default-features = false }
 rand = { version = "0.9.0", default-features = false, features = [
     "thread_rng",
 ] }
@@ -69,6 +72,7 @@ serial_test = { version = "3.2.0", features = [
     "async",
 ], default-features = false }
 tempfile = { version = "3.15.0", default-features = false }
+tonic-prost = { version = "0.14.5", default-features = false }
 tracing-test = { version = "0.2.5", default-features = false, features = [
     "no-env-filter",
 ] }
diff --git a/nativelink-worker/src/directory_cache.rs b/nativelink-worker/src/directory_cache.rs
index 8a016593c..64e90e68d 100644
--- a/nativelink-worker/src/directory_cache.rs
+++ b/nativelink-worker/src/directory_cache.rs
@@ -14,22 +14,134 @@
 use core::future::Future;
 use core::pin::Pin;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet, VecDeque};
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
-use std::time::SystemTime;
+use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
+use std::time::{Instant, SystemTime};
 
 use nativelink_error::{Code, Error, ResultExt, make_err};
 use nativelink_proto::build::bazel::remote::execution::v2::{
     Directory as ProtoDirectory, DirectoryNode, FileNode, SymlinkNode,
 };
 use nativelink_store::ac_utils::get_and_decode_digest;
+use nativelink_store::fast_slow_store::FastSlowStore;
+use nativelink_store::filesystem_store::{FileEntry, FilesystemStore};
 use nativelink_util::common::DigestInfo;
-use nativelink_util::fs_util::{hardlink_directory_tree, set_readonly_recursive};
+use nativelink_util::fs_util::hardlink_directory_tree;
 use nativelink_util::store_trait::{Store, StoreKey, StoreLike};
 use tokio::fs;
 use tokio::sync::{Mutex, RwLock};
-use tracing::{debug, trace, warn};
+use tracing::{debug, info, trace, warn};
+
+/// Name of the merkle tree metadata file stored alongside each cached directory.
+const MERKLE_METADATA_FILENAME: &str = ".merkle_tree_meta";
+
+/// Cache format version file. Bump when the on-disk format changes in a way
+/// that makes old entries invalid (e.g., permission semantics). On startup,
+/// if the version file is missing or stale, the entire cache is wiped.
+const CACHE_VERSION_FILENAME: &str = ".cache_version";
+/// Bump this when the cache format changes.
+const CACHE_FORMAT_VERSION: u32 = 5;
+
+/// Merkle tree metadata for a cached directory entry.
+///
+/// Stores the mapping from each directory digest in the tree to its relative
+/// path within the cached directory on disk. This allows us to index subtrees
+/// so that future cache misses can reuse already-cached subtrees via
+/// hardlinked copies.
+#[derive(Debug, Clone)]
+pub struct MerkleTreeMetadata {
+    /// Map from directory digest -> relative path within the cache entry.
+    /// For the root directory, the relative path is "" (empty string).
+    pub digest_to_relpath: HashMap<DigestInfo, String>,
+}
+
+impl MerkleTreeMetadata {
+    /// Serialize to a simple line-based text format:
+    /// `hash:size_bytes:relative_path\n`
+    fn serialize(&self) -> String {
+        let mut lines = Vec::with_capacity(self.digest_to_relpath.len());
+        for (digest, relpath) in &self.digest_to_relpath {
+            lines.push(format!("{}:{}:{}", digest.packed_hash(), digest.size_bytes(), relpath));
+        }
+        // Sort for deterministic output
+        lines.sort();
+        lines.join("\n")
+    }
+
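To make the wire format concrete, a hedged sample of what a serialized metadata file for a root with two subdirectories would contain, per the rules above (hashes shortened for display; real entries carry full hex digests, and the root's relative path is the empty string):

    0a1b…ff:4096:
    3c4d…aa:1024:src
    9e8f…bb:2048:src/gen

Sorting the lines, as serialize() does, keeps the file byte-stable for identical trees, which is what deserialize() below relies on being able to round-trip.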
+    /// Deserialize from the line-based text format.
+    fn deserialize(data: &str) -> Result<Self, Error> {
+        let mut digest_to_relpath = HashMap::new();
+        for line in data.lines() {
+            let line = line.trim();
+            if line.is_empty() {
+                continue;
+            }
+            // Format: hash:size_bytes:relative_path
+            // The relative path may contain colons, so split at most 3 parts.
+            let mut parts = line.splitn(3, ':');
+            let hash = parts.next().ok_or_else(|| {
+                make_err!(Code::Internal, "Missing hash in merkle metadata line: {line}")
+            })?;
+            let size_str = parts.next().ok_or_else(|| {
+                make_err!(Code::Internal, "Missing size in merkle metadata line: {line}")
+            })?;
+            let relpath = parts.next().unwrap_or("");
+
+            let size: i64 = size_str.parse().map_err(|e| {
+                make_err!(Code::Internal, "Invalid size in merkle metadata line: {line}: {e}")
+            })?;
+
+            let digest = DigestInfo::try_new(hash, size)
+                .err_tip(|| format!("Invalid digest in merkle metadata line: {line}"))?;
+
+            digest_to_relpath.insert(digest, relpath.to_string());
+        }
+        Ok(Self { digest_to_relpath })
+    }
+
+    /// Build merkle tree metadata by walking a resolved directory tree.
+    ///
+    /// `tree` is the map from digest -> Directory proto (as returned by
+    /// `resolve_directory_tree`). `root_digest` is the root of the tree.
+    ///
+    /// Returns a mapping from each directory digest to its relative path
+    /// within the cache entry (root = "").
+    fn from_directory_tree(
+        tree: &HashMap<DigestInfo, ProtoDirectory>,
+        root_digest: &DigestInfo,
+    ) -> Self {
+        let mut digest_to_relpath = HashMap::with_capacity(tree.len());
+        let mut queue = VecDeque::new();
+        queue.push_back((*root_digest, String::new()));
+
+        while let Some((digest, relpath)) = queue.pop_front() {
+            if digest_to_relpath.contains_key(&digest) {
+                continue; // Already visited (handles diamond dependencies)
+            }
+            digest_to_relpath.insert(digest, relpath.clone());
+
+            if let Some(dir) = tree.get(&digest) {
+                for subdir_node in &dir.directories {
+                    if let Some(child_digest) = subdir_node
+                        .digest
+                        .as_ref()
+                        .and_then(|d| DigestInfo::try_from(d).ok())
+                    {
+                        let child_relpath = if relpath.is_empty() {
+                            subdir_node.name.clone()
+                        } else {
+                            format!("{}/{}", relpath, subdir_node.name)
+                        };
+                        queue.push_back((child_digest, child_relpath));
+                    }
+                }
+            }
+        }
+
+        Self { digest_to_relpath }
+    }
+}
 
 /// Configuration for the directory cache
 #[derive(Debug, Clone)]
@@ -52,17 +164,30 @@ impl Default for DirectoryCacheConfig {
     }
 }
 
-/// Metadata for a cached directory
-#[derive(Debug, Clone)]
+/// Metadata for a cached directory.
+///
+/// `ref_count` and `last_access` use atomics so that the cache hit fast path
+/// only needs a *read* lock on the cache HashMap (no write lock contention).
+#[derive(Debug)]
 struct CachedDirectoryMetadata {
     /// Path to the cached directory
     path: PathBuf,
     /// Size in bytes
     size: u64,
-    /// Last access time for LRU eviction
-    last_access: SystemTime,
-    /// Reference count (number of active users)
-    ref_count: usize,
+    /// Last access time as duration-since-EPOCH in millis (atomic for read-lock access)
+    last_access_millis: AtomicU64,
+    /// Reference count (number of active hardlink operations in flight)
+    ref_count: AtomicUsize,
+}
+
+impl CachedDirectoryMetadata {
+    fn touch(&self) {
+        let millis = SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_millis() as u64;
+        self.last_access_millis.store(millis, Ordering::Relaxed);
+    }
 }
 
 /// High-performance directory cache that uses hardlinks to avoid repeated
@@ -75,21 +200,83 @@ struct CachedDirectoryMetadata {
 /// 3. If no, construct it once and cache for future use
 ///
 /// This dramatically reduces I/O and improves action startup time.
+///
+/// ## Security Note
+///
+/// Hardlinked files share inodes. If an action process has elevated privileges
+/// (e.g. root, `CAP_DAC_OVERRIDE`), it can bypass read-only permissions and
+/// modify cached files through the workspace hardlink, poisoning the cache for
+/// subsequent actions. For multi-tenant clusters, consider running actions in
+/// user namespaces or using copy-on-write (reflink) instead of hardlinks.
 #[derive(Debug)]
 pub struct DirectoryCache {
     /// Configuration
     config: DirectoryCacheConfig,
     /// Cache mapping digest -> metadata
     cache: Arc<RwLock<HashMap<DigestInfo, CachedDirectoryMetadata>>>,
-    /// Lock for cache construction to prevent stampedes
+    /// Per-digest construction locks to prevent stampedes.
+    ///
+    /// Protocol:
+    /// 1. A task entering construction clones the `Arc<Mutex<()>>`, incrementing
+    ///    strong_count to >= 2 (HashMap entry + task clone).
+    /// 2. On completion, if strong_count == 2 and the entry is still *our* Arc
+    ///    (checked via `Arc::ptr_eq`), no other task is waiting, so we remove it.
+    /// 3. If another task is waiting (strong_count > 2), we leave cleanup to the
+    ///    last finisher. The worst case of a missed cleanup is a stale empty Mutex
+    ///    in the HashMap, which is harmless.
     construction_locks: Arc<Mutex<HashMap<DigestInfo, Arc<Mutex<()>>>>>,
-    /// CAS store for fetching directories
+    /// CAS store for fetching directories (used as fallback in construct_directory_impl)
     cas_store: Store,
+    /// Concrete FastSlowStore for the fast `download_to_directory` path.
+    /// When available, cache-miss construction uses batch RPCs instead of
+    /// serial per-file fetches.
+    fast_slow_store: Option<Arc<FastSlowStore>>,
+    /// Concrete FilesystemStore (the fast store inside FastSlowStore).
+    /// Required for hardlinking files from the CAS to the cache directory.
+    filesystem_store: Option<Arc<FilesystemStore>>,
+    /// Subtree index: maps each directory digest to its absolute path on disk
+    /// within a cached entry. This allows partial reuse of cached subtrees
+    /// when a new root digest is requested that shares subtrees with an
+    /// already-cached root.
+    ///
+    /// Updated when cache entries are inserted or evicted.
+    subtree_index: RwLock<HashMap<DigestInfo, PathBuf>>,
+    /// Reference count for each subtree digest across all cached entries.
+    /// When a digest's count drops to zero, it is truly removed and should
+    /// be reported in the "removed" delta.
+    subtree_refcount: RwLock<HashMap<DigestInfo, usize>>,
+    /// Pending subtree digest changes since the last `take_pending_subtree_changes()` call.
+    /// Protected by a Mutex for interior mutability from insertion/eviction paths.
+    pending_subtree_changes: Mutex<PendingSubtreeChanges>,
+    /// Cumulative hit count for stats logging
+    hit_count: AtomicU64,
+    /// Cumulative miss count for stats logging
+    miss_count: AtomicU64,
+    /// Cumulative subtree hit count for stats logging
+    subtree_hit_count: AtomicU64,
+}
+
+/// Accumulated subtree digest changes between periodic reports.
+#[derive(Debug, Default)]
+pub struct PendingSubtreeChanges {
+    /// Subtree digests added since last report.
+    pub added: HashSet<DigestInfo>,
+    /// Subtree digests removed since last report (only those no longer in ANY cached entry).
+    pub removed: HashSet<DigestInfo>,
 }
 
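The strong-count protocol documented on construction_locks is compact but subtle, so here is the same idea in isolation, with a String key standing in for DigestInfo and the construction step reduced to a comment. This is a sketch of the pattern, not the patch's code:

    use std::collections::HashMap;
    use std::sync::Arc;
    use tokio::sync::Mutex;

    async fn with_key_lock(locks: &Mutex<HashMap<String, Arc<Mutex<()>>>>, key: &str) {
        // Step 1: clone the Arc out of the map (strong_count becomes >= 2).
        let lock = locks
            .lock()
            .await
            .entry(key.to_string())
            .or_insert_with(|| Arc::new(Mutex::new(())))
            .clone();
        let guard = lock.lock().await;
        // ... construct the cache entry while holding the per-key guard ...
        drop(guard);
        // Steps 2-3: only the last finisher (map entry + our clone == 2),
        // and only if the entry is still ours, removes the map entry.
        let mut map = locks.lock().await;
        if let Some(existing) = map.get(key) {
            if Arc::ptr_eq(existing, &lock) && Arc::strong_count(&lock) <= 2 {
                map.remove(key);
            }
        }
    }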
 impl DirectoryCache {
-    /// Creates a new `DirectoryCache`
-    pub async fn new(config: DirectoryCacheConfig, cas_store: Store) -> Result<Self, Error> {
+    /// Creates a new `DirectoryCache`.
+    ///
+    /// If `fast_slow_store` is provided, cache-miss construction will use the
+    /// fast batch `download_to_directory` path (GetTree + BatchReadBlobs +
+    /// parallel hardlinks). Otherwise falls back to the serial
+    /// `construct_directory_impl` method.
+    pub async fn new(
+        config: DirectoryCacheConfig,
+        cas_store: Store,
+        fast_slow_store: Option<Arc<FastSlowStore>>,
+    ) -> Result<Self, Error> {
         // Ensure cache root exists
         fs::create_dir_all(&config.cache_root).await.err_tip(|| {
             format!(
@@ -98,59 +285,291 @@ impl DirectoryCache {
             )
         })?;
 
+        // Try to extract the FilesystemStore from the FastSlowStore if provided.
+        let filesystem_store = fast_slow_store.as_ref().and_then(|fss| {
+            fss.fast_store()
+                .downcast_ref::<FilesystemStore>(None)
+                .and_then(|fs| fs.get_arc())
+        });
+
+        let has_fast_path = fast_slow_store.is_some() && filesystem_store.is_some();
+
+        if has_fast_path {
+            info!(
+                cache_root = %config.cache_root.display(),
+                max_entries = config.max_entries,
+                max_size_bytes = config.max_size_bytes,
+                fast_path = true,
+                "DirectoryCache initialized: using fast download_to_directory path for cache misses",
+            );
+        } else if fast_slow_store.is_some() {
+            warn!(
+                cache_root = %config.cache_root.display(),
+                max_entries = config.max_entries,
+                max_size_bytes = config.max_size_bytes,
+                "DirectoryCache initialized: FastSlowStore provided but could not extract FilesystemStore; falling back to serial construction",
+            );
+        } else {
+            info!(
+                cache_root = %config.cache_root.display(),
+                max_entries = config.max_entries,
+                max_size_bytes = config.max_size_bytes,
+                fast_path = false,
+                "DirectoryCache initialized: no FastSlowStore, using serial construction",
+            );
+        }
+
+        let mut initial_cache = HashMap::new();
+        let mut initial_subtree_index = HashMap::new();
+        let mut initial_subtree_refcount: HashMap<DigestInfo, usize> = HashMap::new();
+
+        // Check cache format version. If stale or missing, wipe the cache.
+        let version_path = config.cache_root.join(CACHE_VERSION_FILENAME);
+        let version_ok = match fs::read_to_string(&version_path).await {
+            Ok(v) => v.trim().parse::<u32>().ok() == Some(CACHE_FORMAT_VERSION),
+            Err(_) => false,
+        };
+        if !version_ok {
+            info!(
+                expected = CACHE_FORMAT_VERSION,
+                "DirectoryCache: format version mismatch, clearing stale entries",
+            );
+            if let Ok(mut entries) = fs::read_dir(&config.cache_root).await {
+                while let Ok(Some(entry)) = entries.next_entry().await {
+                    let p = entry.path();
+                    if let Ok(meta) = fs::symlink_metadata(&p).await {
+                        if meta.is_dir() {
+                            // Only chmod directories writable, not files (which
+                            // are hardlinked to CAS). On unix, directory write
+                            // permission is sufficient to unlink files.
+                            Self::remove_readonly_dir(&p).await;
+                        } else {
+                            drop(fs::remove_file(&p).await);
+                        }
+                    }
+                }
+            }
+            fs::write(&version_path, format!("{CACHE_FORMAT_VERSION}\n"))
+                .await
+                .err_tip(|| "Failed to write cache version file")?;
+        }
+
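For orientation, the on-disk layout that the startup scan below expects looks roughly like this. Entry names follow DigestInfo's `{hash}-{size_bytes}` form; the hashes shown here are shortened for display:

    <cache_root>/
    ├── .cache_version            # "5\n" after the check above
    ├── .tmp-<digest>-<pid>-<n>/  # leftover construction attempt, skipped
    └── 3f9a…c21-4096/            # one cached input root
        ├── .merkle_tree_meta     # digest -> relpath index for subtree reuse
        └── src/…                 # hardlinked, read-only tree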
+        // Load existing cache entries from disk on startup.
+        let load_start = Instant::now();
+        let mut loaded_count = 0u64;
+        let mut loaded_subtrees = 0u64;
+        let mut loaded_errors = 0u64;
+        if let Ok(mut entries) = fs::read_dir(&config.cache_root).await {
+            while let Ok(Some(entry)) = entries.next_entry().await {
+                let entry_name = entry.file_name().to_string_lossy().to_string();
+                // Skip temp directories and the merkle metadata files
+                if entry_name.starts_with(".tmp-") || entry_name == MERKLE_METADATA_FILENAME {
+                    continue;
+                }
+                let entry_path = entry.path();
+                let Ok(metadata) = fs::symlink_metadata(&entry_path).await else {
+                    continue;
+                };
+                if !metadata.is_dir() {
+                    continue;
+                }
+
+                // Try to parse the entry name as a DigestInfo
+                let Some(digest) = Self::parse_digest_from_dirname(&entry_name) else {
+                    debug!(name = %entry_name, "Skipping non-digest cache directory entry");
+                    continue;
+                };
+
+                // Calculate the directory size
+                let size = match Self::set_readonly_and_calculate_size(&entry_path).await {
+                    Ok(s) => s,
+                    Err(e) => {
+                        warn!(
+                            name = %entry_name,
+                            ?e,
+                            "Failed to calculate size for existing cache entry, skipping",
+                        );
+                        loaded_errors += 1;
+                        continue;
+                    }
+                };
+
+                // Load merkle tree metadata if available
+                let merkle_path = entry_path.join(MERKLE_METADATA_FILENAME);
+                if let Ok(data) = fs::read_to_string(&merkle_path).await {
+                    match MerkleTreeMetadata::deserialize(&data) {
+                        Ok(merkle) => {
+                            for (sub_digest, relpath) in &merkle.digest_to_relpath {
+                                let abs_path = if relpath.is_empty() {
+                                    entry_path.clone()
+                                } else {
+                                    entry_path.join(relpath)
+                                };
+                                initial_subtree_index.insert(*sub_digest, abs_path);
+                                *initial_subtree_refcount.entry(*sub_digest).or_insert(0) += 1;
+                                loaded_subtrees += 1;
+                            }
+                        }
+                        Err(e) => {
+                            debug!(
+                                name = %entry_name,
+                                ?e,
+                                "Failed to parse merkle metadata, subtrees won't be indexed",
+                            );
+                        }
+                    }
+                }
+
+                let now_millis = SystemTime::now()
+                    .duration_since(SystemTime::UNIX_EPOCH)
+                    .unwrap_or_default()
+                    .as_millis() as u64;
+
+                initial_cache.insert(
+                    digest,
+                    CachedDirectoryMetadata {
+                        path: entry_path,
+                        size,
+                        last_access_millis: AtomicU64::new(now_millis),
+                        ref_count: AtomicUsize::new(0),
+                    },
+                );
+                loaded_count += 1;
+            }
+        }
+
+        let load_elapsed = load_start.elapsed();
+        if loaded_count > 0 || loaded_errors > 0 {
+            info!(
+                loaded_entries = loaded_count,
+                loaded_subtrees,
+                load_errors = loaded_errors,
+                elapsed_ms = load_elapsed.as_millis() as u64,
+                "DirectoryCache: loaded existing entries from disk on startup",
+            );
+        }
+
         Ok(Self {
             config,
-            cache: Arc::new(RwLock::new(HashMap::new())),
+            cache: Arc::new(RwLock::new(initial_cache)),
             construction_locks: Arc::new(Mutex::new(HashMap::new())),
             cas_store,
+            fast_slow_store,
+            filesystem_store,
+            subtree_index: RwLock::new(initial_subtree_index),
+            subtree_refcount: RwLock::new(initial_subtree_refcount),
+            pending_subtree_changes: Mutex::new(PendingSubtreeChanges::default()),
+            hit_count: AtomicU64::new(0),
+            miss_count: AtomicU64::new(0),
+            subtree_hit_count: AtomicU64::new(0),
        })
     }
 
-    /// Gets or creates a directory in the cache, then hardlinks it to the destination
+    /// Returns the digests of all currently cached input root directories.
+    /// The scheduler uses this to give routing preference to workers that
+    /// already have an action's input_root_digest cached.
+    pub async fn cached_digests(&self) -> Vec<DigestInfo> {
+        let cache = self.cache.read().await;
+        cache.keys().copied().collect()
+    }
+
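The snapshot and delta accessors that follow are shaped for a periodic reporting loop. A hedged sketch of such a consumer; the Update enum, the send stub, and the 5-second period are ours, and only all_subtree_digests and take_pending_subtree_changes come from the patch:

    enum Update {
        Snapshot(Vec<DigestInfo>),
        Delta { added: Vec<DigestInfo>, removed: Vec<DigestInfo> },
    }

    fn send(_update: Update) { /* stand-in for the worker->scheduler channel */ }

    async fn report_loop(cache: &DirectoryCache) {
        // On (re)connect: one full snapshot so the scheduler can rebuild state.
        send(Update::Snapshot(cache.all_subtree_digests().await));
        loop {
            tokio::time::sleep(std::time::Duration::from_secs(5)).await;
            // Deltas are drained atomically, so nothing is reported twice.
            let (added, removed) = cache.take_pending_subtree_changes().await;
            if !added.is_empty() || !removed.is_empty() {
                send(Update::Delta { added, removed });
            }
        }
    }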
+    /// Returns ALL subtree digests currently tracked across all cached entries.
+    /// Used for the initial full snapshot on (re)connect.
+    pub async fn all_subtree_digests(&self) -> Vec<DigestInfo> {
+        let refcount = self.subtree_refcount.read().await;
+        refcount.keys().copied().collect()
+    }
+
+    /// Atomically takes the pending subtree changes since the last call,
+    /// returning (added, removed) digest lists and clearing the internal state.
+    pub async fn take_pending_subtree_changes(&self) -> (Vec<DigestInfo>, Vec<DigestInfo>) {
+        let mut pending = self.pending_subtree_changes.lock().await;
+        let added: Vec<DigestInfo> = pending.added.drain().collect();
+        let removed: Vec<DigestInfo> = pending.removed.drain().collect();
+        (added, removed)
+    }
+
+    /// Records that subtree digests from a merkle tree were added (new cache entry).
+    /// Increments refcounts and records newly-appearing digests in pending added.
+    async fn record_subtree_insertion(&self, merkle: &MerkleTreeMetadata) {
+        let mut refcount = self.subtree_refcount.write().await;
+        let mut pending = self.pending_subtree_changes.lock().await;
+        for sub_digest in merkle.digest_to_relpath.keys() {
+            let count = refcount.entry(*sub_digest).or_insert(0);
+            if *count == 0 {
+                // This digest is newly appearing across all cached entries.
+                pending.added.insert(*sub_digest);
+                // If it was in the removed set (evicted then re-added before
+                // the delta was taken), cancel it out.
+                pending.removed.remove(sub_digest);
+            }
+            *count += 1;
+        }
+    }
+
+    /// Records that subtree digests from a merkle tree were removed (evicted cache entry).
+    /// Decrements refcounts and records fully-removed digests in pending removed.
+    async fn record_subtree_removal(&self, merkle_digests: &[DigestInfo]) {
+        let mut refcount = self.subtree_refcount.write().await;
+        let mut pending = self.pending_subtree_changes.lock().await;
+        for sub_digest in merkle_digests {
+            if let Some(count) = refcount.get_mut(sub_digest) {
+                *count = count.saturating_sub(1);
+                if *count == 0 {
+                    refcount.remove(sub_digest);
+                    // This digest is no longer in ANY cached entry.
+                    pending.removed.insert(*sub_digest);
+                    // If it was in the added set (added then evicted before
+                    // the delta was taken), cancel it out.
+                    pending.added.remove(sub_digest);
+                }
+            }
+        }
+    }
+
+    /// Gets or creates a directory in the cache, then hardlinks it to the destination.
     ///
     /// # Arguments
     /// * `digest` - Digest of the root Directory proto
-    /// * `dest_path` - Where to hardlink/create the directory
+    /// * `dest_path` - Where to hardlink/create the directory (may already exist)
     ///
     /// # Returns
     /// * `Ok(true)` - Cache hit (directory was hardlinked)
-    /// * `Ok(false)` - Cache miss (directory was constructed)
+    /// * `Ok(false)` - Cache miss (directory was constructed and cached)
     /// * `Err` - Error during construction or hardlinking
     pub async fn get_or_create(&self, digest: DigestInfo, dest_path: &Path) -> Result<bool, Error> {
-        // Fast path: check if already in cache
-        {
-            let mut cache = self.cache.write().await;
-            if let Some(metadata) = cache.get_mut(&digest) {
-                // Update access time and ref count
-                metadata.last_access = SystemTime::now();
-                metadata.ref_count += 1;
-
-                debug!(
-                    ?digest,
-                    path = ?metadata.path,
-                    "Directory cache HIT"
-                );
-
-                // Try to hardlink from cache
-                match hardlink_directory_tree(&metadata.path, dest_path).await {
-                    Ok(()) => {
-                        metadata.ref_count -= 1;
-                        return Ok(true);
-                    }
-                    Err(e) => {
-                        warn!(
-                            ?digest,
-                            error = ?e,
-                            "Failed to hardlink from cache, will reconstruct"
-                        );
-                        metadata.ref_count -= 1;
-                        // Fall through to reconstruction
-                    }
-                }
-            }
+        let overall_start = Instant::now();
+
+        // Fast path: check if already in cache (read lock only for the lookup)
+        if self.try_hardlink_cached(&digest, dest_path).await? {
+            let hits = self.hit_count.fetch_add(1, Ordering::Relaxed) + 1;
+            let misses = self.miss_count.load(Ordering::Relaxed);
+            let total = hits + misses;
+            let hit_rate = if total > 0 { (hits as f64 / total as f64) * 100.0 } else { 0.0 };
+            debug!(
+                hash = %&digest.packed_hash().to_string()[..12],
+                elapsed_ms = overall_start.elapsed().as_millis() as u64,
+                hits,
+                misses,
+                hit_rate = format!("{hit_rate:.1}%"),
+                "DirectoryCache HIT (hardlinked from cache)",
+            );
+            return Ok(true);
         }
 
-        debug!(?digest, "Directory cache MISS");
+        let misses = self.miss_count.fetch_add(1, Ordering::Relaxed) + 1;
+        let hits = self.hit_count.load(Ordering::Relaxed);
+        let total = hits + misses;
+        let hit_rate = if total > 0 { (hits as f64 / total as f64) * 100.0 } else { 0.0 };
+        debug!(
+            hash = %&digest.packed_hash().to_string()[..12],
+            size_bytes = digest.size_bytes(),
+            hits,
+            misses,
+            hit_rate = format!("{hit_rate:.1}%"),
+            has_fast_path = self.fast_slow_store.is_some() && self.filesystem_store.is_some(),
+            "DirectoryCache MISS, starting construction",
+        );
 
         // Get or create construction lock to prevent stampede
         let construction_lock = {
@@ -164,163 +583,1077 @@ impl DirectoryCache {
 
         // Only one task constructs at a time for this digest
         let _guard = construction_lock.lock().await;
 
-        // Check again in case another task just constructed it
-        {
-            let cache = self.cache.read().await;
-            if let Some(metadata) = cache.get(&digest) {
-                return match hardlink_directory_tree(&metadata.path, dest_path).await {
-                    Ok(()) => Ok(true),
+        // Double-check after acquiring lock — another task may have just constructed it
+        if self.try_hardlink_cached(&digest, dest_path).await? {
+            self.cleanup_construction_lock(&digest, &construction_lock);
+            return Ok(true);
+        }
+
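A quick caller's-eye view of the contract documented above, before the construction machinery takes over. The names here (dir_cache, input_root_digest, the workspace path) are illustrative stand-ins:

    // Inside some action-setup function returning Result<(), Error>:
    let cache_hit = dir_cache
        .get_or_create(input_root_digest, Path::new("/work/ws/input_root"))
        .await?;
    if cache_hit {
        // The whole input tree arrived as hardlinks from the local cache:
        // no CAS round trips, no data copied.
    } else {
        // First sighting: the tree was built once under the cache root and
        // hardlinked out; the next identical action takes the branch above.
    }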
+        // Construct in a temp path, rename to final path on success.
+        // This prevents orphaned partial directories on failure.
+        let cache_path = self.get_cache_path(&digest);
+        let temp_path = self.config.cache_root.join(format!(
+            ".tmp-{digest}-{}-{}",
+            std::process::id(),
+            self.next_temp_id(),
+        ));
+
+        // Clean up any stale temp path from a previous crashed attempt
+        drop(fs::remove_dir_all(&temp_path).await);
+
+        let construction_result: Result<u64, Error> = async {
+            fs::create_dir_all(&temp_path).await.err_tip(|| {
+                format!("Failed to create temp dir: {}", temp_path.display())
+            })?;
+
+            // Step 1: Resolve the merkle tree if we have a FastSlowStore.
+            // This gives us the full directory tree structure, which we use for:
+            //   (a) subtree matching against the subtree_index
+            //   (b) storing merkle metadata alongside the cache entry
+            let resolved_tree = if let Some(fss) = &self.fast_slow_store {
+                match crate::running_actions_manager::resolve_directory_tree(fss, &digest).await {
+                    Ok(tree) => Some(tree),
                     Err(e) => {
                         warn!(
-                            ?digest,
-                            error = ?e,
-                            "Failed to hardlink after construction"
+                            hash = %&digest.packed_hash().to_string()[..12],
+                            ?e,
+                            "DirectoryCache: failed to resolve directory tree, skipping subtree matching",
                         );
-                        // Construct directly at dest_path
-                        self.construct_directory(digest, dest_path).await?;
-                        Ok(false)
+                        None
                     }
-                };
+                }
+            } else {
+                None
+            };
+
+            // Step 2: Check for cached subtrees and construct a partial build plan.
+            // A "subtree hit" means a directory node in the requested tree is
+            // already materialized on disk from a different cached root. We can
+            // hardlink its files instead of downloading them.
+            let subtree_hits: HashMap<DigestInfo, PathBuf> = if let Some(tree) = &resolved_tree {
+                let index = self.subtree_index.read().await;
+                let mut hits = HashMap::new();
+                for dir_digest in tree.keys() {
+                    // Don't count the root itself (that's a full cache hit, handled above)
+                    if *dir_digest == digest {
+                        continue;
+                    }
+                    if let Some(cached_path) = index.get(dir_digest) {
+                        // Verify the cached path still exists on disk
+                        if cached_path.exists() {
+                            hits.insert(*dir_digest, cached_path.clone());
+                        }
+                    }
+                }
+                hits
+            } else {
+                HashMap::new()
+            };
+
+            if !subtree_hits.is_empty() {
+                let subtree_count = subtree_hits.len();
+                let total_dirs = resolved_tree.as_ref().map_or(0, |t| t.len());
+                self.subtree_hit_count.fetch_add(subtree_count as u64, Ordering::Relaxed);
+                debug!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    subtree_hits = subtree_count,
+                    total_dirs,
+                    "DirectoryCache: found cached subtrees, will hardlink instead of downloading",
+                );
             }
-        }
 
-        // Construct the directory in cache
-        let cache_path = self.get_cache_path(&digest);
-        self.construct_directory(digest, &cache_path).await?;
+            // Step 3: Build the directory tree.
+            // If we have subtree hits and a resolved tree, use subtree-aware
+            // construction. Otherwise, fall back to full construction.
+            if let Some(tree) = &resolved_tree {
+                if !subtree_hits.is_empty() {
+                    // Subtree-aware construction: walk the tree, hardlink cached
+                    // subtrees, and only download uncached portions.
+                    self.construct_with_subtrees(
+                        &digest,
+                        tree,
+                        &subtree_hits,
+                        &temp_path,
+                    )
+                    .await
+                    .err_tip(|| "Failed subtree-aware construction")?;
+                } else {
+                    // No subtree hits -- use fast download_to_directory if available.
+                    self.construct_full(&digest, &temp_path).await
+                        .err_tip(|| "Failed full construction")?;
+                }
+            } else {
+                // No resolved tree -- use full construction.
+ self.construct_full(&digest, &temp_path).await + .err_tip(|| "Failed full construction (no resolved tree)")?; + } - // Make it read-only to prevent modifications - set_readonly_recursive(&cache_path) - .await - .err_tip(|| "Failed to set cache directory to readonly")?; + // Step 4: Store merkle tree metadata alongside the cache entry. + if let Some(tree) = &resolved_tree { + let merkle_meta = MerkleTreeMetadata::from_directory_tree(tree, &digest); + let merkle_path = temp_path.join(MERKLE_METADATA_FILENAME); + let serialized = merkle_meta.serialize(); + if let Err(e) = fs::write(&merkle_path, serialized.as_bytes()).await { + warn!( + hash = %&digest.packed_hash().to_string()[..12], + ?e, + "DirectoryCache: failed to write merkle metadata, subtrees won't be indexed", + ); + } + } - // Calculate size - let size = nativelink_util::fs_util::calculate_directory_size(&cache_path) - .await - .err_tip(|| "Failed to calculate directory size")?; + // Combined walk: set read-only permissions and calculate size in one pass. + let readonly_start = Instant::now(); + let size = Self::set_readonly_and_calculate_size(&temp_path).await + .err_tip(|| "Failed to set readonly and calculate size for cache directory")?; + debug!( + hash = %&digest.packed_hash().to_string()[..12], + size_bytes = size, + size_mb = format!("{:.2}", size as f64 / (1024.0 * 1024.0)), + elapsed_ms = readonly_start.elapsed().as_millis() as u64, + "DirectoryCache: set_readonly_and_calculate_size completed", + ); + // macOS requires the source directory to be writable for rename(2). + // Temporarily restore write permission on the root, rename, then + // lock it down again. + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&temp_path).await + .err_tip(|| "Failed to get temp dir metadata before rename")? + .permissions(); + perms.set_mode(0o755); + fs::set_permissions(&temp_path, perms).await + .err_tip(|| "Failed to make temp dir writable before rename")?; + } + fs::rename(&temp_path, &cache_path).await.err_tip(|| { + format!( + "Failed to rename temp dir {} to cache path {}", + temp_path.display(), + cache_path.display() + ) + })?; + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = fs::metadata(&cache_path).await + .err_tip(|| "Failed to get cache dir metadata after rename")? + .permissions(); + perms.set_mode(0o555); + fs::set_permissions(&cache_path, perms).await + .err_tip(|| "Failed to lock down cache dir after rename")?; + } - // Add to cache - { - let mut cache = self.cache.write().await; + // Step 5: Update the subtree index with all directories from this entry, + // and record the insertion for delta reporting. 
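An aside on the chmod/rename/chmod sequence above: it is the classic build-then-publish idiom, made atomic by rename(2) within one filesystem. Reduced to its core, with illustrative paths and payload (error handling kept minimal):

    use std::path::Path;
    use tokio::fs;

    async fn build_and_publish(root: &Path, name: &str) -> std::io::Result<()> {
        let temp = root.join(format!(".tmp-{name}"));
        let fin = root.join(name);
        fs::create_dir_all(&temp).await?;
        fs::write(temp.join("payload"), b"...").await?; // construct the entry
        // rename(2) is atomic on one filesystem: concurrent readers see either
        // no entry at all or a complete one, never a half-built directory.
        fs::rename(&temp, &fin).await
    }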
+ if let Some(tree) = &resolved_tree { + let merkle_meta = MerkleTreeMetadata::from_directory_tree(tree, &digest); + let mut index = self.subtree_index.write().await; + for (sub_digest, relpath) in &merkle_meta.digest_to_relpath { + let abs_path = if relpath.is_empty() { + cache_path.clone() + } else { + cache_path.join(relpath) + }; + index.insert(*sub_digest, abs_path); + } + drop(index); + self.record_subtree_insertion(&merkle_meta).await; + } - // Evict if necessary - self.evict_if_needed(size, &mut cache).await?; + Ok(size) + } + .await; + + let size = match construction_result { + Ok(s) => s, + Err(e) => { + warn!( + hash = %&digest.packed_hash().to_string()[..12], + ?e, + elapsed_ms = overall_start.elapsed().as_millis() as u64, + "DirectoryCache MISS construction FAILED", + ); + Self::remove_readonly_dir(&temp_path).await; + self.cleanup_construction_lock(&digest, &construction_lock); + return Err(e); + } + }; + // Insert with ref_count=1 to prevent eviction during hardlink. + // Collect eviction candidates while holding the lock, then delete outside. + let (evicted_paths, cache_entries, cache_total_size) = { + let mut cache = self.cache.write().await; + let evicted = self.collect_evictions(size, &mut cache); cache.insert( digest, CachedDirectoryMetadata { path: cache_path.clone(), size, - last_access: SystemTime::now(), - ref_count: 0, + last_access_millis: AtomicU64::new( + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64, + ), + ref_count: AtomicUsize::new(1), }, ); + let total_size: u64 = cache.values().map(|m| m.size).sum(); + (evicted, cache.len(), total_size) + }; + + debug!( + hash = %&digest.packed_hash().to_string()[..12], + size_bytes = size, + size_mb = format!("{:.2}", size as f64 / (1024.0 * 1024.0)), + cache_entries, + cache_total_size_mb = format!("{:.2}", cache_total_size as f64 / (1024.0 * 1024.0)), + evicted_count = evicted_paths.len(), + elapsed_ms = overall_start.elapsed().as_millis() as u64, + "DirectoryCache MISS construction complete, inserted into cache", + ); + + // Delete evicted directories outside the lock. + // Cached directories are read-only (0o555/0o444), so we must make them + // writable before removal. Also clean up the subtree index. 
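Two invariants in the insertion path above deserve emphasis, sketched here with stand-in names (cache_lock, collect_evictions, remove_readonly_dir mirror the patch's items): a fresh entry enters the map with ref_count = 1 so it cannot be chosen as a victim before its first hardlink finishes, and eviction only does bookkeeping under the write lock, deferring disk I/O until after the lock is dropped:

    let victims = {
        let mut cache = cache_lock.write().await; // short critical section
        let victims = collect_evictions(incoming_size, &mut cache);
        cache.insert(digest, new_entry_with_refcount_one());
        victims
    }; // write lock released here
    for path in victims {
        remove_readonly_dir(&path).await; // slow disk I/O, no lock held
    }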
+        if !evicted_paths.is_empty() {
+            let mut index = self.subtree_index.write().await;
+            for path in &evicted_paths {
+                self.remove_subtree_index_for_path(path, &mut index).await;
+            }
+            drop(index);
+            for path in evicted_paths {
+                Self::remove_readonly_dir(&path).await;
+            }
         }
 
-        // Hardlink to destination
-        hardlink_directory_tree(&cache_path, dest_path)
-            .await
-            .err_tip(|| "Failed to hardlink newly cached directory")?;
+        // Hardlink to destination (safe — ref_count=1 prevents eviction)
+        let hardlink_start = Instant::now();
+        let hardlink_result = hardlink_directory_tree(&cache_path, dest_path).await;
+        let hardlink_elapsed = hardlink_start.elapsed();
+
+        // Decrement ref_count regardless of hardlink result
+        {
+            let cache = self.cache.read().await;
+            if let Some(metadata) = cache.get(&digest) {
+                metadata.ref_count.fetch_sub(1, Ordering::Relaxed);
+            }
+        }
+
+        // Drop the construction lock guard before cleanup
+        drop(_guard);
+        self.cleanup_construction_lock(&digest, &construction_lock);
+
+        match &hardlink_result {
+            Ok(()) => {
+                debug!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    hardlink_ms = hardlink_elapsed.as_millis() as u64,
+                    total_ms = overall_start.elapsed().as_millis() as u64,
+                    "DirectoryCache: hardlinked newly constructed directory to dest",
+                );
+            }
+            Err(e) => {
+                warn!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    ?e,
+                    hardlink_ms = hardlink_elapsed.as_millis() as u64,
+                    "DirectoryCache: failed to hardlink newly constructed directory to dest",
+                );
+            }
+        }
+
+        hardlink_result.err_tip(|| "Failed to hardlink newly cached directory")?;
 
         Ok(false)
     }
 
-    /// Constructs a directory from the CAS at the given path
-    fn construct_directory<'a>(
-        &'a self,
-        digest: DigestInfo,
-        dest_path: &'a Path,
-    ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>> {
-        Box::pin(async move {
-            debug!(?digest, ?dest_path, "Constructing directory");
+    /// Attempts to hardlink a cached directory to dest, guarding eviction with ref_count.
+    /// Returns `Ok(true)` on cache hit + successful hardlink, `Ok(false)` on cache miss
+    /// or failed hardlink (caller should fall through to reconstruction).
+    async fn try_hardlink_cached(
+        &self,
+        digest: &DigestInfo,
+        dest_path: &Path,
+    ) -> Result<bool, Error> {
+        let (src_path, cached_size) = {
+            // Read lock is sufficient — ref_count and last_access are atomic.
+            let cache = self.cache.read().await;
+            let Some(metadata) = cache.get(digest) else {
+                debug!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    "DirectoryCache: not in cache (miss)",
+                );
+                return Ok(false);
+            };
+            metadata.touch();
+            metadata.ref_count.fetch_add(1, Ordering::Relaxed);
+            (metadata.path.clone(), metadata.size)
+        };
 
-            // Fetch the Directory proto
-            let directory: ProtoDirectory = get_and_decode_digest(&self.cas_store, digest.into())
-                .await
-                .err_tip(|| format!("Failed to fetch directory digest: {digest:?}"))?;
+        debug!(
+            hash = %&digest.packed_hash().to_string()[..12],
+            cached_size_bytes = cached_size,
+            "DirectoryCache: found in cache, hardlinking",
+        );
 
-            // Create the destination directory
-            fs::create_dir_all(dest_path)
-                .await
-                .err_tip(|| format!("Failed to create directory: {}", dest_path.display()))?;
+        let hardlink_start = Instant::now();
+        let result = hardlink_directory_tree(&src_path, dest_path).await;
+        let hardlink_elapsed = hardlink_start.elapsed();
 
-            // Process files
-            for file in &directory.files {
-                self.create_file(dest_path, file).await?;
+        // Always decrement ref_count
+        {
+            let cache = self.cache.read().await;
+            if let Some(metadata) = cache.get(digest) {
+                metadata.ref_count.fetch_sub(1, Ordering::Relaxed);
             }
+        }
 
-            // Process subdirectories recursively
-            for dir_node in &directory.directories {
-                self.create_subdirectory(dest_path, dir_node).await?;
+        match result {
+            Ok(()) => {
+                debug!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    cached_size_bytes = cached_size,
+                    hardlink_ms = hardlink_elapsed.as_millis() as u64,
+                    "DirectoryCache: hardlink from cache succeeded",
+                );
+                Ok(true)
+            }
+            Err(e) => {
+                warn!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    error = ?e,
+                    hardlink_ms = hardlink_elapsed.as_millis() as u64,
+                    "DirectoryCache: hardlink from cache FAILED, will reconstruct",
+                );
+                Ok(false)
+            }
+        }
+    }
 
-            // Process symlinks
-            for symlink in &directory.symlinks {
-                self.create_symlink(dest_path, symlink).await?;
+    /// Removes the construction lock entry if no other task is waiting on it.
+    fn cleanup_construction_lock(&self, digest: &DigestInfo, lock: &Arc<Mutex<()>>) {
+        // Acquire the outer mutex to make the check+remove atomic with respect
+        // to new tasks cloning from the HashMap.
+        if let Ok(mut locks) = self.construction_locks.try_lock() {
+            // Only remove if the entry is still *our* lock (not a replacement)
+            // and no other task is holding a clone.
+            if let Some(existing) = locks.get(digest) {
+                if Arc::ptr_eq(existing, lock) && Arc::strong_count(lock) <= 2 {
+                    locks.remove(digest);
+                }
             }
+        }
+    }
 
-            Ok(())
-        })
-    }
-
+    /// Recursively removes a read-only directory by first restoring write
+    /// permissions on directories. Files are NOT chmoded because they are
+    /// hardlinked to CAS entries — changing their mode would corrupt the
+    /// shared inode's permissions for all concurrent actions.
+    /// On unix, only the parent directory needs write permission to unlink files.
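The doc comment above is load-bearing, so here is the failure it prevents, demonstrated in isolation: mode bits live on the inode, and every hardlink shares that inode. A Unix-only sketch with illustrative /tmp paths (it assumes those paths are free):

    use std::fs;
    use std::os::unix::fs::PermissionsExt;

    fn main() -> std::io::Result<()> {
        fs::write("/tmp/cas_blob", b"data")?;
        fs::hard_link("/tmp/cas_blob", "/tmp/workspace_copy")?;
        // chmod through one name...
        fs::set_permissions("/tmp/workspace_copy", fs::Permissions::from_mode(0o400))?;
        // ...is observed through the other name: same inode, same mode bits.
        let mode = fs::metadata("/tmp/cas_blob")?.permissions().mode() & 0o777;
        assert_eq!(mode, 0o400);
        Ok(())
    }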
+    async fn remove_readonly_dir(path: &Path) {
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::PermissionsExt;
+            if let Ok(metadata) = fs::symlink_metadata(path).await {
+                if metadata.is_dir() {
+                    drop(fs::set_permissions(path, std::fs::Permissions::from_mode(0o755)).await);
+                    if let Ok(mut entries) = fs::read_dir(path).await {
+                        while let Ok(Some(entry)) = entries.next_entry().await {
+                            if let Ok(meta) = fs::symlink_metadata(entry.path()).await {
+                                if meta.is_dir() {
+                                    Box::pin(Self::remove_readonly_dir(&entry.path())).await;
+                                }
+                                // Do NOT chmod files — they are hardlinked to CAS.
+                            }
+                        }
+                    }
+                }
+            }
+        }
 
+        if let Err(e) = fs::remove_dir_all(path).await {
+            warn!(path = ?path, error = ?e, "Failed to remove evicted directory from disk");
+        }
+    }
 
-    /// Creates a file from a `FileNode`
-    async fn create_file(&self, parent: &Path, file_node: &FileNode) -> Result<(), Error> {
-        let file_path = parent.join(&file_node.name);
-        let digest = DigestInfo::try_from(
-            file_node
-                .digest
-                .clone()
-                .ok_or_else(|| make_err!(Code::InvalidArgument, "File node missing digest"))?,
-        )
-        .err_tip(|| "Invalid file digest")?;
+    /// Monotonically increasing counter for unique temp paths.
+    fn next_temp_id(&self) -> u64 {
+        use std::sync::atomic::AtomicU64 as StaticAtomicU64;
+        static COUNTER: StaticAtomicU64 = StaticAtomicU64::new(0);
+        COUNTER.fetch_add(1, Ordering::Relaxed)
+    }
 
-        trace!(?file_path, ?digest, "Creating file");
+    /// Validates that a node name is a single safe path component.
+    /// Rejects path separators, traversal components, empty names, and null bytes.
+    fn validate_node_name(name: &str) -> Result<(), Error> {
+        if name.is_empty()
+            || name == "."
+            || name == ".."
+            || name.contains('/')
+            || name.contains('\\')
+            || name.contains('\0')
+        {
+            return Err(make_err!(
+                Code::InvalidArgument,
+                "Invalid node name in Directory proto: {:?}",
+                name
+            ));
+        }
+        Ok(())
+    }
 
-        // Fetch file content from CAS
-        let data = self
-            .cas_store
-            .get_part_unchunked(StoreKey::Digest(digest), 0, None)
-            .await
-            .err_tip(|| format!("Failed to fetch file: {}", file_path.display()))?;
+    /// Validates that a symlink target does not escape the workspace root.
+    /// Rejects absolute paths. For relative paths, verifies the resolved path
+    /// stays within the workspace by counting `..` components.
+    fn validate_symlink_target(target: &str, depth: usize) -> Result<(), Error> {
+        if target.is_empty() || target.contains('\0') {
+            return Err(make_err!(
+                Code::InvalidArgument,
+                "Invalid symlink target: {:?}",
+                target
+            ));
+        }
 
-        // Write to disk
-        fs::write(&file_path, data.as_ref())
-            .await
-            .err_tip(|| format!("Failed to write file: {}", file_path.display()))?;
+        // Reject absolute symlink targets
+        if target.starts_with('/') || target.starts_with('\\') {
+            return Err(make_err!(
+                Code::InvalidArgument,
+                "Absolute symlink target not allowed: {:?}",
+                target
+            ));
+        }
 
-        // Set permissions
-        #[cfg(unix)]
-        if file_node.is_executable {
-            use std::os::unix::fs::PermissionsExt;
-            let mut perms = fs::metadata(&file_path)
-                .await
-                .err_tip(|| "Failed to get file metadata")?
-                .permissions();
-            perms.set_mode(0o755);
-            fs::set_permissions(&file_path, perms)
-                .await
-                .err_tip(|| "Failed to set file permissions")?;
+        // Count net upward traversals. `depth` is how deep we are in the tree.
+        let mut net_up: usize = 0;
+        for component in target.split('/') {
+            match component {
+                ".." => {
+                    net_up += 1;
+                    if net_up > depth {
+                        return Err(make_err!(
+                            Code::InvalidArgument,
+                            "Symlink target escapes workspace root: {:?}",
+                            target
+                        ));
+                    }
+                }
+                "" | "." => {}
+                _ => {
+                    net_up = net_up.saturating_sub(1);
+                }
+            }
+        }
 
         Ok(())
     }
 
-    /// Creates a subdirectory from a `DirectoryNode`
-    async fn create_subdirectory(
-        &self,
-        parent: &Path,
-        dir_node: &DirectoryNode,
-    ) -> Result<(), Error> {
+    /// Walks a directory tree, setting all entries to read-only and computing
+    /// the total file size in a single traversal (avoiding two separate walks).
+    /// Directories are set to 0o555, files have write bits stripped.
+    fn set_readonly_and_calculate_size<'a>(
+        path: &'a Path,
+    ) -> Pin<Box<dyn Future<Output = Result<u64, Error>> + Send + 'a>> {
+        Box::pin(async move {
+            let metadata = fs::symlink_metadata(path)
+                .await
+                .err_tip(|| format!("Failed to get metadata for: {}", path.display()))?;
+
+            // Skip symlinks -- do not follow them or change permissions.
+            if metadata.is_symlink() {
+                return Ok(0);
+            }
+
+            if metadata.is_dir() {
+                let mut entries = fs::read_dir(path)
+                    .await
+                    .err_tip(|| format!("Failed to read directory: {}", path.display()))?;
+
+                let mut total_size = 0u64;
+                while let Some(entry) = entries
+                    .next_entry()
+                    .await
+                    .err_tip(|| format!("Failed to get next entry in: {}", path.display()))?
+                {
+                    total_size += Self::set_readonly_and_calculate_size(&entry.path()).await?;
+                }
+
+                // Set directory to read-only (0o555) to protect cache integrity.
+                // Since we use hardlinks (not symlinks), actions never access
+                // cached directories directly — they get fresh writable copies.
+                #[cfg(unix)]
+                {
+                    use std::os::unix::fs::PermissionsExt;
+                    let mut perms = metadata.permissions();
+                    perms.set_mode(0o555);
+                    fs::set_permissions(path, perms)
+                        .await
+                        .err_tip(|| format!("Failed to set permissions for: {}", path.display()))?;
+                }
+                #[cfg(windows)]
+                {
+                    let mut perms = metadata.permissions();
+                    perms.set_readonly(true);
+                    fs::set_permissions(path, perms)
+                        .await
+                        .err_tip(|| format!("Failed to set permissions for: {}", path.display()))?;
+                }
+
+                Ok(total_size)
+            } else if metadata.is_file() {
+                let size = metadata.len();
+
+                // Ensure all cached files are 0o555 (read+execute, no write).
+                // This both protects cache integrity and ensures shell scripts
+                // remain executable. Old CAS files with 0o644 become 0o555.
+                #[cfg(unix)]
+                {
+                    use std::os::unix::fs::PermissionsExt;
+                    let current_mode = metadata.permissions().mode() & 0o777;
+                    if current_mode != 0o555 {
+                        let mut perms = metadata.permissions();
+                        perms.set_mode(0o555);
+                        fs::set_permissions(path, perms)
+                            .await
+                            .err_tip(|| format!("Failed to set permissions for: {}", path.display()))?;
+                    }
+                }
+                #[cfg(windows)]
+                {
+                    let mut perms = metadata.permissions();
+                    perms.set_readonly(true);
+                    fs::set_permissions(path, perms)
+                        .await
+                        .err_tip(|| format!("Failed to set permissions for: {}", path.display()))?;
+                }
+
+                Ok(size)
+            } else {
+                Ok(0)
+            }
+        })
+    }
+
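Looping back to the symlink validation above: the depth rule says that at nesting depth d, a relative target may climb at most d levels before it would escape the input root. Mirrored as a free function so the examples stand alone (same traversal logic as the method above):

    fn escapes(target: &str, depth: usize) -> bool {
        if target.starts_with('/') {
            return true; // absolute targets are always rejected
        }
        let mut net_up = 0usize;
        for component in target.split('/') {
            match component {
                ".." => {
                    net_up += 1;
                    if net_up > depth {
                        return true;
                    }
                }
                "" | "." => {}
                _ => net_up = net_up.saturating_sub(1),
            }
        }
        false
    }

    fn main() {
        assert!(!escapes("../sibling/lib.so", 1)); // stays inside the root
        assert!(escapes("../../etc/passwd", 1));   // climbs past the root
        assert!(escapes("/etc/passwd", 3));        // absolute: rejected
    }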
+    /// Full construction path: tries fast download_to_directory, falls back to serial.
+    /// Used when there are no subtree hits.
+    async fn construct_full(&self, digest: &DigestInfo, temp_path: &Path) -> Result<(), Error> {
+        // Try the fast batch path first if concrete stores are available.
+        let fast_path_result = if let (Some(fss), Some(_fs_store)) =
+            (&self.fast_slow_store, &self.filesystem_store)
+        {
+            let fs_pin = Pin::new(
+                fss.fast_store()
+                    .downcast_ref::<FilesystemStore>(None)
+                    .err_tip(|| "Could not downcast fast store to FilesystemStore")?,
+            );
+            let temp_str = temp_path.to_string_lossy().to_string();
+            debug!(
+                hash = %&digest.packed_hash().to_string()[..12],
+                "DirectoryCache: fast download_to_directory starting",
+            );
+            let construction_start = Instant::now();
+            let result = crate::running_actions_manager::download_to_directory(
+                fss, fs_pin, digest, &temp_str,
+            )
+            .await;
+            let elapsed = construction_start.elapsed();
+            match &result {
+                Ok(()) => {
+                    debug!(
+                        hash = %&digest.packed_hash().to_string()[..12],
+                        elapsed_ms = elapsed.as_millis() as u64,
+                        "DirectoryCache: fast download_to_directory completed",
+                    );
+                    Some(Ok(()))
+                }
+                Err(e) => {
+                    warn!(
+                        hash = %&digest.packed_hash().to_string()[..12],
+                        ?e,
+                        elapsed_ms = elapsed.as_millis() as u64,
+                        "DirectoryCache: fast download_to_directory failed, trying serial fallback",
+                    );
+                    // Clean up the partial temp directory before fallback
+                    drop(fs::remove_dir_all(temp_path).await);
+                    drop(fs::create_dir_all(temp_path).await);
+                    Some(Err(e.clone()))
+                }
+            }
+        } else {
+            None
+        };
+
+        // Use the fast path result, or fall back to serial construction.
+        match fast_path_result {
+            Some(Ok(())) => Ok(()),
+            Some(Err(_)) | None => {
+                if fast_path_result.is_none() {
+                    debug!(
+                        hash = %&digest.packed_hash().to_string()[..12],
+                        "DirectoryCache: using serial construct_directory_impl (no fast path available)",
+                    );
+                }
+                let serial_start = Instant::now();
+                self.construct_directory(*digest, temp_path).await
+                    .err_tip(|| "Failed to construct directory for cache")?;
+                debug!(
+                    hash = %&digest.packed_hash().to_string()[..12],
+                    elapsed_ms = serial_start.elapsed().as_millis() as u64,
+                    "DirectoryCache: serial construct_directory_impl completed",
+                );
+                Ok(())
+            }
+        }
+    }
+
+    /// Subtree-aware construction: walks the resolved directory tree, creates
+    /// hardlinked subtrees for cached portions, and only downloads uncached
+    /// portions via `download_to_directory` or serial fallback.
+    ///
+    /// Uses file hardlinks (creating fresh directories) rather than directory
+    /// symlinks because Bazel actions create output directories inside the
+    /// input tree — symlinks would mutate the cache.
+    async fn construct_with_subtrees(
+        &self,
+        root_digest: &DigestInfo,
+        tree: &HashMap<DigestInfo, ProtoDirectory>,
+        subtree_hits: &HashMap<DigestInfo, PathBuf>,
+        dest_path: &Path,
+    ) -> Result<(), Error> {
+        let construction_start = Instant::now();
+
+        // BFS walk of the tree, creating directories and hardlinked copies.
+        // When we encounter a subtree hit, we hardlink the cached copy and
+        // skip its entire subtree (no need to traverse children).
+ let mut queue = VecDeque::new(); + queue.push_back((*root_digest, dest_path.to_path_buf())); + + let mut dirs_created = 0usize; + let mut subtrees_linked = 0usize; + let mut files_to_download = Vec::new(); + let mut symlinks_to_create: Vec<(String, PathBuf)> = Vec::new(); + + while let Some((dir_digest, dir_path)) = queue.pop_front() { + let directory = tree.get(&dir_digest).ok_or_else(|| { + make_err!( + Code::Internal, + "Directory {:?} not found in resolved tree during subtree construction", + dir_digest + ) + })?; + + // Process subdirectories + for subdir_node in &directory.directories { + Self::validate_node_name(&subdir_node.name)?; + let child_digest: DigestInfo = subdir_node + .digest + .as_ref() + .ok_or_else(|| { + make_err!(Code::InvalidArgument, "Directory node missing digest") + })? + .try_into() + .err_tip(|| "Invalid directory digest in subtree construction")?; + + let child_path = dir_path.join(&subdir_node.name); + + if let Some(cached_path) = subtree_hits.get(&child_digest) { + // Subtree hit: hardlink files from cached subtree into + // fresh writable directories. We can't use directory symlinks + // because Bazel creates output directories inside the input + // tree, which would mutate the cache. + match hardlink_directory_tree(cached_path, &child_path).await { + Ok(()) => { + subtrees_linked += 1; + debug!( + child_hash = %&child_digest.packed_hash().to_string()[..12], + src = %cached_path.display(), + dst = %child_path.display(), + "DirectoryCache: hardlinked cached subtree", + ); + // Do NOT enqueue children -- the hardlink covers the entire subtree. + continue; + } + Err(e) => { + // The cached subtree was evicted between our + // exists() check and now. Fall back to creating + // the directory and downloading its contents. + warn!( + child_hash = %&child_digest.packed_hash().to_string()[..12], + src = %cached_path.display(), + ?e, + "DirectoryCache: subtree evicted during construction, falling back to download", + ); + } + } + } + + // No subtree hit (or subtree evicted) -- create the directory and recurse. + fs::create_dir_all(&child_path).await.err_tip(|| { + format!("Failed to create directory: {}", child_path.display()) + })?; + dirs_created += 1; + queue.push_back((child_digest, child_path)); + } + + // Collect files that need to be downloaded for this (non-symlinked) directory. + for file_node in &directory.files { + Self::validate_node_name(&file_node.name)?; + let file_digest: DigestInfo = file_node + .digest + .as_ref() + .ok_or_else(|| { + make_err!(Code::InvalidArgument, "File node missing digest") + })? 
+                    .try_into()
+                    .err_tip(|| "Invalid file digest in subtree construction")?;
+
+                let file_path = dir_path.join(&file_node.name);
+                files_to_download.push((file_digest, file_path, file_node.is_executable));
+            }
+
+            // Collect symlinks from the proto
+            for symlink_node in &directory.symlinks {
+                Self::validate_node_name(&symlink_node.name)?;
+                let link_path = dir_path.join(&symlink_node.name);
+                symlinks_to_create.push((symlink_node.target.clone(), link_path));
+            }
+        }
+
+        debug!(
+            hash = %&root_digest.packed_hash().to_string()[..12],
+            dirs_created,
+            subtrees_linked,
+            files_to_download = files_to_download.len(),
+            symlinks = symlinks_to_create.len(),
+            "DirectoryCache: subtree-aware construction plan",
+        );
+
+        // Create symlinks from the proto
+        #[cfg(target_family = "unix")]
+        for (target, link_path) in &symlinks_to_create {
+            fs::symlink(target, link_path)
+                .await
+                .err_tip(|| format!("Failed to create symlink: {} -> {}", link_path.display(), target))?;
+        }
+
+        // Download uncached files.
+        // If we have a FastSlowStore + FilesystemStore, use hardlinks from CAS.
+        // Otherwise fall back to serial CAS fetch.
+        if !files_to_download.is_empty() {
+            if let (Some(fss), Some(_fs_store)) = (&self.fast_slow_store, &self.filesystem_store) {
+                let fs_store_pin = Pin::new(
+                    fss.fast_store()
+                        .downcast_ref::<FilesystemStore>(None)
+                        .err_tip(|| "Could not downcast fast store to FilesystemStore")?,
+                );
+
+                // Check which blobs are already in the fast store.
+                // Skip zero-byte digests — they aren't stored in FilesystemStore.
+                let unique_digests: Vec<DigestInfo> = {
+                    let mut seen = HashSet::new();
+                    files_to_download
+                        .iter()
+                        .filter_map(|(d, _, _)| {
+                            if d.size_bytes() > 0 && seen.insert(*d) { Some(*d) } else { None }
+                        })
+                        .collect()
+                };
+                let store_keys: Vec<StoreKey<'_>> =
+                    unique_digests.iter().map(|d| (*d).into()).collect();
+                let mut has_results = vec![None; store_keys.len()];
+                Pin::new(fss.fast_store())
+                    .has_with_results(&store_keys, &mut has_results)
+                    .await
+                    .err_tip(|| "Batch has_with_results in subtree construction")?;
+
+                // Populate missing blobs into the fast store.
+                let missing: Vec<&DigestInfo> = unique_digests
+                    .iter()
+                    .zip(has_results.iter())
+                    .filter_map(|(d, r)| if r.is_none() { Some(d) } else { None })
+                    .collect();
+
+                if !missing.is_empty() {
+                    debug!(
+                        hash = %&root_digest.packed_hash().to_string()[..12],
+                        missing = missing.len(),
+                        "DirectoryCache: fetching missing blobs for uncached files",
+                    );
+                    for d in &missing {
+                        let key: StoreKey<'_> = (**d).into();
+                        fss.populate_fast_store(key).await
+                            .err_tip(|| format!("Failed to populate fast store for {:?}", d))?;
+                    }
+                }
+
+                // Hardlink files from the fast store to their destination paths.
+                for (file_digest, file_path, is_executable) in &files_to_download {
+                    if file_digest.size_bytes() == 0 {
+                        // Zero-byte files aren't stored in FilesystemStore.
+                        // Create them directly.
+                        fs::write(&file_path, b"")
+                            .await
+                            .err_tip(|| format!("Failed to create empty file: {}", file_path.display()))?;
+                    } else {
+                        let file_entry = fs_store_pin
+                            .get_file_entry_for_digest(file_digest)
+                            .await
+                            .err_tip(|| format!("Getting file entry for {:?}", file_digest))?;
+                        let dest = file_path.clone();
+                        file_entry
+                            .get_file_path_locked(|src_path| async move {
+                                fs::hard_link(&src_path, &dest)
+                                    .await
+                                    .err_tip(|| format!(
+                                        "Failed to hardlink {:?} to {}",
+                                        src_path,
+                                        dest.display(),
+                                    ))
+                            })
+                            .await?;
+                    }
+
+                    // Ensure all files have 0o555. CAS files ingested before the
+                    // 0o555 default may still be 0o644; we must fix them here since
+                    // hardlinks share the inode and set_readonly_and_calculate_size
+                    // would turn 0o644 into 0o444 (no execute), breaking shell scripts.
+                    #[cfg(unix)]
+                    {
+                        use std::os::unix::fs::PermissionsExt;
+                        let meta = fs::metadata(&file_path).await
+                            .err_tip(|| "Failed to get file metadata for permission fix")?;
+                        let current_mode = meta.permissions().mode() & 0o777;
+                        let new_mode = if *is_executable {
+                            current_mode | 0o111
+                        } else {
+                            0o555
+                        };
+                        if new_mode != current_mode {
+                            let mut perms = meta.permissions();
+                            perms.set_mode(new_mode);
+                            fs::set_permissions(&file_path, perms).await
+                                .err_tip(|| "Failed to set file permission")?;
+                        }
+                    }
+                }
+            } else {
+                // Serial fallback: fetch each file from CAS individually.
+                for (file_digest, file_path, _is_executable) in &files_to_download {
+                    let data = self
+                        .cas_store
+                        .get_part_unchunked(StoreKey::Digest(*file_digest), 0, None)
+                        .await
+                        .err_tip(|| format!("Failed to fetch file: {}", file_path.display()))?;
+                    fs::write(&file_path, data.as_ref())
+                        .await
+                        .err_tip(|| format!("Failed to write file: {}", file_path.display()))?;
+
+                    // Always set 0o555 to match CAS defaults (see create_file).
+                    #[cfg(unix)]
+                    {
+                        use std::os::unix::fs::PermissionsExt;
+                        let mut perms = fs::metadata(&file_path).await
+                            .err_tip(|| "Failed to get file metadata")?
+                            .permissions();
+                        perms.set_mode(0o555);
+                        fs::set_permissions(&file_path, perms).await
+                            .err_tip(|| "Failed to set file permissions")?;
+                    }
+                }
+            }
+        }
+
+        let elapsed = construction_start.elapsed();
+        debug!(
+            hash = %&root_digest.packed_hash().to_string()[..12],
+            dirs_created,
+            subtrees_linked,
+            files_downloaded = files_to_download.len(),
+            elapsed_ms = elapsed.as_millis() as u64,
+            "DirectoryCache: subtree-aware construction completed",
+        );
+
+        Ok(())
+    }
+
+    /// Removes subtree index entries that belong to a given cache entry path.
+    /// Loads the merkle metadata file from the cache entry to determine which
+    /// digests to remove. Also decrements subtree refcounts and records
+    /// fully-removed digests for delta reporting.
+    async fn remove_subtree_index_for_path(
+        &self,
+        cache_entry_path: &Path,
+        index: &mut HashMap<DigestInfo, PathBuf>,
+    ) {
+        let merkle_path = cache_entry_path.join(MERKLE_METADATA_FILENAME);
+        if let Ok(data) = fs::read_to_string(&merkle_path).await {
+            if let Ok(merkle) = MerkleTreeMetadata::deserialize(&data) {
+                let mut removed = 0usize;
+                let merkle_digests: Vec<DigestInfo> =
+                    merkle.digest_to_relpath.keys().copied().collect();
+                for (sub_digest, relpath) in &merkle.digest_to_relpath {
+                    // Only remove if the index entry points to this specific cache entry.
+                    let abs_path = if relpath.is_empty() {
+                        cache_entry_path.to_path_buf()
+                    } else {
+                        cache_entry_path.join(relpath)
+                    };
+                    if let Some(existing) = index.get(sub_digest) {
+                        if *existing == abs_path {
+                            index.remove(sub_digest);
+                            removed += 1;
+                        }
+                    }
+                }
+                // Record subtree removals for delta reporting.
+                // This decrements refcounts and only marks digests as removed
+                // when they are no longer present in ANY cached entry.
+                self.record_subtree_removal(&merkle_digests).await;
+                debug!(
+                    path = %cache_entry_path.display(),
+                    removed_subtrees = removed,
+                    "DirectoryCache: cleaned up subtree index for evicted entry",
+                );
+            }
+        }
+    }
+
+    /// Try to parse a directory entry name as a DigestInfo.
+    /// Expected format is the same as `DigestInfo::to_string()`,
+    /// i.e., `{hash}-{size_bytes}`.
+ fn parse_digest_from_dirname(name: &str) -> Option<DigestInfo> {
+ // DigestInfo::to_string() produces "{hash}-{size}", so split on the last '-'
+ let last_dash = name.rfind('-')?;
+ let hash = &name[..last_dash];
+ let size_str = &name[last_dash + 1..];
+ let size: i64 = size_str.parse().ok()?;
+ DigestInfo::try_new(hash, size).ok()
+ }
+
+ /// Constructs a directory from the CAS at the given path.
+ /// `depth` tracks nesting depth for symlink target validation.
+ fn construct_directory_impl<'a>(
+ &'a self,
+ digest: DigestInfo,
+ dest_path: &'a Path,
+ depth: usize,
+ ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>> {
+ Box::pin(async move {
+ debug!(?digest, ?dest_path, "Constructing directory");
+
+ // Fetch the Directory proto
+ let directory: ProtoDirectory = get_and_decode_digest(&self.cas_store, digest.into())
+ .await
+ .err_tip(|| format!("Failed to fetch directory digest: {digest:?}"))?;
+
+ // Create the destination directory
+ fs::create_dir_all(dest_path)
+ .await
+ .err_tip(|| format!("Failed to create directory: {}", dest_path.display()))?;
+
+ // Process files
+ for file in &directory.files {
+ Self::validate_node_name(&file.name)?;
+ self.create_file(dest_path, file).await?;
+ }
+
+ // Process subdirectories recursively
+ for dir_node in &directory.directories {
+ Self::validate_node_name(&dir_node.name)?;
+ self.create_subdirectory(dest_path, dir_node, depth + 1)
+ .await?;
+ }
+
+ // Process symlinks
+ for symlink in &directory.symlinks {
+ Self::validate_node_name(&symlink.name)?;
+ Self::validate_symlink_target(&symlink.target, depth)?;
+ self.create_symlink(dest_path, symlink).await?;
+ }
+
+ Ok(())
+ })
+ }
+
+ /// Constructs a directory from the CAS at the given path
+ fn construct_directory<'a>(
+ &'a self,
+ digest: DigestInfo,
+ dest_path: &'a Path,
+ ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>> {
+ self.construct_directory_impl(digest, dest_path, 0)
+ }
+
+ /// Creates a file from a `FileNode`
+ async fn create_file(&self, parent: &Path, file_node: &FileNode) -> Result<(), Error> {
+ let file_path = parent.join(&file_node.name);
+ let digest = DigestInfo::try_from(
+ file_node
+ .digest
+ .as_ref()
+ .ok_or_else(|| make_err!(Code::InvalidArgument, "File node missing digest"))?
+ .clone(),
+ )
+ .err_tip(|| "Invalid file digest")?;
+
+ trace!(?file_path, ?digest, "Creating file");
+
+ // Fetch file content from CAS
+ let data = self
+ .cas_store
+ .get_part_unchunked(StoreKey::Digest(digest), 0, None)
+ .await
+ .err_tip(|| format!("Failed to fetch file: {}", file_path.display()))?;
+
+ // Write to disk
+ fs::write(&file_path, data.as_ref())
+ .await
+ .err_tip(|| format!("Failed to write file: {}", file_path.display()))?;
+
+ // Always set 0o555 to match CAS store defaults. Some build tools
+ // (rules_cc, rules_rust) set is_executable=false on shell scripts
+ // that must be executable; 0o555 as the base avoids EPERM.
+ #[cfg(unix)]
+ {
+ use std::os::unix::fs::PermissionsExt;
+ let mut perms = fs::metadata(&file_path)
+ .await
+ .err_tip(|| "Failed to get file metadata")?
+ .permissions();
+ perms.set_mode(0o555);
+ fs::set_permissions(&file_path, perms)
+ .await
+ .err_tip(|| "Failed to set file permissions")?;
+ }
+
+ Ok(())
+ }
+
+ /// Creates a subdirectory from a `DirectoryNode`
+ async fn create_subdirectory(
+ &self,
+ parent: &Path,
+ dir_node: &DirectoryNode,
+ depth: usize,
+ ) -> Result<(), Error> {
 let dir_path = parent.join(&dir_node.name);
- let digest =
- DigestInfo::try_from(dir_node.digest.clone().ok_or_else(|| {
- make_err!(Code::InvalidArgument, "Directory node missing digest")
- })?)
- .err_tip(|| "Invalid directory digest")?; + let digest = DigestInfo::try_from( + dir_node + .digest + .as_ref() + .ok_or_else(|| { + make_err!(Code::InvalidArgument, "Directory node missing digest") + })? + .clone(), + ) + .err_tip(|| "Invalid directory digest")?; trace!(?dir_path, ?digest, "Creating subdirectory"); // Recursively construct subdirectory - self.construct_directory(digest, &dir_path).await + self.construct_directory_impl(digest, &dir_path, depth) + .await } /// Creates a symlink from a `SymlinkNode` @@ -347,74 +1680,107 @@ impl DirectoryCache { Ok(()) } - /// Evicts entries if cache is too full - async fn evict_if_needed( + /// Collects entries to evict to make room for `incoming_size` bytes. + /// Removes them from the HashMap and returns their paths for disk cleanup. + /// This is called while holding the write lock; actual disk I/O happens after + /// the lock is released. + fn collect_evictions( &self, incoming_size: u64, cache: &mut HashMap, - ) -> Result<(), Error> { - // Check entry count + ) -> Vec { + let mut evicted_paths = Vec::new(); + + // Evict by entry count while cache.len() >= self.config.max_entries { - self.evict_lru(cache).await?; + if let Some((path, digest, size)) = self.evict_lru_entry(cache) { + debug!( + hash = %&digest.packed_hash().to_string()[..12], + size_bytes = size, + reason = "count_limit", + entries_remaining = cache.len(), + max_entries = self.config.max_entries, + "DirectoryCache: evicting entry", + ); + evicted_paths.push(path); + } else { + warn!( + entries = cache.len(), + max = self.config.max_entries, + "DirectoryCache: over entry limit but all entries are in use" + ); + break; + } } - // Check total size + // Evict by size if self.config.max_size_bytes > 0 { - let current_size: u64 = cache.values().map(|m| m.size).sum(); - let mut size_after = current_size + incoming_size; - - while size_after > self.config.max_size_bytes { - let evicted_size = self.evict_lru(cache).await?; - size_after -= evicted_size; + loop { + let current_size: u64 = cache.values().map(|m| m.size).sum(); + if current_size + incoming_size <= self.config.max_size_bytes { + break; + } + if let Some((path, digest, size)) = self.evict_lru_entry(cache) { + debug!( + hash = %&digest.packed_hash().to_string()[..12], + size_bytes = size, + size_freed_mb = format!("{:.2}", size as f64 / (1024.0 * 1024.0)), + reason = "size_limit", + entries_remaining = cache.len(), + current_total_mb = format!("{:.2}", cache.values().map(|m| m.size).sum::() as f64 / (1024.0 * 1024.0)), + max_size_mb = format!("{:.2}", self.config.max_size_bytes as f64 / (1024.0 * 1024.0)), + "DirectoryCache: evicting entry", + ); + evicted_paths.push(path); + } else { + warn!( + current_size = current_size + incoming_size, + max = self.config.max_size_bytes, + "DirectoryCache: over size limit but all entries are in use" + ); + break; + } } } - Ok(()) + evicted_paths } - /// Evicts the least recently used entry - async fn evict_lru( + /// Removes the LRU entry with ref_count == 0 from the cache HashMap. + /// Returns the evicted entry's (path, digest, size) for logging and disk + /// cleanup, or `None` if no evictable entry exists. 
+ fn evict_lru_entry(
 &self,
 cache: &mut HashMap<DigestInfo, CacheEntryMetadata>,
- ) -> Result<u64, Error> {
- // Find LRU entry that isn't currently in use
+ ) -> Option<(PathBuf, DigestInfo, u64)> {
 let to_evict = cache
 .iter()
- .filter(|(_, m)| m.ref_count == 0)
- .min_by_key(|(_, m)| m.last_access)
+ .filter(|(_, m)| m.ref_count.load(Ordering::Relaxed) == 0)
+ .min_by_key(|(_, m)| m.last_access_millis.load(Ordering::Relaxed))
 .map(|(digest, _)| *digest);

 if let Some(digest) = to_evict {
 if let Some(metadata) = cache.remove(&digest) {
- debug!(?digest, size = metadata.size, "Evicting cached directory");
-
- // Remove from disk
- if let Err(e) = fs::remove_dir_all(&metadata.path).await {
- warn!(
- ?digest,
- path = ?metadata.path,
- error = ?e,
- "Failed to remove evicted directory from disk"
- );
- }
-
- return Ok(metadata.size);
+ return Some((metadata.path, digest, metadata.size));
 }
 }

- Ok(0)
+ None
 }

 /// Gets the cache path for a digest
 fn get_cache_path(&self, digest: &DigestInfo) -> PathBuf {
- self.config.cache_root.join(format!("{digest}"))
+ self.config.cache_root.join(digest.to_string())
 }

 /// Returns cache statistics
 pub async fn stats(&self) -> CacheStats {
 let cache = self.cache.read().await;
 let total_size: u64 = cache.values().map(|m| m.size).sum();
- let in_use = cache.values().filter(|m| m.ref_count > 0).count();
+ let in_use = cache
+ .values()
+ .filter(|m| m.ref_count.load(Ordering::Relaxed) > 0)
+ .count();

 CacheStats {
 entries: cache.len(),
@@ -493,6 +1859,83 @@ mod tests {
 (store, dir_digest)
 }

+ /// Creates a store with two different directory digests for eviction testing.
+ async fn setup_two_digest_store() -> (Store, DigestInfo, DigestInfo) {
+ let store = Store::new(MemoryStore::new(&Default::default()));
+
+ // File A
+ let content_a = b"File A content";
+ let digest_a = DigestInfo::try_new(
+ "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2",
+ content_a.len() as i64,
+ )
+ .unwrap();
+ store
+ .as_store_driver_pin()
+ .update_oneshot(digest_a.into(), content_a.to_vec().into())
+ .await
+ .unwrap();
+
+ // Directory A
+ let dir_a = ProtoDirectory {
+ files: vec![FileNode {
+ name: "a.txt".to_string(),
+ digest: Some(digest_a.into()),
+ ..Default::default()
+ }],
+ ..Default::default()
+ };
+ let mut dir_a_data = Vec::new();
+ dir_a.encode(&mut dir_a_data).unwrap();
+ let dir_digest_a = DigestInfo::try_new(
+ "aaaa567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
+ dir_a_data.len() as i64,
+ )
+ .unwrap();
+ store
+ .as_store_driver_pin()
+ .update_oneshot(dir_digest_a.into(), dir_a_data.into())
+ .await
+ .unwrap();
+
+ // File B
+ let content_b = b"File B content!!";
+ let digest_b = DigestInfo::try_new(
+ "b1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6b1b2",
+ content_b.len() as i64,
+ )
+ .unwrap();
+ store
+ .as_store_driver_pin()
+ .update_oneshot(digest_b.into(), content_b.to_vec().into())
+ .await
+ .unwrap();
+
+ // Directory B
+ let dir_b = ProtoDirectory {
+ files: vec![FileNode {
+ name: "b.txt".to_string(),
+ digest: Some(digest_b.into()),
+ ..Default::default()
+ }],
+ ..Default::default()
+ };
+ let mut dir_b_data = Vec::new();
+ dir_b.encode(&mut dir_b_data).unwrap();
+ let dir_digest_b = DigestInfo::try_new(
+ "bbbb567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
+ dir_b_data.len() as i64,
+ )
+ .unwrap();
+ store
+ .as_store_driver_pin()
+ .update_oneshot(dir_digest_b.into(), dir_b_data.into())
+ .await
+ .unwrap();
+
+ (store, dir_digest_a, dir_digest_b)
+ }
+
 #[tokio::test]
 async fn test_directory_cache_basic() ->
Result<(), Error> { let temp_dir = TempDir::new().unwrap(); @@ -505,7 +1948,7 @@ mod tests { cache_root, }; - let cache = DirectoryCache::new(config, store).await?; + let cache = DirectoryCache::new(config, store, None).await?; // First access - cache miss let dest1 = temp_dir.path().join("dest1"); @@ -525,4 +1968,692 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_hardlink_into_existing_directory() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root, + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Pre-create destination directory (simulates work_directory already existing) + let dest = temp_dir.path().join("existing_dest"); + fs::create_dir(&dest).await.unwrap(); + + // Should succeed even though dest already exists (Bug 1 fix) + let hit = cache.get_or_create(dir_digest, &dest).await?; + assert!(!hit, "First access should be cache miss"); + assert!(dest.join("test.txt").exists()); + + // Cache hit into another pre-existing directory + let dest2 = temp_dir.path().join("existing_dest2"); + fs::create_dir(&dest2).await.unwrap(); + let hit = cache.get_or_create(dir_digest, &dest2).await?; + assert!(hit, "Second access should be cache hit"); + assert!(dest2.join("test.txt").exists()); + + Ok(()) + } + + #[tokio::test] + async fn test_construction_failure_cleanup() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + + // Create a store with no data — construction will fail when fetching the digest + let store = Store::new(MemoryStore::new(&Default::default())); + + let bogus_digest = DigestInfo::try_new( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + 42, + ) + .unwrap(); + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root: cache_root.clone(), + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + let dest = temp_dir.path().join("dest"); + let result = cache.get_or_create(bogus_digest, &dest).await; + assert!(result.is_err(), "Should fail when digest not in store"); + + // Bug 2 fix: No orphaned temp directories should remain + let mut entries = fs::read_dir(&cache_root).await.unwrap(); + let mut leftover = Vec::new(); + while let Some(entry) = entries.next_entry().await.unwrap() { + leftover.push(entry.file_name().to_string_lossy().to_string()); + } + assert!( + leftover.is_empty(), + "No orphaned temp dirs should remain in cache_root, found: {leftover:?}" + ); + + // Verify construction lock was cleaned up (Bug 3 fix) + let locks = cache.construction_locks.lock().await; + assert!( + locks.is_empty(), + "Construction lock should be cleaned up after failure" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_eviction_all_in_use() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + let config = DirectoryCacheConfig { + max_entries: 1, + max_size_bytes: 0, + cache_root, + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Fill the cache + let dest1 = temp_dir.path().join("dest1"); + cache.get_or_create(dir_digest, &dest1).await?; + + // Simulate all entries being in-use + { + let cache_map = cache.cache.read().await; + if let 
Some(metadata) = cache_map.get(&dir_digest) { + metadata.ref_count.store(1, Ordering::Relaxed); + } + } + + // Bug 4 fix: collect_evictions should not loop infinitely. + { + let mut cache_map = cache.cache.write().await; + let evicted = cache.collect_evictions(100, &mut cache_map); + assert!(evicted.is_empty(), "Nothing should be evictable"); + assert_eq!(cache_map.len(), 1, "Entry should still be present"); + } + + // Clean up ref_count + { + let cache_map = cache.cache.read().await; + if let Some(metadata) = cache_map.get(&dir_digest) { + metadata.ref_count.store(0, Ordering::Relaxed); + } + } + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 4)] + async fn test_concurrent_same_digest() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root, + }; + + let cache = Arc::new(DirectoryCache::new(config, store, None).await?); + + // Spawn multiple concurrent requests for the same digest + let mut handles = Vec::new(); + for i in 0..5 { + let cache = Arc::clone(&cache); + let dest = temp_dir.path().join(format!("concurrent_dest_{i}")); + handles.push(tokio::spawn(async move { + cache.get_or_create(dir_digest, &dest).await + })); + } + + let mut hits = 0; + let mut misses = 0; + for handle in handles { + let result = handle.await.unwrap()?; + if result { + hits += 1; + } else { + misses += 1; + } + } + + // Exactly one task should construct (miss), the rest should hit cache + assert_eq!(misses, 1, "Exactly one task should construct the directory"); + assert_eq!(hits, 4, "Other tasks should get cache hits"); + + // Verify only one cache entry exists + let stats = cache.stats().await; + assert_eq!(stats.entries, 1); + assert_eq!(stats.in_use_entries, 0, "All ref_counts should be back to 0"); + + // Verify construction locks are cleaned up (Bug 3) + let locks = cache.construction_locks.lock().await; + assert!( + locks.is_empty(), + "Construction locks should be cleaned up, found: {}", + locks.len() + ); + + Ok(()) + } + + #[tokio::test] + async fn test_construction_lock_cleanup() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root, + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + let dest = temp_dir.path().join("dest"); + cache.get_or_create(dir_digest, &dest).await?; + + let locks = cache.construction_locks.lock().await; + assert!( + locks.is_empty(), + "Construction lock should be removed after get_or_create completes" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_eviction_removes_oldest_entry() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, digest_a, digest_b) = setup_two_digest_store().await; + + let config = DirectoryCacheConfig { + max_entries: 1, // Only 1 entry allowed + max_size_bytes: 0, + cache_root: cache_root.clone(), + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Insert entry A + let dest_a = temp_dir.path().join("dest_a"); + cache.get_or_create(digest_a, &dest_a).await?; + assert_eq!(cache.stats().await.entries, 1); + + // Insert entry B — should evict A + let dest_b = 
temp_dir.path().join("dest_b"); + cache.get_or_create(digest_b, &dest_b).await?; + assert_eq!(cache.stats().await.entries, 1); + + // A's cache directory should be gone from disk + let cache_path_a = cache_root.join(digest_a.to_string()); + assert!( + !cache_path_a.exists(), + "Evicted entry A should be removed from disk" + ); + + // B should be in cache + let cache_path_b = cache_root.join(digest_b.to_string()); + assert!(cache_path_b.exists(), "Entry B should be on disk"); + + // Requesting A again should be a miss (reconstruct) + let dest_a2 = temp_dir.path().join("dest_a2"); + let hit = cache.get_or_create(digest_a, &dest_a2).await?; + assert!(!hit, "A should be a cache miss after eviction"); + assert!(dest_a2.join("a.txt").exists()); + + Ok(()) + } + + #[tokio::test] + async fn test_path_traversal_rejected() -> Result<(), Error> { + // Test validate_node_name directly + assert!(DirectoryCache::validate_node_name("good_file.txt").is_ok()); + assert!(DirectoryCache::validate_node_name("subdir").is_ok()); + + // These should all be rejected + assert!(DirectoryCache::validate_node_name("").is_err()); + assert!(DirectoryCache::validate_node_name(".").is_err()); + assert!(DirectoryCache::validate_node_name("..").is_err()); + assert!(DirectoryCache::validate_node_name("../etc/passwd").is_err()); + assert!(DirectoryCache::validate_node_name("/etc/passwd").is_err()); + assert!(DirectoryCache::validate_node_name("foo/bar").is_err()); + assert!(DirectoryCache::validate_node_name("foo\\bar").is_err()); + assert!(DirectoryCache::validate_node_name("foo\0bar").is_err()); + + Ok(()) + } + + #[tokio::test] + async fn test_symlink_target_validation() -> Result<(), Error> { + // Valid relative targets + assert!(DirectoryCache::validate_symlink_target("file.txt", 0).is_ok()); + assert!(DirectoryCache::validate_symlink_target("subdir/file.txt", 0).is_ok()); + assert!(DirectoryCache::validate_symlink_target("../sibling", 1).is_ok()); + + // Absolute targets rejected + assert!(DirectoryCache::validate_symlink_target("/etc/shadow", 0).is_err()); + assert!(DirectoryCache::validate_symlink_target("\\windows\\system32", 0).is_err()); + + // Traversal beyond root rejected + assert!(DirectoryCache::validate_symlink_target("..", 0).is_err()); + assert!(DirectoryCache::validate_symlink_target("../..", 1).is_err()); + assert!(DirectoryCache::validate_symlink_target("../../escape", 1).is_err()); + + // Deep enough to allow traversal + assert!(DirectoryCache::validate_symlink_target("../..", 2).is_ok()); + + // Empty and null rejected + assert!(DirectoryCache::validate_symlink_target("", 0).is_err()); + assert!(DirectoryCache::validate_symlink_target("foo\0bar", 0).is_err()); + + Ok(()) + } + + #[tokio::test] + async fn test_path_traversal_in_directory_proto() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let store = Store::new(MemoryStore::new(&Default::default())); + + // Create a malicious directory proto with a path-traversal file name + let file_content = b"malicious"; + let file_digest = DigestInfo::try_new( + "c0535e4be2b79ffd93291305436bf889314e4a3faec05ecffcbb7df31ad9e51a", + 9, + ) + .unwrap(); + store + .as_store_driver_pin() + .update_oneshot(file_digest.into(), file_content.to_vec().into()) + .await + .unwrap(); + + let malicious_dir = ProtoDirectory { + files: vec![FileNode { + name: "../escape.txt".to_string(), + digest: Some(file_digest.into()), + ..Default::default() + }], + ..Default::default() + }; + let mut dir_data = 
Vec::new(); + malicious_dir.encode(&mut dir_data).unwrap(); + let dir_digest = DigestInfo::try_new( + "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc", + dir_data.len() as i64, + ) + .unwrap(); + store + .as_store_driver_pin() + .update_oneshot(dir_digest.into(), dir_data.into()) + .await + .unwrap(); + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root, + }; + let cache = DirectoryCache::new(config, store, None).await?; + + let dest = temp_dir.path().join("dest"); + let result = cache.get_or_create(dir_digest, &dest).await; + assert!(result.is_err(), "Path traversal should be rejected"); + + // The escape file should NOT exist in the parent directory + assert!( + !temp_dir.path().join("escape.txt").exists(), + "Path traversal should not create files outside dest" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_absolute_symlink_rejected() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let store = Store::new(MemoryStore::new(&Default::default())); + + let malicious_dir = ProtoDirectory { + symlinks: vec![SymlinkNode { + name: "evil_link".to_string(), + target: "/etc/shadow".to_string(), + ..Default::default() + }], + ..Default::default() + }; + let mut dir_data = Vec::new(); + malicious_dir.encode(&mut dir_data).unwrap(); + let dir_digest = DigestInfo::try_new( + "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd", + dir_data.len() as i64, + ) + .unwrap(); + store + .as_store_driver_pin() + .update_oneshot(dir_digest.into(), dir_data.into()) + .await + .unwrap(); + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root, + }; + let cache = DirectoryCache::new(config, store, None).await?; + + let dest = temp_dir.path().join("dest"); + let result = cache.get_or_create(dir_digest, &dest).await; + assert!(result.is_err(), "Absolute symlink target should be rejected"); + + Ok(()) + } + + #[tokio::test] + async fn test_ref_count_returns_to_zero_after_operations() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root, + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Cache miss + let dest1 = temp_dir.path().join("dest1"); + cache.get_or_create(dir_digest, &dest1).await?; + + // Cache hit + let dest2 = temp_dir.path().join("dest2"); + cache.get_or_create(dir_digest, &dest2).await?; + + // ref_count should be 0 after both operations + let stats = cache.stats().await; + assert_eq!(stats.in_use_entries, 0, "ref_count should be 0 after all operations"); + + Ok(()) + } + + #[tokio::test] + async fn test_size_based_eviction() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, digest_a, digest_b) = setup_two_digest_store().await; + + let config = DirectoryCacheConfig { + max_entries: 100, // High entry limit + max_size_bytes: 20, // Very small — forces size-based eviction + cache_root: cache_root.clone(), + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Insert entry A (14 bytes for "File A content") + let dest_a = temp_dir.path().join("dest_a"); + cache.get_or_create(digest_a, &dest_a).await?; + assert_eq!(cache.stats().await.entries, 1); + + // Insert entry B 
(16 bytes for "File B content!!") — total would be 30 > 20, + // so A should be evicted + let dest_b = temp_dir.path().join("dest_b"); + cache.get_or_create(digest_b, &dest_b).await?; + assert_eq!(cache.stats().await.entries, 1); + + // A should have been evicted + let cache_map = cache.cache.read().await; + assert!( + !cache_map.contains_key(&digest_a), + "Digest A should have been evicted due to size limit" + ); + assert!( + cache_map.contains_key(&digest_b), + "Digest B should be present" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_merkle_tree_metadata_roundtrip() -> Result<(), Error> { + // Test serialization/deserialization of MerkleTreeMetadata + let mut digest_to_relpath = HashMap::new(); + let d1 = DigestInfo::try_new( + "aaaa567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + 100, + ) + .unwrap(); + let d2 = DigestInfo::try_new( + "bbbb567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + 200, + ) + .unwrap(); + + digest_to_relpath.insert(d1, String::new()); // root + digest_to_relpath.insert(d2, "subdir/nested".to_string()); + + let meta = MerkleTreeMetadata { digest_to_relpath }; + let serialized = meta.serialize(); + let deserialized = MerkleTreeMetadata::deserialize(&serialized)?; + + assert_eq!(deserialized.digest_to_relpath.len(), 2); + assert_eq!(deserialized.digest_to_relpath.get(&d1).unwrap(), ""); + assert_eq!( + deserialized.digest_to_relpath.get(&d2).unwrap(), + "subdir/nested" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_merkle_tree_metadata_from_directory_tree() -> Result<(), Error> { + // Build a small directory tree and verify MerkleTreeMetadata generation + let file_digest = DigestInfo::try_new( + "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f", + 13, + ) + .unwrap(); + + // Child directory + let child_dir = ProtoDirectory { + files: vec![FileNode { + name: "child_file.txt".to_string(), + digest: Some(file_digest.into()), + ..Default::default() + }], + ..Default::default() + }; + let mut child_data = Vec::new(); + child_dir.encode(&mut child_data).unwrap(); + let child_digest = DigestInfo::try_new( + "cccc567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + child_data.len() as i64, + ) + .unwrap(); + + // Root directory referencing the child + let root_dir = ProtoDirectory { + files: vec![FileNode { + name: "root_file.txt".to_string(), + digest: Some(file_digest.into()), + ..Default::default() + }], + directories: vec![DirectoryNode { + name: "child".to_string(), + digest: Some(child_digest.into()), + }], + ..Default::default() + }; + let mut root_data = Vec::new(); + root_dir.encode(&mut root_data).unwrap(); + let root_digest = DigestInfo::try_new( + "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef", + root_data.len() as i64, + ) + .unwrap(); + + let mut tree = HashMap::new(); + tree.insert(root_digest, root_dir); + tree.insert(child_digest, child_dir); + + let meta = MerkleTreeMetadata::from_directory_tree(&tree, &root_digest); + assert_eq!(meta.digest_to_relpath.len(), 2); + assert_eq!(meta.digest_to_relpath.get(&root_digest).unwrap(), ""); + assert_eq!(meta.digest_to_relpath.get(&child_digest).unwrap(), "child"); + + Ok(()) + } + + #[tokio::test] + async fn test_parse_digest_from_dirname() -> Result<(), Error> { + // Valid format: hash-size + let name = "aaaa567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef-100"; + let parsed = DirectoryCache::parse_digest_from_dirname(name); + assert!(parsed.is_some()); + let d = parsed.unwrap(); + 
assert_eq!(d.size_bytes(), 100); + + // Invalid: no dash + assert!(DirectoryCache::parse_digest_from_dirname("nodashhere").is_none()); + + // Invalid: not a number after dash + assert!(DirectoryCache::parse_digest_from_dirname("hash-notanumber").is_none()); + + // Invalid: empty + assert!(DirectoryCache::parse_digest_from_dirname("").is_none()); + + Ok(()) + } + + #[tokio::test] + async fn test_merkle_metadata_stored_on_construction() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root: cache_root.clone(), + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Construct a directory (serial path, no FastSlowStore) + let dest = temp_dir.path().join("dest"); + cache.get_or_create(dir_digest, &dest).await?; + + // Merkle metadata file should NOT exist because we don't have + // FastSlowStore (resolve_directory_tree requires it). + // This is expected -- subtree indexing is only available with + // the fast path. + let cache_path = cache.get_cache_path(&dir_digest); + let merkle_path = cache_path.join(MERKLE_METADATA_FILENAME); + // Without FastSlowStore, no merkle metadata is generated + assert!( + !merkle_path.exists(), + "Merkle metadata should not exist without FastSlowStore" + ); + + Ok(()) + } + + #[tokio::test] + async fn test_subtree_index_populated_and_cleaned_on_eviction() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, digest_a, digest_b) = setup_two_digest_store().await; + + let config = DirectoryCacheConfig { + max_entries: 1, + max_size_bytes: 0, + cache_root: cache_root.clone(), + }; + + let cache = DirectoryCache::new(config, store, None).await?; + + // Insert entry A + let dest_a = temp_dir.path().join("dest_a"); + cache.get_or_create(digest_a, &dest_a).await?; + + // Without FastSlowStore, subtree index should be empty (no merkle tree resolved) + { + let index = cache.subtree_index.read().await; + assert!( + index.is_empty(), + "Subtree index should be empty without FastSlowStore" + ); + } + + // Insert entry B (evicts A) + let dest_b = temp_dir.path().join("dest_b"); + cache.get_or_create(digest_b, &dest_b).await?; + assert_eq!(cache.stats().await.entries, 1); + + Ok(()) + } + + #[tokio::test] + async fn test_cache_reload_from_disk() -> Result<(), Error> { + let temp_dir = TempDir::new().unwrap(); + let cache_root = temp_dir.path().join("cache"); + let (store, dir_digest) = setup_test_store().await; + + // Create a cache and populate it + { + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root: cache_root.clone(), + }; + let cache = DirectoryCache::new(config, store.clone(), None).await?; + let dest = temp_dir.path().join("dest1"); + cache.get_or_create(dir_digest, &dest).await?; + assert_eq!(cache.stats().await.entries, 1); + } + + // Create a NEW cache pointing to the same cache_root -- it should + // reload the existing entry from disk. 
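+ // (How reload is expected to work, stated as an assumption from this
+ // module's API: DirectoryCache::new scans cache_root and re-derives each
+ // digest from its "{hash}-{size}" directory name via parse_digest_from_dirname.)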
+ { + let config = DirectoryCacheConfig { + max_entries: 10, + max_size_bytes: 1024 * 1024, + cache_root: cache_root.clone(), + }; + let cache = DirectoryCache::new(config, store, None).await?; + assert_eq!( + cache.stats().await.entries, + 1, + "Cache should have reloaded the entry from disk" + ); + + // The reloaded entry should be usable (cache hit) + let dest2 = temp_dir.path().join("dest2"); + let hit = cache.get_or_create(dir_digest, &dest2).await?; + assert!(hit, "Reloaded entry should produce a cache hit"); + assert!(dest2.join("test.txt").exists()); + } + + Ok(()) + } } diff --git a/nativelink-worker/src/local_worker.rs b/nativelink-worker/src/local_worker.rs index c8e5f76f6..04d88ff7e 100644 --- a/nativelink-worker/src/local_worker.rs +++ b/nativelink-worker/src/local_worker.rs @@ -17,7 +17,7 @@ use core::str; use core::sync::atomic::{AtomicU64, Ordering}; use core::time::Duration; use std::borrow::Cow; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::env; use std::process::Stdio; use std::sync::{Arc, Weak}; @@ -31,18 +31,21 @@ use nativelink_metric::{MetricsComponent, RootMetricsComponent}; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::update_for_worker::Update; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::worker_api_client::WorkerApiClient; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{ - ExecuteComplete, ExecuteResult, GoingAwayRequest, KeepAliveRequest, UpdateForWorker, - execute_result, + BlobDigestInfo, BlobsAvailableNotification, ExecuteComplete, ExecuteResult, GoingAwayRequest, + KeepAliveRequest, UpdateForWorker, execute_result, }; use nativelink_store::fast_slow_store::FastSlowStore; +use nativelink_store::filesystem_store::FilesystemStore; use nativelink_util::action_messages::{ActionResult, ActionStage, OperationId}; -use nativelink_util::common::fs; +use nativelink_util::common::{DigestInfo, fs}; use nativelink_util::digest_hasher::DigestHasherFunc; use nativelink_util::metrics_utils::{AsyncCounterWrapper, CounterWithTime}; use nativelink_util::shutdown_guard::ShutdownGuard; -use nativelink_util::store_trait::Store; +use nativelink_util::store_trait::{ItemCallback, Store, StoreDriver, StoreKey}; +use nativelink_util::task::JoinHandleDropGuard; use nativelink_util::{spawn, tls_utils}; use opentelemetry::context::Context; +use parking_lot::Mutex; use tokio::process; use tokio::sync::{broadcast, mpsc}; use tokio::time::sleep; @@ -57,6 +60,114 @@ use crate::running_actions_manager::{ use crate::worker_api_client_wrapper::{WorkerApiClientTrait, WorkerApiClientWrapper}; use crate::worker_utils::make_connect_worker_request; +/// Default interval for periodic BlobsAvailable reports (milliseconds). +const DEFAULT_BLOBS_AVAILABLE_INTERVAL_MS: u64 = 500; + +/// Returns the current CPU load as a percentage (load_avg_1m / num_cpus * 100). +/// Returns 0 if the load cannot be determined. +fn get_cpu_load_pct() -> u32 { + let num_cpus = std::thread::available_parallelism() + .map(|n| n.get() as f64) + .unwrap_or(1.0); + let mut loadavg: [f64; 1] = [0.0]; + // SAFETY: getloadavg writes at most `nelem` doubles into the array. + let ret = unsafe { libc::getloadavg(loadavg.as_mut_ptr(), 1) }; + if ret < 1 { + return 0; + } + let pct = (loadavg[0] / num_cpus * 100.0).round() as u32; + // Clamp to a reasonable maximum (can exceed 100 on overloaded systems). + pct.min(1000) +} + +/// Build the advertised gRPC endpoint for peer blob sharing. 
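+/// A resulting endpoint looks like `grpc://worker-01.local:50081` (hostname
+/// and port here are illustrative only).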
+/// Uses the machine's hostname so a single config works across all workers.
+/// The hostname is resolved once and cached for the lifetime of the process.
+fn cas_advertised_endpoint(port: u16) -> String {
+ use std::sync::OnceLock;
+ static HOSTNAME: OnceLock<String> = OnceLock::new();
+ let hostname = HOSTNAME.get_or_init(|| {
+ match hostname::get() {
+ Ok(h) => {
+ let name = h.to_string_lossy().into_owned();
+ // Append .local for mDNS resolution if the hostname is bare
+ // (no dots), so the server can resolve it via multicast DNS.
+ if name.contains('.') {
+ name
+ } else {
+ format!("{name}.local")
+ }
+ }
+ Err(err) => {
+ error!(
+ ?err,
+ "hostname::get() failed, using 'localhost' — peer blob sharing will not work across machines"
+ );
+ "localhost".to_string()
+ }
+ }
+ });
+ format!("grpc://{hostname}:{port}")
+}
+
+/// Accumulated blob changes between BlobsAvailable ticks.
+#[derive(Debug, Default)]
+pub struct BlobChanges {
+ /// digest → last_access_timestamp (unix seconds).
+ pub added: HashMap<DigestInfo, i64>,
+ pub evicted: HashSet<DigestInfo>,
+}
+
+/// Tracks inserts and evictions from the FilesystemStore between ticks.
+/// Registered as a callback on the FilesystemStore's evicting map.
+#[derive(Debug)]
+pub struct BlobChangeTracker {
+ pending: Mutex<BlobChanges>,
+}
+
+impl BlobChangeTracker {
+ pub fn new() -> Arc<Self> {
+ Arc::new(Self {
+ pending: Mutex::new(BlobChanges::default()),
+ })
+ }
+
+ /// Atomically swap out accumulated changes, returning them.
+ /// The internal state is replaced with an empty BlobChanges.
+ pub fn swap(&self) -> BlobChanges {
+ let mut pending = self.pending.lock();
+ std::mem::take(&mut *pending)
+ }
+}
+
+impl ItemCallback for BlobChangeTracker {
+ // On evict: add to evicted, remove from added (cancel out insert+evict).
+ fn callback<'a>(
+ &'a self,
+ store_key: StoreKey<'a>,
+ ) -> Pin<Box<dyn Future<Output = ()> + Send + 'a>> {
+ if let StoreKey::Digest(digest) = store_key {
+ let mut pending = self.pending.lock();
+ pending.added.remove(&digest);
+ pending.evicted.insert(digest);
+ }
+ Box::pin(core::future::ready(()))
+ }
+
+ // On insert: add to added, remove from evicted (cancel out evict+reinsert).
+ fn on_insert(&self, store_key: StoreKey<'_>, _size: u64) {
+ if let StoreKey::Digest(digest) = store_key {
+ let ts = std::time::SystemTime::now()
+ .duration_since(std::time::UNIX_EPOCH)
+ .map(|d| d.as_secs() as i64)
+ .unwrap_or(0);
+ let mut pending = self.pending.lock();
+ pending.evicted.remove(&digest);
+ pending.added.insert(digest, ts);
+ }
+ }
+}
+
 /// Amount of time to wait if we have actions in transit before we try to
 /// consider an error to have occurred.
 const ACTIONS_IN_TRANSIT_TIMEOUT_S: f32 = 10.;
@@ -74,6 +185,20 @@ const DEFAULT_ENDPOINT_TIMEOUT_S: f32 = 5.;
 const DEFAULT_MAX_ACTION_TIMEOUT: Duration = Duration::from_secs(1200); // 20 mins.
 const DEFAULT_MAX_UPLOAD_TIMEOUT: Duration = Duration::from_secs(600); // 10 mins.

+/// Holds the FilesystemStore reference and change tracker needed for
+/// periodic BlobsAvailable reporting.
+#[derive(Clone, Debug)]
+pub struct BlobsAvailableState {
+ /// Reference to the worker's local FilesystemStore (the fast store in FastSlowStore).
+ fs_store: Arc<FilesystemStore>,
+ /// Tracks inserted and evicted digests between periodic ticks.
+ tracker: Arc<BlobChangeTracker>,
+ /// The worker's CAS endpoint for peer serving (e.g. "grpc://192.168.191.5:50081").
+ cas_endpoint: String,
+ /// How often to send periodic BlobsAvailable (0 = disabled).
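+ /// A value of 0 in the worker config falls back to
+ /// DEFAULT_BLOBS_AVAILABLE_INTERVAL_MS (500 ms); see new_local_worker.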
+ interval: Duration,
+}
+
 struct LocalWorkerImpl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> {
 config: &'a LocalWorkerConfig,
 // According to the tonic documentation it is a cheap operation to clone this.
@@ -86,6 +211,8 @@ struct LocalWorkerImpl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsM
 // on by the scheduler.
 actions_in_transit: Arc<AtomicU64>,
 metrics: Arc<Metrics>,
+ /// State for periodic BlobsAvailable reporting. None if disabled (no CAS endpoint).
+ blobs_available_state: Option<BlobsAvailableState>,
 }

 pub async fn preconditions_met(
@@ -146,6 +273,7 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 worker_id: String,
 running_actions_manager: Arc<U>,
 metrics: Arc<Metrics>,
+ blobs_available_state: Option<BlobsAvailableState>,
 ) -> Self {
 Self {
 config,
@@ -158,6 +286,7 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 // on by the scheduler.
 actions_in_transit: Arc::new(AtomicU64::new(0)),
 metrics,
+ blobs_available_state,
 }
 }
@@ -175,7 +304,11 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 // We always send 2 keep alive requests per timeout. Http2 should manage most of our
 // timeout issues, this is a secondary check to ensure we can still send data.
 sleep(Duration::from_secs_f32(timeout / 2.)).await;
- if let Err(e) = grpc_client.keep_alive(KeepAliveRequest {}).await {
+ let load = get_cpu_load_pct();
+ debug!("KeepAlive cpu_load_pct={load}");
+ if let Err(e) = grpc_client.keep_alive(KeepAliveRequest {
+ cpu_load_pct: load,
+ }).await {
 return Err(make_err!(
 Code::Internal,
 "Failed to send KeepAlive in LocalWorker : {:?}",
@@ -185,6 +318,125 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 }
 }

+ /// Sends a periodic BlobsAvailable notification.
+ /// - First tick: full snapshot of all digests with timestamps (scans store once).
+ /// Also sends a full subtree snapshot with ALL subtree digests.
+ /// - Subsequent ticks: delta from callback-accumulated changes (no scan).
+ /// Sends delta-encoded subtree changes (added/removed).
+ async fn send_periodic_blobs_available(
+ grpc_client: &mut T,
+ state: &BlobsAvailableState,
+ running_actions_manager: &Arc<U>,
+ is_first: bool,
+ ) {
+ let (digest_infos, evicted_digests) = if is_first {
+ // Full snapshot: scan everything once.
+ let all = state.fs_store.get_all_digests_with_timestamps();
+ // Drain any changes that accumulated during startup.
+ drop(state.tracker.swap());
+
+ let infos: Vec<BlobDigestInfo> = all
+ .iter()
+ .map(|(digest, ts)| BlobDigestInfo {
+ digest: Some((*digest).into()),
+ last_access_timestamp: *ts,
+ })
+ .collect();
+
+ (infos, Vec::new())
+ } else {
+ // Delta: swap out accumulated changes.
+ let changes = state.tracker.swap();
+ // Even if there are no blob changes, we may still have subtree
+ // changes to report; the combined "skip if nothing changed" check
+ // happens below.
+
+ let infos: Vec<BlobDigestInfo> = changes
+ .added
+ .iter()
+ .map(|(digest, &ts)| BlobDigestInfo {
+ digest: Some((*digest).into()),
+ last_access_timestamp: ts,
+ })
+ .collect();
+ let evicted_protos = changes.evicted.iter().map(|d| (*d).into()).collect();
+
+ (infos, evicted_protos)
+ };
+
+ // Collect subtree delta or full snapshot.
+ let (cached_directory_digests, added_subtree_digests, removed_subtree_digests, is_full_subtree_snapshot) = if is_first {
+ // Full subtree snapshot: send ALL subtree digests in cached_directory_digests.
+ // Also drain any pending changes accumulated during startup.
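+ // (drop() below only discards the drained delta: after a full snapshot
+ // the pre-startup changes are redundant.)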
+ drop(running_actions_manager.take_pending_subtree_changes().await);
+ let all_subtrees = running_actions_manager.all_subtree_digests().await;
+ let all_subtree_protos = all_subtrees.into_iter().map(|d| d.into()).collect();
+ (all_subtree_protos, Vec::new(), Vec::new(), true)
+ } else {
+ // Delta: take pending subtree changes.
+ let (added, removed) = running_actions_manager.take_pending_subtree_changes().await;
+ let added_protos = added.into_iter().map(|d| d.into()).collect();
+ let removed_protos = removed.into_iter().map(|d| d.into()).collect();
+ (Vec::new(), added_protos, removed_protos, false)
+ };
+
+ let new_or_touched_count = digest_infos.len();
+ let evicted_count = evicted_digests.len();
+ let cached_dir_count = cached_directory_digests.len();
+ let added_subtree_count = added_subtree_digests.len();
+ let removed_subtree_count = removed_subtree_digests.len();
+
+ // Skip sending if there are truly no changes at all.
+ if !is_first
+ && new_or_touched_count == 0
+ && evicted_count == 0
+ && added_subtree_count == 0
+ && removed_subtree_count == 0
+ {
+ trace!("BlobsAvailable: no changes since last tick, skipping");
+ return;
+ }
+
+ let load = get_cpu_load_pct();
+ debug!("BlobsAvailable cpu_load_pct={load}");
+ let notification = BlobsAvailableNotification {
+ worker_cas_endpoint: state.cas_endpoint.clone(),
+ digests: Vec::new(),
+ is_full_snapshot: is_first,
+ evicted_digests,
+ digest_infos,
+ cpu_load_pct: load,
+ cached_directory_digests,
+ added_subtree_digests,
+ removed_subtree_digests,
+ is_full_subtree_snapshot,
+ };
+
+ if let Err(err) = grpc_client.blobs_available(notification).await {
+ warn!(
+ ?err,
+ new_or_touched_count,
+ evicted_count,
+ cached_dir_count,
+ added_subtree_count,
+ removed_subtree_count,
+ is_first,
+ "Failed to send periodic BlobsAvailable"
+ );
+ } else {
+ debug!(
+ new_or_touched_count,
+ evicted_count,
+ cached_dir_count,
+ added_subtree_count,
+ removed_subtree_count,
+ is_first,
+ "Sent periodic BlobsAvailable"
+ );
+ }
+ }

 async fn run(
 &self,
 update_for_worker_stream: Streaming<UpdateForWorker>,
@@ -205,6 +457,32 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 let mut futures = FuturesUnordered::new();
 futures.push(self.start_keep_alive().boxed());

+ // Start periodic BlobsAvailable reporting if configured.
+ if let Some(ref state) = self.blobs_available_state {
+ if !state.interval.is_zero() {
+ let mut grpc_client = self.grpc_client.clone();
+ let state = state.clone();
+ let ram = self.running_actions_manager.clone();
+ futures.push(
+ async move {
+ let mut is_first = true;
+ loop {
+ sleep(state.interval).await;
+ Self::send_periodic_blobs_available(
+ &mut grpc_client,
+ &state,
+ &ram,
+ is_first,
+ )
+ .await;
+ is_first = false;
+ }
+ }
+ .boxed(),
+ );
+ }
+ }
+
 let (add_future_channel, add_future_rx) = mpsc::unbounded_channel();
 let mut add_future_rx = UnboundedReceiverStream::new(add_future_rx).fuse();

@@ -248,6 +526,44 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 );
 }
 }
+ Update::TouchBlobs(touch_request) => {
+ // Touch blobs in the local store to update access times
+ // and prevent premature eviction of referenced blobs.
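+ // Sketch of the assumed mechanism: EvictingMap orders entries by
+ // last-access time, and a has()/has_with_results() lookup refreshes
+ // that timestamp without reading blob data, so "touching" is just a
+ // batched existence check.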
+ let digest_count = touch_request.digests.len();
+ trace!(digest_count, "Received TouchBlobs request");
+ if let Some(ref state) = self.blobs_available_state {
+ let fs_store = state.fs_store.clone();
+ let digests: Vec<DigestInfo> = touch_request
+ .digests
+ .into_iter()
+ .filter_map(|d| DigestInfo::try_from(d).ok())
+ .collect();
+ // Best-effort: call has() on each digest to update
+ // the EvictingMap's LRU access time.
+ let keys: Vec<StoreKey<'static>> = digests
+ .iter()
+ .map(|d| StoreKey::from(*d))
+ .collect();
+ let mut results = vec![None; keys.len()];
+ if let Err(err) = Pin::new(fs_store.as_ref())
+ .has_with_results(&keys, &mut results)
+ .await
+ {
+ warn!(
+ ?err,
+ digest_count,
+ "TouchBlobs: failed to touch digests in FilesystemStore"
+ );
+ } else {
+ let found = results.iter().filter(|r| r.is_some()).count();
+ trace!(
+ digest_count,
+ found,
+ "TouchBlobs: touched digests in FilesystemStore"
+ );
+ }
+ }
+ }
 Update::StartAction(start_execute) => {
 // Don't accept any new requests if we're shutting down.
 if shutting_down {
@@ -297,10 +613,6 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 let actions_in_transit = self.actions_in_transit.clone();
 let worker_id = self.worker_id.clone();
 let running_actions_manager = self.running_actions_manager.clone();
- let mut grpc_client = self.grpc_client.clone();
- let complete = ExecuteComplete {
- operation_id: operation_id.clone(),
- };
 self.metrics.clone().wrap(move |metrics| async move {
 metrics.preconditions.wrap(preconditions_met(precondition_script_cfg, &extra_envs))
 .and_then(|()| running_actions_manager.create_and_add_action(worker_id, start_execute))
@@ -319,18 +631,21 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 .clone()
 .prepare_action()
 .and_then(RunningAction::execute)
- .and_then(|result| async move {
- // Notify that execution has completed so it can schedule a new action.
- drop(grpc_client.execution_complete(complete).await);
- Ok(result)
- })
+ // upload_results now only uploads to the local fast store
+ // (FilesystemStore). The remote CAS upload is deferred to
+ // the background after the result is reported.
 .and_then(RunningAction::upload_results)
 .and_then(RunningAction::get_finished_result)
- // Note: We need ensure we run cleanup even if one of the other steps fail.
 .then(|result| async move {
- if let Err(e) = action.cleanup().await {
- return Result::<ActionResult, Error>::Err(e).merge(result);
- }
+ // Spawn cleanup in the background — it only removes
+ // the work directory (files already renamed into CAS).
+ // The cleaning_up_operations + wait_for_cleanup mechanism
+ // handles the race if the same action is retried.
+ tokio::spawn(async move { + if let Err(e) = action.cleanup().await { + error!(?e, "Background cleanup failed"); + } + }); result }) }).await @@ -339,24 +654,87 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke let make_publish_future = { let mut grpc_client = self.grpc_client.clone(); + let cas_endpoint_for_notify = self.config.cas_server_port + .map(|port| cas_advertised_endpoint(port)) + .unwrap_or_default(); let running_actions_manager = self.running_actions_manager.clone(); + let exec_load = get_cpu_load_pct(); + debug!("ExecuteComplete cpu_load_pct={exec_load}"); + let complete = ExecuteComplete { + operation_id: operation_id.clone(), + cpu_load_pct: exec_load, + }; move |res: Result| async move { let instance_name = maybe_instance_name .err_tip(|| "`instance_name` could not be resolved; this is likely an internal error in local_worker.")?; match res { Ok(mut action_result) => { - // Save in the action cache before notifying the scheduler that we've completed. - if let Some(digest_info) = action_digest.clone().and_then(|action_digest| action_digest.try_into().ok()) { - if let Err(err) = running_actions_manager.cache_action_result(digest_info, &mut action_result, digest_hasher).await { - error!( - ?err, - ?action_digest, - "Error saving action in store", - ); + // Collect output digests upfront so both futures + // can proceed without borrowing action_result. + let output_digests: Vec<_> = { + let mut v = Vec::new(); + if !cas_endpoint_for_notify.is_empty() { + for file in &action_result.output_files { + v.push(file.digest.into()); + } + for folder in &action_result.output_folders { + v.push(folder.tree_digest.into()); + } + if action_result.stdout_digest.size_bytes() > 0 { + v.push(action_result.stdout_digest.into()); + } + if action_result.stderr_digest.size_bytes() > 0 { + v.push(action_result.stderr_digest.into()); + } } - } - let action_stage = ActionStage::Completed(action_result); + v + }; + + // 1. BlobsAvailableNotif and cache_action_result run + // concurrently — they use independent connections + // (worker API stream vs AC/historical stores). + let blobs_fut = async { + if !output_digests.is_empty() { + let load = get_cpu_load_pct(); + debug!("BlobsAvailable cpu_load_pct={load}"); + if let Err(err) = grpc_client.blobs_available( + BlobsAvailableNotification { + worker_cas_endpoint: cas_endpoint_for_notify.clone(), + digests: output_digests, + is_full_snapshot: false, + evicted_digests: Vec::new(), + digest_infos: Vec::new(), + cpu_load_pct: load, + cached_directory_digests: Vec::new(), + added_subtree_digests: Vec::new(), + removed_subtree_digests: Vec::new(), + is_full_subtree_snapshot: false, + } + ).await { + warn!(?err, "Failed to send blobs_available notification"); + } + } + }; + let cache_fut = async { + if let Some(digest_info) = action_digest.clone().and_then(|action_digest| action_digest.try_into().ok()) { + if let Err(err) = running_actions_manager.cache_action_result(digest_info, &mut action_result, digest_hasher).await { + error!( + ?err, + ?action_digest, + "Error saving action in store", + ); + } + } + }; + tokio::join!(blobs_fut, cache_fut); + + // 2. Notify scheduler that execution is complete + // so it can schedule new work on this worker. + drop(grpc_client.execution_complete(complete).await); + + // 3. Send execution response with the action result. 
+ let action_stage = ActionStage::Completed(action_result.clone());
 grpc_client.execution_response(
 ExecuteResult{
 instance_name,
@@ -366,8 +744,30 @@ impl<'a, T: WorkerApiClientTrait + 'static, U: RunningActionsManager> LocalWorke
 )
 .await
 .err_tip(|| "Error while calling execution_response")?;
+
+ // 4. Upload output blobs from local CAS to remote
+ // CAS in the background. This is fire-and-forget;
+ // peers can already serve the blobs directly.
+ running_actions_manager.spawn_upload_to_remote(&action_result);
 },
 Err(e) => {
+ // Still notify completion on error so the worker
+ // is freed for new work.
+ drop(grpc_client.execution_complete(complete).await);
+
+ let e = if e.code == Code::NotFound {
+ // Per REAPI spec, missing inputs should return
+ // FAILED_PRECONDITION so the client re-uploads.
+ let mut err = make_err!(
+ Code::FailedPrecondition,
+ "One or more input blobs missing: {}",
+ e.message_string()
+ );
+ err.details = e.details;
+ err
+ } else {
+ e
+ };
 grpc_client.execution_response(ExecuteResult{
 instance_name,
 operation_id,
@@ -467,6 +867,11 @@ pub struct LocalWorker<T: WorkerApiClientTrait + 'static, U: RunningActionsManag
 sleep_fn: Option<Box<dyn Fn(Duration) -> BoxFuture<'static, ()> + Send + Sync>>,
 metrics: Arc<Metrics>,
+ /// State for periodic BlobsAvailable reporting.
+ blobs_available_state: Option<BlobsAvailableState>,
+ /// Guard for the worker CAS server task. Keeps the task alive as long as
+ /// the `LocalWorker` is alive. When dropped, the CAS server is aborted.
+ _cas_server_guard: Option<JoinHandleDropGuard<Result<(), Error>>>,
 }

 impl<
@@ -534,7 +939,48 @@ pub async fn new_local_worker(
 Duration::from_secs(config.max_upload_timeout as u64)
 };

- // Initialize directory cache if configured
+ // If peer blob sharing is configured (cas_server_port is set), create a
+ // worker-local locality map and wrap the slow store with WorkerProxyStore.
+ // This enables workers to fetch blobs from peers instead of the central CAS.
+ let (effective_cas_store, peer_locality_map) = if config.cas_server_port.is_some() {
+ let locality_map = nativelink_util::blob_locality_map::new_shared_blob_locality_map();
+
+ // Wrap the slow store (central CAS) with WorkerProxyStore.
+ // Enable racing so the worker races peer fetches against server fetches.
+ let slow_store = fast_slow_store.slow_store().clone();
+ let mut proxy_arc =
+ nativelink_store::worker_proxy_store::WorkerProxyStore::new(
+ slow_store,
+ locality_map.clone(),
+ );
+ Arc::get_mut(&mut proxy_arc)
+ .expect("WorkerProxyStore just created, no other refs")
+ .enable_race_peers();
+ let proxy_store = Store::new(proxy_arc);
+
+ // Build a new FastSlowStore: fast=local disk, slow=WorkerProxyStore(central CAS).
+ // Preserve the original store's direction config so that e.g.
+ // slow_direction=get prevents uploads from propagating to the server.
+ let fast_store = fast_slow_store.fast_store().clone();
+ let fss_spec = nativelink_config::stores::FastSlowSpec {
+ fast: nativelink_config::stores::StoreSpec::Noop(Default::default()),
+ slow: nativelink_config::stores::StoreSpec::Noop(Default::default()),
+ fast_direction: fast_slow_store.fast_direction(),
+ slow_direction: fast_slow_store.slow_direction(),
+ };
+ let new_fss = FastSlowStore::new(&fss_spec, fast_store, proxy_store);
+ info!(
+ "Peer blob sharing enabled: wrapping slow store with WorkerProxyStore"
+ );
+
+ (new_fss, Some(locality_map))
+ } else {
+ (fast_slow_store.clone(), None)
+ };
+
+ // Initialize directory cache if configured.
+ // This is done after effective_cas_store is created so the cache can use
+ // the same FastSlowStore (with WorkerProxyStore) for batch downloads.
 let directory_cache = if let Some(cache_config) = &config.directory_cache {
 use std::path::PathBuf;
@@ -557,7 +1003,11 @@ pub async fn new_local_worker(
 cache_root,
 };

- match DirectoryCache::new(worker_cache_config, Store::new(fast_slow_store.clone())).await {
+ match DirectoryCache::new(
+ worker_cache_config,
+ Store::new(effective_cas_store.clone()),
+ Some(effective_cas_store.clone()),
+ ).await {
 Ok(cache) => {
 tracing::info!("Directory cache initialized successfully");
 Some(Arc::new(cache))
@@ -571,6 +1021,8 @@ pub async fn new_local_worker(
 None
 };

+ let effective_cas_store_for_cas_server = effective_cas_store.clone();
+
 let running_actions_manager =
 Arc::new(RunningActionsManagerImpl::new(RunningActionsManagerArgs {
 root_action_directory: config.work_directory.clone(),
@@ -578,7 +1030,7 @@ pub async fn new_local_worker(
 entrypoint,
 additional_environment: config.additional_environment.clone(),
 },
- cas_store: fast_slow_store,
+ cas_store: effective_cas_store,
 ac_store,
 historical_store,
 upload_action_result_config: &config.upload_action_result,
@@ -586,7 +1038,110 @@ pub async fn new_local_worker(
 max_upload_timeout,
 timeout_handled_externally: config.timeout_handled_externally,
 directory_cache,
+ peer_locality_map: peer_locality_map.clone(),
 })?);
+
+ // Set up periodic BlobsAvailable reporting if we have a CAS port.
+ let blobs_available_state = if config.cas_server_port.is_some() {
+ // Try to get a reference to the FilesystemStore (the fast store in FastSlowStore).
+ let fs_store_opt: Option<Arc<FilesystemStore>> = fast_slow_store
+ .fast_store()
+ .downcast_ref::<FilesystemStore>(None)
+ .and_then(|fs| fs.get_arc());
+
+ if let Some(fs_store) = fs_store_opt {
+ let interval_ms = if config.blobs_available_interval_ms == 0 {
+ DEFAULT_BLOBS_AVAILABLE_INTERVAL_MS
+ } else {
+ config.blobs_available_interval_ms
+ };
+ let cas_endpoint = config
+ .cas_server_port
+ .map(|port| cas_advertised_endpoint(port))
+ .unwrap_or_default();
+
+ // Create change tracker and register it on the FilesystemStore.
+ let tracker = BlobChangeTracker::new();
+ if let Err(err) = fs_store
+ .clone()
+ .register_item_callback(tracker.clone())
+ {
+ warn!(?err, "Failed to register blob change tracker on FilesystemStore");
+ } else {
+ info!(
+ interval_ms,
+ "Registered periodic BlobsAvailable reporting with callback-based change tracking"
+ );
+ }
+
+ Some(BlobsAvailableState {
+ fs_store,
+ tracker,
+ cas_endpoint,
+ interval: Duration::from_millis(interval_ms),
+ })
+ } else {
+ warn!("FastSlowStore's fast store is not a FilesystemStore; periodic BlobsAvailable reporting disabled");
+ None
+ }
+ } else {
+ None
+ };
+
+ // Start a CAS + ByteStream gRPC server for peer blob sharing if configured.
+ // Serves the effective_cas_store (which includes WorkerProxyStore) so that
+ // reads can be proxied to peers when the local store doesn't have the blob.
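+ // Assumed request flow, for orientation: a peer worker dials this
+ // endpoint's ByteStream/CAS services; a hit is served from the local
+ // fast store, while a miss falls through WorkerProxyStore to peers or
+ // the central CAS.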
+ let cas_server_guard = if let Some(cas_port) = config.cas_server_port { + let cas_store = Store::new(effective_cas_store_for_cas_server); + let store_manager = Arc::new(nativelink_store::store_manager::StoreManager::new()); + store_manager.add_store("worker_cas", cas_store); + + let cas_configs = vec![nativelink_config::cas_server::WithInstanceName { + instance_name: String::new(), + config: nativelink_config::cas_server::CasStoreConfig { + cas_store: "worker_cas".to_string(), + }, + }]; + let bytestream_configs = vec![nativelink_config::cas_server::WithInstanceName { + instance_name: String::new(), + config: nativelink_config::cas_server::ByteStreamConfig { + cas_store: "worker_cas".to_string(), + ..Default::default() + }, + }]; + + let cas_server = nativelink_service::cas_server::CasServer::new(&cas_configs, &store_manager) + .err_tip(|| "Failed to create worker CAS server")?; + let bytestream_server = + nativelink_service::bytestream_server::ByteStreamServer::new(&bytestream_configs, &store_manager) + .err_tip(|| "Failed to create worker ByteStream server")?; + + let addr: std::net::SocketAddr = ([0, 0, 0, 0], cas_port).into(); + let advertised = cas_advertised_endpoint(cas_port); + + let worker_name = config.name.clone(); + Some(spawn!("worker_cas_server", async move { + info!( + worker_name = %worker_name, + %addr, + %advertised, + "Starting worker CAS server for peer blob sharing" + ); + let result = tonic::transport::Server::builder() + .add_service(cas_server.into_service()) + .add_service(bytestream_server.into_service()) + .serve(addr) + .await + .map_err(|e| make_err!(Code::Internal, "Worker CAS server failed: {e:?}")); + if let Err(ref e) = result { + error!(%addr, ?e, "Worker CAS server exited with error"); + } + result + })) + } else { + None + }; + let local_worker = LocalWorker::new_with_connection_factory_and_actions_manager( config.clone(), running_actions_manager, @@ -618,6 +1173,8 @@ pub async fn new_local_worker( }) }), Box::new(move |d| Box::pin(sleep(d))), + blobs_available_state, + cas_server_guard, ); Ok(local_worker) } @@ -628,6 +1185,8 @@ impl LocalWorker, connection_factory: ConnectionFactory, sleep_fn: Box BoxFuture<'static, ()> + Send + Sync>, + blobs_available_state: Option, + cas_server_guard: Option>>, ) -> Self { let metrics = Arc::new(Metrics::new(Arc::downgrade( running_actions_manager.metrics(), @@ -638,6 +1197,8 @@ impl LocalWorker LocalWorker LocalWorker EvictingMap integration test + // --------------------------------------------------------------- + // Wires: EvictingMap -> ItemCallbackHolder -> BlobChangeTracker + // and verifies that inserts and evictions flow through correctly. + #[test] + fn test_blob_change_tracker_evicting_map_integration() { + use std::time::SystemTime; + + use nativelink_config::stores::EvictionPolicy; + use nativelink_store::callback_utils::ItemCallbackHolder; + use nativelink_util::evicting_map::{EvictingMap, LenEntry}; + use nativelink_util::store_trait::StoreKeyBorrow; + + // Simple value type for the EvictingMap. + #[derive(Clone, Debug)] + struct TestValue(u64); + + impl LenEntry for TestValue { + fn len(&self) -> u64 { + self.0 + } + fn is_empty(&self) -> bool { + self.0 == 0 + } + } + + let rt = tokio::runtime::Builder::new_current_thread() + .build() + .unwrap(); + + rt.block_on(async { + // Create an EvictingMap with max_bytes = 100. 
+ let evicting_map = EvictingMap::< + StoreKeyBorrow, + StoreKey<'static>, + TestValue, + SystemTime, + ItemCallbackHolder, + >::new( + &EvictionPolicy { + max_count: 0, + max_seconds: 0, + max_bytes: 100, + evict_bytes: 0, + }, + SystemTime::now(), + ); + + // Create a BlobChangeTracker and register it. + let tracker = BlobChangeTracker::new(); + let holder = ItemCallbackHolder::new(tracker.clone()); + evicting_map.add_item_callback(holder); + + let d1 = DigestInfo::new([1u8; 32], 30); + let d2 = DigestInfo::new([2u8; 32], 40); + + // Insert two items (total 70 bytes, under 100 limit). + let key1: StoreKeyBorrow = StoreKey::Digest(d1).into(); + let key2: StoreKeyBorrow = StoreKey::Digest(d2).into(); + evicting_map.insert(key1, TestValue(30)).await; + evicting_map.insert(key2, TestValue(40)).await; + + // Swap and verify both digests appear in `added`. + let changes = tracker.swap(); + assert_eq!( + changes.added.len(), + 2, + "Expected 2 added digests after initial inserts" + ); + assert!( + changes.added.contains_key(&d1), + "Expected d1 in added set" + ); + assert!( + changes.added.contains_key(&d2), + "Expected d2 in added set" + ); + assert!( + changes.evicted.is_empty(), + "Expected no evictions yet" + ); + + // Now insert a third item (50 bytes) — total would be 120 bytes, + // which exceeds max_bytes=100. This should trigger eviction of + // the least recently used item (d1, 30 bytes). + let d3 = DigestInfo::new([3u8; 32], 50); + let key3: StoreKeyBorrow = StoreKey::Digest(d3).into(); + evicting_map.insert(key3, TestValue(50)).await; + + // Allow background tasks to run (eviction callbacks are fire-and-forget). + tokio::task::yield_now().await; + + let changes = tracker.swap(); + assert!( + changes.added.contains_key(&d3), + "Expected d3 in added set after third insert" + ); + assert!( + changes.evicted.contains(&d1), + "Expected d1 in evicted set (LRU eviction)" + ); + // d2 should NOT have been evicted (total after eviction: 40 + 50 = 90 <= 100). + assert!( + !changes.evicted.contains(&d2), + "Expected d2 to NOT be evicted" + ); + }); + } + + #[test] + fn test_cas_advertised_endpoint_format() { + let endpoint = cas_advertised_endpoint(50081); + assert!( + endpoint.starts_with("grpc://"), + "Expected endpoint to start with 'grpc://', got: {endpoint}" + ); + assert!( + endpoint.ends_with(":50081"), + "Expected endpoint to end with ':50081', got: {endpoint}" + ); + + // Extract hostname and verify it's non-empty. 
+ let without_prefix = endpoint.strip_prefix("grpc://").unwrap(); + let hostname = without_prefix.strip_suffix(":50081").unwrap(); + assert!( + !hostname.is_empty(), + "Expected non-empty hostname in endpoint: {endpoint}" + ); + } +} diff --git a/nativelink-worker/src/running_actions_manager.rs b/nativelink-worker/src/running_actions_manager.rs index 993be3dab..aad81b594 100644 --- a/nativelink-worker/src/running_actions_manager.rs +++ b/nativelink-worker/src/running_actions_manager.rs @@ -42,12 +42,13 @@ use futures::stream::{FuturesUnordered, StreamExt, TryStreamExt}; use nativelink_config::cas_server::{ EnvironmentSource, UploadActionResultConfig, UploadCacheResultsStrategy, }; +use nativelink_config::stores::StoreDirection; use nativelink_error::{Code, Error, ResultExt, make_err, make_input_err}; use nativelink_metric::MetricsComponent; use nativelink_proto::build::bazel::remote::execution::v2::{ - Action, ActionResult as ProtoActionResult, Command as ProtoCommand, - Directory as ProtoDirectory, Directory, DirectoryNode, ExecuteResponse, FileNode, SymlinkNode, - Tree as ProtoTree, UpdateActionResultRequest, + Action, ActionResult as ProtoActionResult, BatchReadBlobsRequest, Command as ProtoCommand, + Directory as ProtoDirectory, Directory, DirectoryNode, ExecuteResponse, FileNode, + GetTreeRequest, SymlinkNode, Tree as ProtoTree, UpdateActionResultRequest, }; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{ HistoricalExecuteResponse, StartExecute, @@ -59,27 +60,30 @@ use nativelink_store::cas_utils::is_zero_digest; use nativelink_store::fast_slow_store::FastSlowStore; use nativelink_store::filesystem_store::{FileEntry, FilesystemStore}; use nativelink_store::grpc_store::GrpcStore; +use nativelink_store::worker_proxy_store::WorkerProxyStore; use nativelink_util::action_messages::{ ActionInfo, ActionResult, DirectoryInfo, ExecutionMetadata, FileInfo, NameOrPath, OperationId, SymlinkInfo, to_execute_response, }; use nativelink_util::common::{DigestInfo, fs}; -use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc}; +use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc, default_digest_hasher_func}; use nativelink_util::metrics_utils::{AsyncCounterWrapper, CounterWithTime}; -use nativelink_util::store_trait::{Store, StoreLike, UploadSizeInfo}; +use nativelink_util::buf_channel::make_buf_channel_pair; +use nativelink_util::store_trait::{Store, StoreKey, StoreLike, StoreOptimizations, UploadSizeInfo}; +use nativelink_util::log_utils::throughput_mbps; use nativelink_util::{background_spawn, spawn, spawn_blocking}; use parking_lot::Mutex; use prost::Message; -use relative_path::RelativePath; use scopeguard::{ScopeGuard, guard}; use serde::Deserialize; -use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; +use tokio::io::AsyncReadExt; use tokio::process; -use tokio::sync::{Notify, oneshot, watch}; +use tokio::sync::{Notify, mpsc, oneshot, watch}; use tokio::time::Instant; use tokio_stream::wrappers::ReadDirStream; +use opentelemetry::context::Context; use tonic::Request; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, event, trace, warn, Level}; use uuid::Uuid; /// For simplicity we use a fixed exit code for cases when our program is terminated @@ -111,157 +115,1355 @@ struct SideChannelInfo { failure: Option, } -/// Aggressively download the digests of files and make a local folder from it. This function -/// will spawn unbounded number of futures to try and get these downloaded. 
The store itself -/// should be rate limited if spawning too many requests at once is an issue. -/// We require the `FilesystemStore` to be the `fast` store of `FastSlowStore`. This is for -/// efficiency reasons. We will request the `FastSlowStore` to populate the entry then we will -/// assume the `FilesystemStore` has the file available immediately after and hardlink the file -/// to a new location. -// Sadly we cannot use `async fn` here because the rust compiler cannot determine the auto traits -// of the future. So we need to force this function to return a dynamic future instead. -// see: https://github.com/rust-lang/rust/issues/78649 -pub fn download_to_directory<'a>( +#[derive(prost::Message)] +struct PreconditionFailure { + #[prost(message, repeated, tag = "1")] + violations: Vec, +} + +#[derive(prost::Message)] +struct Violation { + #[prost(string, tag = "1")] + r#type: String, + #[prost(string, tag = "2")] + subject: String, + #[prost(string, tag = "3")] + description: String, +} + +fn make_precondition_failure_any(digest: DigestInfo) -> prost_types::Any { + let failure = PreconditionFailure { + violations: vec![Violation { + r#type: "MISSING".into(), + subject: format!("blobs/{}/{}", digest.packed_hash(), digest.size_bytes()), + description: String::new(), + }], + }; + prost_types::Any { + type_url: "type.googleapis.com/google.rpc.PreconditionFailure".into(), + value: failure.encode_to_vec(), + } +} + +/// Metadata about a file to be materialized from CAS to disk. +struct FileToMaterialize { + digest: DigestInfo, + dest: String, + #[cfg(target_family = "unix")] + unix_mode: Option, + mtime: Option, +} + +/// Maximum size for a blob to be eligible for BatchReadBlobs (1 MiB). +/// Blobs larger than this use the existing ByteStream path. +const BATCH_READ_MAX_BLOB_SIZE: u64 = 1024 * 1024; + +/// Maximum total payload per BatchReadBlobs request (4 MiB), per REAPI recommendation. +const BATCH_READ_MAX_REQUEST_SIZE: u64 = 4 * 1024 * 1024; + +/// Resolve the full directory tree starting from `root_digest`. +/// +/// Tries the `GetTree` RPC (single streaming call) if the slow store is a `GrpcStore`. +/// Falls back to recursive `get_and_decode_digest` calls otherwise. +/// +/// Returns a map from digest to Directory proto for every directory in the tree. +pub async fn resolve_directory_tree( + cas_store: &FastSlowStore, + root_digest: &DigestInfo, +) -> Result, Error> { + let tree_start = std::time::Instant::now(); + debug!( + root = ?root_digest, + "resolve_directory_tree: starting tree resolution", + ); + // Try the fast path: GetTree RPC via the underlying GrpcStore. + if let Some(grpc_store) = cas_store.slow_store().downcast_ref::(None) { + debug!( + root = ?root_digest, + method = "GetTree RPC", + "resolve_directory_tree: using GetTree RPC fast path", + ); + let request = GetTreeRequest { + instance_name: String::new(), // GrpcStore fills this in + root_digest: Some((*root_digest).into()), + page_size: 0, // server decides + page_token: String::new(), + digest_function: Context::current() + .get::() + .map_or_else(default_digest_hasher_func, |v| *v) + .proto_digest_func() + .into(), + }; + + match grpc_store.get_tree(Request::new(request)).await { + Ok(response) => { + let rpc_elapsed = tree_start.elapsed(); + let mut stream = response.into_inner(); + // Collect all directories from the stream into a flat list. + let mut all_dirs: Vec = Vec::new(); + while let Some(resp) = stream.message().await.err_tip(|| "In GetTree stream")? 
+                {
+                    all_dirs.extend(resp.directories);
+                }
+                let stream_elapsed = tree_start.elapsed();
+
+                debug!(
+                    root = ?root_digest,
+                    raw_dir_count = all_dirs.len(),
+                    rpc_connect_ms = rpc_elapsed.as_millis() as u64,
+                    stream_complete_ms = stream_elapsed.as_millis() as u64,
+                    "resolve_directory_tree: GetTree stream received",
+                );
+
+                if !all_dirs.is_empty() {
+                    // Build the tree using BFS assignment from the root.
+                    // The GetTree response returns directories in BFS order
+                    // (root first). Rather than re-encoding each directory
+                    // and hoping the digest matches (which fails when the
+                    // original bytes were serialized by a different protobuf
+                    // implementation, e.g. Java), we assign digests by
+                    // walking the tree structure: the root gets `root_digest`,
+                    // and each child gets the digest its parent references.
+                    //
+                    // The server deduplicates: if two parents reference the
+                    // same child digest, the child appears only once in the
+                    // response. We mirror this by tracking `seen` digests
+                    // and only consuming a new position for unseen children.
+                    let mut tree = HashMap::with_capacity(all_dirs.len());
+                    let mut dir_by_pos: Vec<Directory> = all_dirs;
+                    // BFS queue: (position_in_dir_by_pos, assigned_digest).
+                    let mut queue: VecDeque<(usize, DigestInfo)> = VecDeque::new();
+                    queue.push_back((0, *root_digest));
+                    let mut next_child_pos: usize = 1;
+                    // Track digests we've already assigned a position to,
+                    // mirroring the server's deduplication.
+                    let mut seen: HashSet<DigestInfo> = HashSet::new();
+                    seen.insert(*root_digest);
+
+                    while let Some((pos, digest)) = queue.pop_front() {
+                        if pos >= dir_by_pos.len() {
+                            break;
+                        }
+                        let dir = std::mem::take(&mut dir_by_pos[pos]);
+                        for child_node in &dir.directories {
+                            if let Some(child_digest) = child_node
+                                .digest
+                                .as_ref()
+                                .and_then(|d| DigestInfo::try_from(d).ok())
+                            {
+                                // Only assign a new position for previously
+                                // unseen digests (matching server dedup).
+                                if seen.insert(child_digest) {
+                                    if next_child_pos < dir_by_pos.len() {
+                                        queue.push_back((next_child_pos, child_digest));
+                                        next_child_pos += 1;
+                                    }
+                                }
+                            }
+                        }
+                        tree.insert(digest, dir);
+                    }
+
+                    // Validate structural completeness: every child reference
+                    // should point to a digest in the tree.
+                    let tree_valid = tree.contains_key(root_digest) && {
+                        tree.values().all(|dir| {
+                            dir.directories.iter().all(|node| {
+                                node.digest
+                                    .as_ref()
+                                    .and_then(|d| DigestInfo::try_from(d).ok())
+                                    .is_some_and(|d| tree.contains_key(&d))
+                            })
+                        })
+                    };
+
+                    if tree_valid {
+                        let elapsed = tree_start.elapsed();
+                        let total_bytes: u64 = tree.keys().map(|d| d.size_bytes()).sum();
+                        let total_files: usize = tree.values().map(|d| d.files.len()).sum();
+                        let total_symlinks: usize = tree.values().map(|d| d.symlinks.len()).sum();
+                        debug!(
+                            root = ?root_digest,
+                            dir_count = tree.len(),
+                            total_files,
+                            total_symlinks,
+                            total_bytes,
+                            elapsed_ms = elapsed.as_millis() as u64,
+                            "resolve_directory_tree: completed via GetTree RPC"
+                        );
+                        return Ok(tree);
+                    }
+                    // Tree structure didn't match BFS ordering; fall through.
+                    // Count how many child references are missing from the tree
+                    // so the warning includes actionable diagnostic info.
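+                    // (A child reference counts as "missing" here when the
+                    // BFS walk above never assigned its digest a position,
+                    // e.g. because the stream was truncated or the server
+                    // did not return strict BFS order.)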
+ let missing_children: usize = tree.values().map(|dir| { + dir.directories.iter().filter(|node| { + node.digest + .as_ref() + .and_then(|d| DigestInfo::try_from(d).ok()) + .map_or(true, |d| !tree.contains_key(&d)) + }).count() + }).sum(); + warn!( + root = ?root_digest, + tree_has_root = tree.contains_key(root_digest), + tree_size = tree.len(), + expected_size = dir_by_pos.len(), + missing_children, + validation_elapsed_ms = tree_start.elapsed().as_millis() as u64, + "resolve_directory_tree: GetTree BFS validation failed, falling back to recursive fetch" + ); + } + } + Err(e) => { + warn!( + root = ?root_digest, + err = ?e, + elapsed_ms = tree_start.elapsed().as_millis() as u64, + "resolve_directory_tree: GetTree RPC failed, falling back to recursive fetch" + ); + } + } + } else { + debug!( + root = ?root_digest, + method = "recursive fetch", + "resolve_directory_tree: no GrpcStore available, using recursive fetch", + ); + } + + // Fallback: recursive fetch (original behavior). + let recursive_start = std::time::Instant::now(); + let mut tree = HashMap::new(); + resolve_directory_tree_recursive(cas_store, root_digest, &mut tree).await?; + let recursive_elapsed = recursive_start.elapsed(); + let total_elapsed = tree_start.elapsed(); + let total_bytes: u64 = tree.keys().map(|d| d.size_bytes()).sum(); + let total_files: usize = tree.values().map(|d| d.files.len()).sum(); + let total_symlinks: usize = tree.values().map(|d| d.symlinks.len()).sum(); + debug!( + root = ?root_digest, + dir_count = tree.len(), + total_files, + total_symlinks, + total_bytes, + individual_fetches = tree.len(), + recursive_ms = recursive_elapsed.as_millis() as u64, + total_elapsed_ms = total_elapsed.as_millis() as u64, + "resolve_directory_tree: completed via recursive fetch" + ); + Ok(tree) +} + +/// Recursively fetch directories via individual `get_and_decode_digest` calls. +fn resolve_directory_tree_recursive<'a>( cas_store: &'a FastSlowStore, - filesystem_store: Pin<&'a FilesystemStore>, digest: &'a DigestInfo, - current_directory: &'a str, + tree: &'a mut HashMap, ) -> BoxFuture<'a, Result<(), Error>> { async move { + if tree.contains_key(digest) { + return Ok(()); + } let directory = get_and_decode_digest::(cas_store, digest.into()) .await - .err_tip(|| "Converting digest to Directory")?; - let mut futures = FuturesUnordered::new(); + .err_tip(|| "Converting digest to Directory in recursive tree fetch")?; + let child_digests: Vec = directory + .directories + .iter() + .map(|d| { + d.digest + .as_ref() + .err_tip(|| "Expected Digest in DirectoryNode")? + .try_into() + .err_tip(|| "Parsing child directory digest in recursive tree fetch") + }) + .collect::, _>>()?; + tree.insert(*digest, directory); + for child in &child_digests { + resolve_directory_tree_recursive(cas_store, child, tree).await?; + } + Ok(()) + } + .boxed() +} + +/// Walk the resolved directory tree, creating all directories and collecting +/// all files that need to be materialized. Returns the flat list of files. +fn collect_files_from_tree( + tree: &HashMap, + root_digest: &DigestInfo, + root_path: &str, +) -> Result<(Vec, Vec<(String, String)>), Error> { + let mut files = Vec::new(); + // (symlink_target, dest_path) + let mut symlinks: Vec<(String, String)> = Vec::new(); + // BFS to create directories in order and collect files. 
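+    // For example, a root containing `src/main.rs` plus a symlink `ln -> src`
+    // yields dirs [root_path, "{root_path}/src"], one FileToMaterialize with
+    // dest "{root_path}/src/main.rs", and one ("src", "{root_path}/ln")
+    // symlink pair. (Names here are illustrative only.)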
+ let mut queue = VecDeque::new(); + queue.push_back((*root_digest, root_path.to_string())); + + while let Some((dir_digest, dir_path)) = queue.pop_front() { + let directory = tree.get(&dir_digest).ok_or_else(|| { + make_err!( + Code::Internal, + "Directory {dir_digest:?} not found in resolved tree" + ) + })?; - for file in directory.files { + for file in &directory.files { let digest: DigestInfo = file .digest - .err_tip(|| "Expected Digest to exist in Directory::file::digest")? + .as_ref() + .err_tip(|| "Expected Digest in Directory::file::digest")? .try_into() .err_tip(|| "In Directory::file::digest")?; - let dest = format!("{}/{}", current_directory, file.name); - let (mtime, mut unix_mode) = match file.node_properties { - Some(properties) => (properties.mtime, properties.unix_mode), - None => (None, None), + let dest = format!("{}/{}", dir_path, file.name); + + #[cfg(target_family = "unix")] + let unix_mode = { + let (_, mut mode) = match &file.node_properties { + Some(properties) => (properties.mtime.clone(), properties.unix_mode), + None => (None, None), + }; + if file.is_executable { + mode = Some(mode.unwrap_or(0o555) | 0o111); + } + // Default to 0o555 (read+execute, no write) to match CAS store + // defaults. Some build tools (rules_cc, rules_rust) set + // is_executable=false on shell scripts that must be executable; + // using 0o555 as the base avoids breaking those actions. + Some(mode.unwrap_or(0o555)) }; - #[cfg_attr(target_family = "windows", allow(unused_assignments))] - if file.is_executable { - unix_mode = Some(unix_mode.unwrap_or(0o444) | 0o111); + + let mtime = file.node_properties.as_ref().and_then(|p| p.mtime.clone()); + + files.push(FileToMaterialize { + digest, + dest, + #[cfg(target_family = "unix")] + unix_mode, + mtime, + }); + } + + for subdir in &directory.directories { + let child_digest: DigestInfo = subdir + .digest + .as_ref() + .err_tip(|| "Expected Digest in Directory::directories::digest")? + .try_into() + .err_tip(|| "In Directory::directories::digest")?; + let child_path = format!("{}/{}", dir_path, subdir.name); + queue.push_back((child_digest, child_path)); + } + + #[cfg(target_family = "unix")] + for symlink_node in &directory.symlinks { + let dest = format!("{}/{}", dir_path, symlink_node.name); + symlinks.push((symlink_node.target.clone(), dest)); + } + } + + Ok((files, symlinks)) +} + +/// Maximum number of concurrent BatchReadBlobs RPCs in flight. +const BATCH_READ_CONCURRENCY: usize = 16; + +/// Maximum number of concurrent ByteStream fetches in flight. + +/// Batch-download small blobs via `BatchReadBlobs` and write them into the fast store. +/// Returns the set of digests that were successfully fetched. +/// +/// If WorkerProxyStore is available, uses the locality map to route digests +/// to peers that have them. Digests without a known peer go to the server. +/// Any misses from peers or server are retried via `populate_fast_store_unchecked`. +async fn batch_read_small_blobs( + cas_store: &FastSlowStore, + small_digests: &[DigestInfo], +) -> Result, Error> { + let slow_store = cas_store.slow_store(); + + // Try locality-aware routing through WorkerProxyStore. + if let Some(proxy) = slow_store.downcast_ref::(None) { + let peer_stores = proxy.peer_stores(); + if !peer_stores.is_empty() { + // Assign digests to endpoints using the locality map. 
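+            // For example, if the locality map says worker-a and worker-b
+            // both hold digest D and both are connected, D alternates
+            // between them round-robin; a digest with no connected peer
+            // falls into `server_digests` and is fetched from the central
+            // CAS instead. (Endpoint names are illustrative.)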
+            let mut endpoint_digests: HashMap<Arc<str>, Vec<DigestInfo>> = HashMap::new();
+            let mut server_digests: Vec<DigestInfo> = Vec::new();
+
+            {
+                let locality = proxy.locality_map().read();
+                let mut round_robin_idx: usize = 0;
+                for &digest in small_digests {
+                    let peers = locality.lookup_workers(&digest);
+                    // Filter to connected peers only.
+                    let connected: Vec<&Arc<str>> = peers
+                        .iter()
+                        .filter(|ep| peer_stores.contains_key(ep.as_ref()))
+                        .collect();
+                    if connected.is_empty() {
+                        server_digests.push(digest);
+                    } else {
+                        // Round-robin among connected peers that have this blob.
+                        let endpoint = connected[round_robin_idx % connected.len()].clone();
+                        round_robin_idx = round_robin_idx.wrapping_add(1);
+                        endpoint_digests
+                            .entry(endpoint)
+                            .or_default()
+                            .push(digest);
+                    }
+                }
+            }
+
+            let peer_blob_count: usize = endpoint_digests.values().map(|v| v.len()).sum();
+            debug!(
+                total = small_digests.len(),
+                to_peers = peer_blob_count,
+                to_server = server_digests.len(),
+                peer_endpoints = endpoint_digests.len(),
+                "BatchReadBlobs: locality-based routing"
+            );
+
+            // Collect ALL batch work items (peer + server) for parallel execution.
+            let mut all_batches: Vec<(&str, &GrpcStore, Vec<DigestInfo>)> = Vec::new();
+
+            for (endpoint, digests) in &endpoint_digests {
+                if let Some(store) = peer_stores.get(endpoint.as_ref()) {
+                    if let Some(grpc) = store.downcast_ref::<GrpcStore>(None) {
+                        for batch in partition_into_batches(digests) {
+                            all_batches.push((endpoint.as_ref(), grpc, batch));
+                        }
+                    }
+                }
+            }
+
+            if let Some(grpc) = proxy.inner_store().downcast_ref::<GrpcStore>(None) {
+                for batch in partition_into_batches(&server_digests) {
+                    all_batches.push(("server", grpc, batch));
+                }
+            }
+
+            // Execute ALL batches in parallel across all endpoints.
+            let results = futures::future::join_all(
+                all_batches.into_iter().map(|(ep, grpc, batch)| async move {
+                    let result = execute_batch_read(grpc, cas_store, &batch).await;
+                    (ep, result)
+                }),
+            )
+            .await;
+
+            let mut fetched = HashSet::new();
+            for (ep, result) in results {
+                match result {
+                    Ok(completed) => fetched.extend(completed),
+                    Err(e) => debug!(endpoint = ep, ?e, "BatchReadBlobs: batch failed"),
+                }
+            }
+
+            // Retry misses via populate_fast_store_unchecked (full store chain).
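+            // (The `_unchecked` variant is used because these digests were
+            // just reported missing, so presumably re-probing the fast
+            // store before populating would only add latency.)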
+            let misses: Vec<DigestInfo> = small_digests
+                .iter()
+                .filter(|d| !fetched.contains(d))
+                .copied()
+                .collect();
+
+            if !misses.is_empty() {
+                debug!(count = misses.len(), "BatchReadBlobs: fetching misses via store chain");
+                let retry_results = futures::future::join_all(
+                    misses.iter().map(|&digest| async move {
+                        let result = cas_store
+                            .populate_fast_store_unchecked(digest.into())
+                            .await;
+                        (digest, result)
+                    }),
+                )
+                .await;
+                let mut retry_failures = 0u32;
+                for (digest, result) in retry_results {
+                    match result {
+                        Ok(()) => { fetched.insert(digest); }
+                        Err(e) => {
+                            retry_failures += 1;
+                            debug!(?digest, ?e, "BatchReadBlobs: retry fetch failed");
+                        }
+                    }
+                }
+                if retry_failures > 0 {
+                    debug!(retry_failures, "BatchReadBlobs: some retries failed");
+                }
+            }
+
+            return Ok(fetched);
+        }
+    }
+
+    // No peers available — server-only batch read.
+    let grpc_store = match slow_store.downcast_ref::<GrpcStore>(None) {
+        Some(store) => store,
+        None => return Ok(HashSet::new()),
+    };
+
+    let batches = partition_into_batches(small_digests);
+    let fetched: HashSet<DigestInfo> = futures::stream::iter(batches.into_iter())
+        .map(|batch| async move { execute_batch_read(grpc_store, cas_store, &batch).await })
+        .buffer_unordered(BATCH_READ_CONCURRENCY)
+        .try_fold(HashSet::new(), |mut acc, completed| async move {
+            acc.extend(completed);
+            Ok(acc)
+        })
+        .await?;
+
+    Ok(fetched)
+}
+
+/// Partition digests into 4 MiB batches for BatchReadBlobs.
+fn partition_into_batches(digests: &[DigestInfo]) -> Vec<Vec<DigestInfo>> {
+    let mut batches: Vec<Vec<DigestInfo>> = Vec::new();
+    let mut current_batch: Vec<DigestInfo> = Vec::new();
+    let mut current_size: u64 = 0;
+
+    for &digest in digests {
+        let blob_size = digest.size_bytes();
+        if !current_batch.is_empty() && current_size + blob_size > BATCH_READ_MAX_REQUEST_SIZE {
+            batches.push(std::mem::take(&mut current_batch));
+            current_size = 0;
+        }
+        current_batch.push(digest);
+        current_size += blob_size;
+    }
+    if !current_batch.is_empty() {
+        batches.push(current_batch);
+    }
+    batches
+}
+
+/// Execute a single BatchReadBlobs request and write results to fast store.
+async fn execute_batch_read(
+    grpc_store: &GrpcStore,
+    cas_store: &FastSlowStore,
+    digests: &[DigestInfo],
+) -> Result<Vec<DigestInfo>, Error> {
+    let request = BatchReadBlobsRequest {
+        instance_name: String::new(), // GrpcStore fills this in
+        digests: digests.iter().map(|d| (*d).into()).collect(),
+        acceptable_compressors: vec![],
+        digest_function: Context::current()
+            .get::<DigestHasherFunc>()
+            .map_or_else(default_digest_hasher_func, |v| *v)
+            .proto_digest_func()
+            .into(),
+    };
+
+    let response = grpc_store
+        .batch_read_blobs(Request::new(request))
+        .await
+        .err_tip(|| "In execute_batch_read")?
+        .into_inner();
+
+    let fast_store = cas_store.fast_store();
+
+    // Parse all valid responses first, then write to fast store concurrently.
+    let valid_blobs: Vec<(DigestInfo, Bytes)> = response
+        .responses
+        .into_iter()
+        .filter_map(|blob_resp| {
+            let status_code = blob_resp.status.as_ref().map_or(0, |s| s.code);
+            if status_code != 0 {
+                return None;
+            }
+            let proto_digest = blob_resp.digest?;
+            let digest = DigestInfo::try_from(proto_digest).ok()?;
+            Some((digest, Bytes::from(blob_resp.data)))
+        })
+        .collect();
+
+    // Write all blobs to fast store concurrently.
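+    // Each write drives `Store::update` with a buffered channel pair: one
+    // future feeds the bytes plus EOF into the sender half while the store
+    // consumes the receiver half, and the two are joined so a failure on
+    // either side is surfaced. The pattern, in miniature (a sketch of the
+    // same buf_channel API used below):
+    //
+    //   let (mut tx, rx) = make_buf_channel_pair();
+    //   let update_fut = store.update(key, rx, UploadSizeInfo::ExactSize(n));
+    //   let send_fut = async { tx.send(data).await?; tx.send_eof() };
+    //   let (u, s) = futures::join!(update_fut, send_fut);
+    //   u.merge(s)?;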
+ let write_futures: FuturesUnordered<_> = valid_blobs + .into_iter() + .map(|(digest, data)| { + let data_len = data.len() as u64; + async move { + let (mut tx, rx) = make_buf_channel_pair(); + let store_key: StoreKey<'_> = digest.into(); + let update_fut = fast_store.update( + store_key, + rx, + UploadSizeInfo::ExactSize(data_len), + ); + let send_fut = async { + tx.send(data) + .await + .err_tip(|| "Sending batch blob to fast store")?; + tx.send_eof().err_tip(|| "Sending EOF for batch blob")?; + Ok::<_, Error>(()) + }; + let (update_res, send_res) = futures::join!(update_fut, send_fut); + update_res + .merge(send_res) + .err_tip(|| format!("Writing batch-read blob {digest:?} to fast store"))?; + Ok::(digest) + } + }) + .collect(); + + let completed: Vec = write_futures.try_collect().await?; + + Ok(completed) +} + +/// Populate the fast store for a single digest and hardlink it to `dest`. +/// Contains the retry loop for cache eviction races. +async fn populate_and_hardlink( + cas_store: &FastSlowStore, + filesystem_store: Pin<&FilesystemStore>, + digest: DigestInfo, + dest: &str, +) -> Result<(), Error> { + if is_zero_digest(digest) { + cas_store.populate_fast_store(digest.into()).await?; + let mut file_slot = fs::create_file(dest).await?; + std::io::Write::write_all(file_slot.as_std_mut(), &[]) + .err_tip(|| "Could not write to file")?; + return Ok(()); + } + + const MAX_RETRIES: u32 = 3; + let mut last_err = None; + for attempt in 0..MAX_RETRIES { + if attempt > 0 { + filesystem_store.remove_entry_for_digest(&digest).await; + } + cas_store.populate_fast_store(digest.into()).await?; + + let result = async { + let file_entry = filesystem_store + .get_file_entry_for_digest(&digest) + .await + .err_tip(|| "Getting file entry for hardlink")?; + let dest_clone = dest.to_string(); + file_entry + .get_file_path_locked(move |src| async move { + let src_exists = Path::new(&src).exists(); + let result = fs::hard_link(&src, &dest_clone).await; + if result.is_err() { + warn!( + src = %src.to_string_lossy(), + src_exists = src_exists, + dest = %dest_clone, + "hard_link failed while holding read lock" + ); + } + result + }) + .await + } + .await; + + match result { + Ok(()) => { + last_err = None; + break; + } + Err(e) if e.code == Code::NotFound => { + warn!( + attempt = attempt + 1, + max_retries = MAX_RETRIES, + ?digest, + dest = %dest, + err = ?e, + "File evicted from cache during hardlink. Retrying." + ); + last_err = Some(e); + } + Err(e) => { + return Err(make_err!( + Code::Internal, + "Could not make hardlink, {e:?} : {dest}" + )); + } + } + } + if let Some(e) = last_err { + return Err(make_err!( + Code::Internal, + "Could not make hardlink after {MAX_RETRIES} attempts, \ + file was repeatedly evicted from cache. {e:?} : {dest}\n\ + This error often occurs when the filesystem store's max_bytes is too small for your workload.\n\ + To fix this issue:\n\ + 1. Increase the 'max_bytes' value in your filesystem store configuration\n\ + 2. Example: Change 'max_bytes: 10000000000' to 'max_bytes: 50000000000' (or higher)\n\ + 3. The setting is typically found in your nativelink.json config under:\n\ + stores -> [your_filesystem_store] -> filesystem -> eviction_policy -> max_bytes\n\ + 4. Restart NativeLink after making the change\n\n\ + If this error persists after increasing max_bytes several times, please report at:\n\ + https://github.com/TraceMachina/nativelink/issues\n\ + Include your config file and both server and client logs to help us assist you." 
+ )); + } + Ok(()) +} + +/// Like `hardlink_and_set_metadata` but uses a pre-fetched file entry +/// (from batch `get_file_entries_batch`) to avoid per-file EvictingMap lock +/// contention. Falls back to the regular path on cache miss. +async fn hardlink_and_set_metadata_prefetched( + cas_store: &FastSlowStore, + filesystem_store: Pin<&FilesystemStore>, + file: FileToMaterialize, + prefetched_entry: Option>, +) -> Result<(), Error> { + let digest = file.digest; + let dest = file.dest.clone(); + + if let Some(file_entry) = prefetched_entry { + // We have a pre-fetched entry — try hardlink directly. + let dest_clone = dest.clone(); + let result = file_entry + .get_file_path_locked(move |src| async move { + fs::hard_link(&src, &dest_clone).await + }) + .await; + + match result { + Ok(()) => { + // Success — apply permissions and mtime, then return. + } + Err(e) if e.code == Code::NotFound => { + // File was evicted between pre-fetch and hardlink. + // Fall back to full populate+hardlink. + populate_and_hardlink(cas_store, filesystem_store, digest, &dest).await?; + } + Err(e) => { + return Err(make_err!( + Code::Internal, + "Could not make hardlink (prefetched), {e:?} : {dest}" + )); + } + } + } else { + // No pre-fetched entry (cache miss or zero digest). + populate_and_hardlink(cas_store, filesystem_store, digest, &dest).await?; + } + + // Always set permissions — CAS files default to 0o555 but concurrent + // hardlinks from other actions can change the shared inode's mode. + // We must unconditionally chmod to ensure correctness. + #[cfg(target_family = "unix")] + if let Some(unix_mode) = file.unix_mode { + fs::set_permissions(&dest, Permissions::from_mode(unix_mode)) + .await + .err_tip(|| format!("Could not set unix mode in download_to_directory {dest}"))?; + } + + // Apply mtime. + if let Some(mtime) = file.mtime { + let dest_owned = dest.clone(); + spawn_blocking!("download_to_directory_set_mtime", move || { + set_file_mtime( + &dest_owned, + FileTime::from_unix_time(mtime.seconds, mtime.nanos as u32), + ) + .err_tip(|| format!("Failed to set mtime in download_to_directory {dest_owned}")) + }) + .await + .err_tip(|| "Failed to launch spawn_blocking in download_to_directory")??; + } + + Ok(()) +} + +/// Aggressively download the digests of files and make a local folder from it. +/// +/// This optimized version: +/// 1. Resolves the full directory tree via `GetTree` RPC (single streaming call) +/// instead of issuing recursive individual `get_and_decode_digest` calls. +/// 2. Batch-checks which blobs are already in the fast store via `has_with_results` +/// (maps to `FindMissingBlobs` on GrpcStore), avoiding per-file existence RPCs. +/// 3. Fetches small missing blobs (<1 MiB) via `BatchReadBlobs` in 4 MiB batches, +/// with large blobs using the existing ByteStream path. +/// +/// We require the `FilesystemStore` to be the `fast` store of `FastSlowStore`. +/// We will request the `FastSlowStore` to populate the entry then we will +/// assume the `FilesystemStore` has the file available immediately after and hardlink the file +/// to a new location. +pub fn download_to_directory<'a>( + cas_store: &'a FastSlowStore, + filesystem_store: Pin<&'a FilesystemStore>, + digest: &'a DigestInfo, + current_directory: &'a str, +) -> BoxFuture<'a, Result<(), Error>> { + async move { + let phase_start = std::time::Instant::now(); + + // Step 1: Resolve the full directory tree. 
+        let tree = resolve_directory_tree(cas_store, digest).await?;
+        let tree_resolve_ms = phase_start.elapsed().as_millis();
+
+        // Step 2: Walk the tree, creating all directories and collecting files.
+        let (files, symlinks) = collect_files_from_tree(&tree, digest, current_directory)?;
+
+        debug!(
+            root = ?digest,
+            total_dirs = tree.len(),
+            total_files = files.len(),
+            total_symlinks = symlinks.len(),
+            "download_to_directory: starting materialization",
+        );
+
+        // Create all subdirectories using level-parallel BFS — siblings at
+        // the same depth are created concurrently while parent-before-child
+        // ordering is maintained (each level completes before the next starts).
+        let mkdir_start = std::time::Instant::now();
+        let mut dirs_created: usize = 0;
+        let mut mkdir_depth: u32 = 0;
+        {
+            let mut current_level = vec![(*digest, current_directory.to_string())];
+            while !current_level.is_empty() {
+                let mut next_level = Vec::new();
+                for (dir_digest, dir_path) in &current_level {
+                    if let Some(directory) = tree.get(dir_digest) {
+                        debug!(
+                            depth = mkdir_depth,
+                            path = %dir_path,
+                            files = directory.files.len(),
+                            subdirs = directory.directories.len(),
+                            "download_to_directory: processing directory",
+                        );
+                        for subdir in &directory.directories {
+                            let child_digest: DigestInfo = subdir
+                                .digest
+                                .as_ref()
+                                .err_tip(|| "Expected Digest")?
+                                .try_into()
+                                .err_tip(|| "In Directory::directories::digest")?;
+                            let child_path = format!("{}/{}", dir_path, subdir.name);
+                            next_level.push((child_digest, child_path));
+                        }
+                    }
+                }
+                if !next_level.is_empty() {
+                    dirs_created += next_level.len();
+                    try_join_all(next_level.iter().map(|(_, path)| {
+                        let path = path.clone();
+                        async move {
+                            fs::create_dir(&path)
                                 .await
-                                .map_err(|e| {
-                                    if e.code == Code::NotFound {
-                                        make_err!(
-                                            Code::Internal,
-                                            "Could not make hardlink, file was likely evicted from cache. {e:?} : {dest}\n\
-                                            This error often occurs when the filesystem store's max_bytes is too small for your workload.\n\
-                                            To fix this issue:\n\
-                                            1. Increase the 'max_bytes' value in your filesystem store configuration\n\
-                                            2. Example: Change 'max_bytes: 10000000000' to 'max_bytes: 50000000000' (or higher)\n\
-                                            3. The setting is typically found in your nativelink.json config under:\n\
-                                            stores -> [your_filesystem_store] -> filesystem -> eviction_policy -> max_bytes\n\
-                                            4. Restart NativeLink after making the change\n\n\
-                                            If this error persists after increasing max_bytes several times, please report at:\n\
-                                            https://github.com/TraceMachina/nativelink/issues\n\
-                                            Include your config file and both server and client logs to help us assist you."
-                                        )
-                                    } else {
-                                        make_err!(Code::Internal, "Could not make hardlink, {e:?} : {dest}")
-                                    }
-                                })?;
-                            }
-                            #[cfg(target_family = "unix")]
-                            if let Some(unix_mode) = unix_mode {
-                                fs::set_permissions(&dest, Permissions::from_mode(unix_mode))
+                                .err_tip(|| format!("Could not create directory {path}"))
+                        }
+                    }))
+                    .await?;
+                }
+                mkdir_depth += 1;
+                current_level = next_level;
+            }
+        }
+        let mkdir_elapsed = mkdir_start.elapsed();
+        debug!(
+            dirs_created,
+            mkdir_depth_levels = mkdir_depth,
+            mkdir_ms = mkdir_elapsed.as_millis() as u64,
+            "download_to_directory: directories created",
+        );
+
+        // Create symlinks concurrently.
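+        // (Every parent directory exists by the time we get here, so the
+        // symlinks for all depths can be created in a single unordered pass.)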
+        #[cfg(target_family = "unix")]
+        {
+            let symlink_futures: FuturesUnordered<_> = symlinks
+                .iter()
+                .map(|(target, dest)| async move {
+                    fs::symlink(target, dest)
+                        .await
+                        .err_tip(|| format!("Could not create symlink {target} -> {dest}"))
+                })
+                .collect();
+            symlink_futures
+                .try_for_each(|()| futures::future::ready(Ok(())))
+                .await?;
+        }
+
+        if files.is_empty() {
+            debug!(
+                root = ?digest,
+                "download_to_directory: no files to materialize (directory-only tree)",
+            );
+            return Ok(());
+        }
+
+        // Step 3: Batch-check which blobs are already in the fast store.
+        // Deduplicate digests first to avoid redundant checks.
+        let unique_digests: Vec<DigestInfo> = {
+            let mut seen = HashSet::with_capacity(files.len());
+            files
+                .iter()
+                .filter_map(|f| {
+                    if seen.insert(f.digest) {
+                        Some(f.digest)
+                    } else {
+                        None
+                    }
+                })
+                .collect()
+        };
+
+        let has_check_start = std::time::Instant::now();
+        let store_keys: Vec<StoreKey<'_>> =
+            unique_digests.iter().map(|d| (*d).into()).collect();
+        let mut has_results = vec![None; store_keys.len()];
+        // Check in chunks to reduce Mutex hold time in the fast store,
+        // allowing concurrent operations from other actions to interleave.
+        const HAS_CHECK_CHUNK: usize = 500;
+        for start in (0..store_keys.len()).step_by(HAS_CHECK_CHUNK) {
+            let end = (start + HAS_CHECK_CHUNK).min(store_keys.len());
+            Pin::new(cas_store.fast_store())
+                .has_with_results(&store_keys[start..end], &mut has_results[start..end])
+                .await
+                .err_tip(|| "Batch has_with_results on fast store")?;
+        }
+
+        let cached_set: HashSet<DigestInfo> = unique_digests
+            .iter()
+            .zip(has_results.iter())
+            .filter_map(|(digest, result)| result.map(|_| *digest))
+            .collect();
+
+        let missing_digests: Vec<DigestInfo> = unique_digests
+            .iter()
+            .zip(has_results.iter())
+            .filter_map(|(digest, result)| if result.is_none() { Some(*digest) } else { None })
+            .collect();
+
+        let has_check_elapsed = has_check_start.elapsed();
+        let has_check_ms = phase_start.elapsed().as_millis();
+
+        let cached_bytes: u64 = cached_set.iter().map(|d| d.size_bytes()).sum();
+        let missing_bytes: u64 = missing_digests.iter().map(|d| d.size_bytes()).sum();
+        debug!(
+            total_files = files.len(),
+            unique_digests = unique_digests.len(),
+            cached = cached_set.len(),
+            cached_bytes,
+            missing = missing_digests.len(),
+            missing_bytes,
+            elapsed_ms = has_check_elapsed.as_millis() as u64,
+            "download_to_directory: batch existence check complete"
+        );
+
+        // Steps 4+5 (pipelined): three concurrent futures:
+        //
+        // Fetcher: launches ALL missing blob fetches at once with bounded
+        //     concurrency. As each blob arrives it is inserted into a
+        //     `fetched_set` so the producer knows it is ready.
+        //
+        // Producer: iterates files in batches. Files whose blobs are already
+        //     cached go to the channel immediately. Files whose blobs are
+        //     still being fetched are deferred and retried once the fetcher
+        //     signals new arrivals. This means hardlinking starts right away
+        //     for cached files while fetches proceed in parallel.
+        //
+        // Consumer: reads from the channel, hardlinks with bounded
+        //     concurrency (unchanged from before).
+        //
+        const HARDLINK_CONCURRENCY: usize = 64;
+        const FETCH_CONCURRENCY: usize = 128;
+        const HARDLINK_BATCH: usize = 64;
+        // Channel capacity: buffer ahead of the consumer.
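+        // The bounded channel doubles as back-pressure: if hardlinking falls
+        // behind, the producer blocks on `tx.send` instead of buffering
+        // unboundedly. Pipeline shape, in short:
+        //
+        //   fetcher --(fetched_set + Notify)--> producer --(mpsc)--> consumer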
+ const CHANNEL_CAPACITY: usize = HARDLINK_BATCH * 2; + + type PipelineItem = ( + FileToMaterialize, + Option>, + ); + + let total_files_to_link = files.len(); + let (tx, rx) = mpsc::channel::(CHANNEL_CAPACITY); + + let fetch_start = std::time::Instant::now(); + + let missing_set: HashSet = missing_digests.iter().copied().collect(); + + debug!( + total_files = total_files_to_link, + cached = cached_set.len(), + missing = missing_digests.len(), + missing_bytes, + fetch_concurrency = FETCH_CONCURRENCY, + hardlink_concurrency = HARDLINK_CONCURRENCY, + "download_to_directory: starting pipelined fetch+hardlink", + ); + + // --- Shared state: tracks which missing digests have arrived --- + let fetched_set: Arc>> = + Arc::new(std::sync::Mutex::new(HashSet::with_capacity(missing_digests.len()))); + let fetch_error: Arc>> = + Arc::new(std::sync::Mutex::new(None)); + let fetched_notify = Arc::new(Notify::new()); + + // --- Fetcher future --- + // Launches all missing blob fetches concurrently (bounded). + let fetcher_start = std::time::Instant::now(); + let fetched_set_ref = &fetched_set; + let fetch_error_ref = &fetch_error; + let fetched_notify_ref = &fetched_notify; + let fetcher_fut = async { + // Partition into small (BatchReadBlobs) and large (ByteStream). + let mut small: Vec = Vec::new(); + let mut large: Vec = Vec::new(); + for &d in &missing_digests { + if is_zero_digest(d) { + // Zero digests don't need fetching; mark as ready. + fetched_set_ref.lock().unwrap().insert(d); + continue; + } + if d.size_bytes() <= BATCH_READ_MAX_BLOB_SIZE { + small.push(d); + } else { + large.push(d); + } + } + + debug!( + small = small.len(), + large = large.len(), + missing_bytes, + "fetcher: starting all blob fetches", + ); + + // Fetch small blobs via BatchReadBlobs (already batches internally). + let batch_read_fut = async { + if small.is_empty() { + return Ok::<(), Error>(()); + } + let fetched = batch_read_small_blobs(cas_store, &small).await?; + // Mark all successfully fetched small blobs as ready. + { + let mut set = fetched_set_ref.lock().unwrap(); + for &d in &small { + // batch_read_small_blobs returns the set of blobs it + // actually got; unfetched ones need ByteStream fallback. + if fetched.contains(&d) { + set.insert(d); + } + } + } + fetched_notify_ref.notify_one(); + + // Fallback for small blobs not returned by BatchReadBlobs. + let fallback: Vec = small + .iter() + .filter(|d| !fetched.contains(d)) + .copied() + .collect(); + if !fallback.is_empty() { + debug!( + count = fallback.len(), + "fetcher: BatchReadBlobs fallback via ByteStream", + ); + futures::stream::iter(fallback.into_iter().map(Ok::<_, Error>)) + .try_for_each_concurrent(FETCH_CONCURRENCY, |d| async move { + cas_store + .populate_fast_store_unchecked(d.into()) .await - .err_tip(|| { - format!( - "Could not set unix mode in download_to_directory {dest}" - ) - })?; + .err_tip(|| format!("Populating fast store (fallback) for {d:?}"))?; + fetched_set_ref.lock().unwrap().insert(d); + fetched_notify_ref.notify_one(); + Ok(()) + }) + .await?; + } + Ok(()) + }; + + // Fetch large blobs via ByteStream with bounded concurrency. 
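+            // (Blobs above BATCH_READ_MAX_BLOB_SIZE would dominate the
+            // 4 MiB per-request budget, so they stream individually via
+            // the regular store chain instead of BatchReadBlobs.)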
+ let bytestream_fut = async { + if large.is_empty() { + return Ok::<(), Error>(()); + } + futures::stream::iter(large.into_iter().map(Ok::<_, Error>)) + .try_for_each_concurrent(FETCH_CONCURRENCY, |d| async move { + let blob_start = std::time::Instant::now(); + cas_store + .populate_fast_store_unchecked(d.into()) + .await + .err_tip(|| format!("Populating fast store for {d:?}"))?; + let blob_elapsed = blob_start.elapsed(); + if blob_elapsed.as_secs() >= 2 { + warn!( + digest = ?d, + size_bytes = d.size_bytes(), + elapsed_ms = blob_elapsed.as_millis() as u64, + "fetcher: slow blob fetch (>2s)", + ); + } + fetched_set_ref.lock().unwrap().insert(d); + fetched_notify_ref.notify_one(); + Ok(()) + }) + .await + }; + + // Run small and large fetches concurrently. + let (batch_result, bs_result) = + futures::future::join(batch_read_fut, bytestream_fut).await; + + let fetcher_elapsed = fetcher_start.elapsed(); + + // If either failed, record the error so the producer can see it. + if let Err(e) = batch_result { + *fetch_error_ref.lock().unwrap() = Some(e); + fetched_notify_ref.notify_one(); + } + if let Err(e) = bs_result { + let mut guard = fetch_error_ref.lock().unwrap(); + if guard.is_none() { + *guard = Some(e); + } + fetched_notify_ref.notify_one(); + } + + debug!( + elapsed_ms = fetcher_elapsed.as_millis() as u64, + fetched = fetched_set_ref.lock().unwrap().len(), + missing_total = missing_digests.len(), + throughput_mbps = format!("{:.1}", throughput_mbps(missing_bytes, fetcher_elapsed)), + "fetcher: all blob fetches complete", + ); + }; + + // --- Producer future --- + // Iterates files, sends cached ones immediately, waits for missing + // ones as they arrive from the fetcher. + let producer_start = std::time::Instant::now(); + let producer_fut = async { + let mut files_sent: usize = 0; + let mut deferred_count: usize = 0; + + // Process files in batches for entry pre-fetching efficiency. + for batch_files in files.chunks(HARDLINK_BATCH) { + // Separate into ready (cached or already fetched) and pending. + let mut ready_files: Vec<&FileToMaterialize> = Vec::new(); + let mut pending_files: Vec<&FileToMaterialize> = Vec::new(); + + { + let fetched = fetched_set_ref.lock().unwrap(); + for f in batch_files { + if !missing_set.contains(&f.digest) || fetched.contains(&f.digest) { + ready_files.push(f); + } else { + pending_files.push(f); + } + } + } + + // Send ready files immediately. + if !ready_files.is_empty() { + let ready_digests: Vec = + ready_files.iter().map(|f| f.digest).collect(); + let entries = + filesystem_store.get_file_entries_batch(&ready_digests).await; + + for (file, entry) in ready_files.iter().zip(entries) { + let item: PipelineItem = ( + FileToMaterialize { + digest: file.digest, + dest: file.dest.clone(), + #[cfg(target_family = "unix")] + unix_mode: file.unix_mode, + mtime: file.mtime.clone(), + }, + entry, + ); + if tx.send(item).await.is_err() { + return Ok::<_, Error>(producer_start.elapsed()); + } + files_sent += 1; + } + } + + // Wait for pending files as their blobs arrive. + if !pending_files.is_empty() { + deferred_count += pending_files.len(); + let mut remaining = pending_files; + + loop { + if remaining.is_empty() { + break; + } + + // Check for fetcher errors. 
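+                        // (Without this check the producer could wait on
+                        // `fetched_notify` forever for blobs that will never
+                        // arrive once a fetch future has failed.)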
+ if let Some(e) = fetch_error_ref.lock().unwrap().take() { + return Err(e); } - if let Some(mtime) = mtime { - spawn_blocking!("download_to_directory_set_mtime", move || { - set_file_mtime( - &dest, - FileTime::from_unix_time(mtime.seconds, mtime.nanos as u32), - ) - .err_tip(|| { - format!("Failed to set mtime in download_to_directory {dest}") - }) - }) - .await - .err_tip( - || "Failed to launch spawn_blocking in download_to_directory", - )??; + + // Partition remaining into newly ready and still pending. + let mut newly_ready: Vec<&FileToMaterialize> = Vec::new(); + let mut still_pending: Vec<&FileToMaterialize> = Vec::new(); + { + let fetched = fetched_set_ref.lock().unwrap(); + for f in remaining { + if fetched.contains(&f.digest) { + newly_ready.push(f); + } else { + still_pending.push(f); + } + } } - Ok(()) - }) - .map_err(move |e| e.append(format!("for digest {digest}"))) - .boxed(), - ); - } - for directory in directory.directories { - let digest: DigestInfo = directory - .digest - .err_tip(|| "Expected Digest to exist in Directory::directories::digest")? - .try_into() - .err_tip(|| "In Directory::file::digest")?; - let new_directory_path = format!("{}/{}", current_directory, directory.name); - futures.push( - async move { - fs::create_dir(&new_directory_path) - .await - .err_tip(|| format!("Could not create directory {new_directory_path}"))?; - download_to_directory( - cas_store, - filesystem_store, - &digest, - &new_directory_path, - ) - .await - .err_tip(|| format!("in download_to_directory : {new_directory_path}"))?; - Ok(()) + if !newly_ready.is_empty() { + let ready_digests: Vec = + newly_ready.iter().map(|f| f.digest).collect(); + let entries = + filesystem_store.get_file_entries_batch(&ready_digests).await; + + for (file, entry) in newly_ready.iter().zip(entries) { + let item: PipelineItem = ( + FileToMaterialize { + digest: file.digest, + dest: file.dest.clone(), + #[cfg(target_family = "unix")] + unix_mode: file.unix_mode, + mtime: file.mtime.clone(), + }, + entry, + ); + if tx.send(item).await.is_err() { + return Ok(producer_start.elapsed()); + } + files_sent += 1; + } + } + + remaining = still_pending; + if !remaining.is_empty() { + // Wait until the fetcher signals new arrivals. + fetched_notify_ref.notified().await; + } + } } - .boxed(), + } + + let producer_elapsed = producer_start.elapsed(); + debug!( + files_sent, + deferred = deferred_count, + elapsed_ms = producer_elapsed.as_millis() as u64, + "producer: finished sending all files", ); - } - #[cfg(target_family = "unix")] - for symlink_node in directory.symlinks { - let dest = format!("{}/{}", current_directory, symlink_node.name); - futures.push( - async move { - fs::symlink(&symlink_node.target, &dest).await.err_tip(|| { - format!( - "Could not create symlink {} -> {}", - symlink_node.target, dest + // Explicitly drop the sender so the consumer's rx.recv() + // returns None and the stream ends. join3 keeps all futures + // alive until all complete, so without this the consumer + // would wait forever. + drop(tx); + + Ok(producer_start.elapsed()) + }; + + // --- Consumer future --- + // Reads from the channel and hardlinks with bounded concurrency. 
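+        // The mpsc receiver is adapted into a `Stream` via `unfold` so that
+        // `try_for_each_concurrent` can keep up to HARDLINK_CONCURRENCY links
+        // in flight; the stream ends once the producer drops its `tx`.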
+ let hardlink_start = std::time::Instant::now(); + let slow_hardlinks = std::sync::atomic::AtomicU32::new(0); + let max_hardlink_ms = std::sync::atomic::AtomicU64::new(0); + let links_completed = std::sync::atomic::AtomicUsize::new(0); + + let consumer_fut = async { + let stream = futures::stream::unfold(rx, |mut rx| async { + rx.recv().await.map(|item| (Ok::(item), rx)) + }); + + stream + .try_for_each_concurrent(HARDLINK_CONCURRENCY, |(file, prefetched)| { + let slow_hardlinks = &slow_hardlinks; + let max_hardlink_ms = &max_hardlink_ms; + let links_completed = &links_completed; + async move { + let digest = file.digest; + let dest = file.dest.clone(); + let link_start = std::time::Instant::now(); + hardlink_and_set_metadata_prefetched( + cas_store, filesystem_store, file, prefetched, ) - })?; - Ok(()) - } - .boxed(), - ); - } + .await + .map_err(move |e| { + let mut e = e.append(format!("for digest {digest}")); + if e.code == Code::NotFound { + e.details.push(make_precondition_failure_any(digest)); + } + e + })?; + let link_elapsed = link_start.elapsed(); + let link_ms = link_elapsed.as_millis() as u64; + + links_completed.fetch_add(1, Ordering::Relaxed); + max_hardlink_ms.fetch_max(link_ms, Ordering::Relaxed); + + if link_ms > 50 { + slow_hardlinks.fetch_add(1, Ordering::Relaxed); + warn!( + dest = %dest, + digest = ?digest, + elapsed_ms = link_ms, + "pipeline: slow hardlink (>50ms)", + ); + } + Ok(()) + } + }) + .await + }; + + // Run all three concurrently. The fetcher and producer share state + // via fetched_set + Notify. The producer and consumer share the + // mpsc channel. The consumer drops when the producer's tx drops. + let (_, producer_result, consumer_result) = + futures::future::join3(fetcher_fut, producer_fut, consumer_fut).await; + + // Check consumer first (it's the critical path). + consumer_result?; + // Then check producer. 
+ let producer_elapsed = producer_result?; + + let hardlink_elapsed = hardlink_start.elapsed(); + let fetch_elapsed = fetch_start.elapsed(); + let slow_count = slow_hardlinks.load(Ordering::Relaxed); + let max_link_ms = max_hardlink_ms.load(Ordering::Relaxed); + let total_linked = links_completed.load(Ordering::Relaxed); + let fetcher_elapsed = fetcher_start.elapsed(); + + debug!( + total_missing = missing_digests.len(), + total_missing_bytes = missing_bytes, + fetch_elapsed_ms = fetcher_elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(missing_bytes, fetcher_elapsed)), + "download_to_directory: fetch phase completed", + ); + + debug!( + total_links = total_linked, + elapsed_ms = hardlink_elapsed.as_millis() as u64, + slow_links_over_50ms = slow_count, + max_link_ms, + avg_link_us = if total_linked > 0 { + hardlink_elapsed.as_micros() as u64 / total_linked as u64 + } else { 0 }, + producer_ms = producer_elapsed.as_millis() as u64, + total_elapsed_ms = fetch_elapsed.as_millis() as u64, + "download_to_directory: hardlink phase completed", + ); + + let total_bytes: u64 = unique_digests.iter().map(|d| d.size_bytes()).sum(); + let total_ms = phase_start.elapsed().as_millis(); + debug!( + tree_resolve_ms, + has_check_ms = has_check_ms - tree_resolve_ms, + fetch_ms = fetcher_elapsed.as_millis() as u64, + hardlink_ms = hardlink_elapsed.as_millis() as u64, + total_ms, + num_files = unique_digests.len(), + total_bytes, + throughput_mbps = format!("{:.1}", throughput_mbps(total_bytes, phase_start.elapsed())), + "download_to_directory completed", + ); - while futures.try_next().await?.is_some() {} Ok(()) } .boxed() @@ -331,13 +1533,13 @@ async fn upload_file( ) -> Result { let is_executable = is_executable(&metadata, &full_path); let file_size = metadata.len(); - let file = fs::open_file(&full_path, 0, u64::MAX) + let file = fs::open_file(&full_path, 0) .await .err_tip(|| format!("Could not open file {full_path:?}"))?; let (digest, mut file) = hasher .hasher() - .digest_for_file(&full_path, file.into_inner(), Some(file_size)) + .digest_for_file(&full_path, file, Some(file_size)) .await .err_tip(|| format!("Failed to hash file in digest_for_file failed for {full_path:?}"))?; @@ -355,7 +1557,7 @@ async fn upload_file( // Only upload if the digest doesn't already exist, this should be // a much cheaper operation than an upload. 
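     // (A `has` probe is a small metadata lookup, while an upload streams the
     // entire file body, so probing first wins whenever the blob is already
     // present in the CAS.)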
let cas_store = cas_store.as_store_driver_pin(); - let store_key: nativelink_util::store_trait::StoreKey<'_> = digest.into(); + let store_key: StoreKey<'_> = digest.into(); let has_start = std::time::Instant::now(); if cas_store .has(store_key.borrow()) @@ -376,7 +1578,8 @@ async fn upload_file( "upload_file: digest not in CAS, starting upload", ); - file.rewind().await.err_tip(|| "Could not rewind file")?; + std::io::Seek::seek(file.as_std_mut(), std::io::SeekFrom::Start(0)) + .err_tip(|| "Could not rewind file")?; // Note: For unknown reasons we appear to be hitting: // https://github.com/rust-lang/rust/issues/92096 @@ -393,12 +1596,28 @@ async fn upload_file( ) .await .map(|_slot| ()); - trace!( - ?digest, - upload_elapsed_ms = file_upload_start.elapsed().as_millis(), - success = upload_result.is_ok(), - "upload_file: update_with_whole_file completed", - ); + let upload_elapsed = file_upload_start.elapsed(); + + match &upload_result { + Ok(()) => { + debug!( + ?digest, + size_bytes = digest.size_bytes(), + elapsed_ms = upload_elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(digest.size_bytes(), upload_elapsed)), + "upload_file: CAS write completed", + ); + } + Err(e) => { + error!( + ?digest, + size_bytes = digest.size_bytes(), + elapsed_ms = upload_elapsed.as_millis() as u64, + ?e, + "upload_file: CAS write failed", + ); + } + } match upload_result { Ok(()) => Ok(()), @@ -456,13 +1675,18 @@ async fn upload_symlink( // Detect if our symlink is inside our work directory, if it is find the // relative path otherwise use the absolute path. let target = if full_target_path.starts_with(full_work_directory_path.as_ref()) { - let full_target_path = RelativePath::from_path(&full_target_path) - .map_err(|v| make_err!(Code::Internal, "Could not convert {} to RelativePath", v))?; - RelativePath::from_path(full_work_directory_path.as_ref()) - .map_err(|v| make_err!(Code::Internal, "Could not convert {} to RelativePath", v))? - .relative(full_target_path) - .normalize() - .into_string() + full_target_path + .strip_prefix(full_work_directory_path.as_ref()) + .map_err(|e| make_err!(Code::Internal, "Could not strip work dir prefix: {}", e))? + .to_str() + .err_tip(|| { + make_err!( + Code::Internal, + "Could not convert '{:?}' to string", + full_target_path + ) + })? + .to_string() } else { full_target_path .to_str() @@ -627,7 +1851,7 @@ async fn process_side_channel_file( let mut json_contents = String::new(); { // Note: Scoping `file_slot` allows the file_slot semaphore to be released faster. - let mut file_slot = match fs::open_file(side_channel_file, 0, u64::MAX).await { + let mut file_slot = match fs::open_file(side_channel_file, 0).await { Ok(file_slot) => file_slot, Err(e) => { if e.code != Code::NotFound { @@ -637,9 +1861,7 @@ async fn process_side_channel_file( return Ok(None); } }; - file_slot - .read_to_string(&mut json_contents) - .await + std::io::Read::read_to_string(file_slot.as_std_mut(), &mut json_contents) .err_tip(|| "Error reading side channel file")?; } @@ -675,9 +1897,17 @@ async fn do_cleanup( debug!("Worker cleaning up"); // Note: We need to be careful to keep trying to cleanup even if one of the steps fails. - let remove_dir_result = fs::remove_dir_all(action_directory) - .await - .err_tip(|| format!("Could not remove working directory {action_directory}")); + let remove_dir_result = match fs::remove_dir_all(action_directory).await { + Ok(()) => Ok(()), + Err(_) => { + // On macOS, Spotlight/Finder can momentarily recreate files + // (e.g. 
.DS_Store) during deletion, causing ENOTEMPTY. A + // short delay and single retry is sufficient. + tokio::time::sleep(Duration::from_millis(100)).await; + fs::remove_dir_all(action_directory).await + } + } + .err_tip(|| format!("Could not remove working directory {action_directory}")); if let Err(err) = running_actions_manager.cleanup_action(operation_id) { error!(%operation_id, ?err, "Error cleaning up action"); @@ -846,25 +2076,161 @@ impl RunningActionImpl { }; { // Create all directories needed for our output paths. This is required by the bazel spec. + let work_dir_for_output = self.work_directory.clone(); + // Mutex serializes the slow-path symlink replacement to avoid + // concurrent tasks racing on the same symlink (EEXIST / ENOENT). + let symlink_fix_lock = Arc::new(tokio::sync::Mutex::new(())); let prepare_output_directories = |output_file| { + let work_dir = work_dir_for_output.clone(); + let lock = symlink_fix_lock.clone(); let full_output_path = if command.working_directory.is_empty() { - format!("{}/{}", self.work_directory, output_file) + format!("{}/{}", work_dir, output_file) } else { format!( "{}/{}/{}", - self.work_directory, command.working_directory, output_file + work_dir, command.working_directory, output_file ) }; async move { let full_parent_path = Path::new(&full_output_path) .parent() .err_tip(|| format!("Parent path for {full_output_path} has no parent"))?; - fs::create_dir_all(full_parent_path).await.err_tip(|| { - format!( - "Error creating output directory {} (file)", + + // Fast path: create_dir_all and verify the directory is writable. + // create_dir_all succeeds even if the directory is read-only + // (it already exists), but rustc needs write access for outputs. + if fs::create_dir_all(full_parent_path).await.is_ok() { + let mut dir_writable = true; + #[cfg(target_family = "unix")] + if let Ok(m) = fs::metadata(full_parent_path).await { + dir_writable = m.mode() & 0o200 != 0; + } + if dir_writable { + return Result::<(), Error>::Ok(()); + } + // Directory exists but is not writable (likely through + // a symlink to the read-only cache). Fall through to fix. + } + + // Slow path: serialize to avoid concurrent symlink replacement races. + let _guard = lock.lock().await; + + // Re-check under lock — another task may have already fixed it. + if fs::create_dir_all(full_parent_path).await.is_ok() { + let mut dir_writable = true; + #[cfg(target_family = "unix")] + if let Ok(m) = fs::metadata(full_parent_path).await { + dir_writable = m.mode() & 0o200 != 0; + } + if dir_writable { + return Result::<(), Error>::Ok(()); + } + } + + // Walk the path and replace blocking symlinks with writable + // shallow-copy directories that preserve access to all + // original entries via absolute symlinks. 
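+                    //
+                    // Illustration with hypothetical paths: given
+                    //     <work>/bazel-out -> /cache/inputs/bazel-out        (read-only target)
+                    // the symlink is replaced by
+                    //     <work>/bazel-out/                                  (writable directory)
+                    //     <work>/bazel-out/k8-fastbuild -> /cache/inputs/bazel-out/k8-fastbuild
+                    //     ...one absolute symlink per original entry...
+                    // so every existing entry stays reachable while new
+                    // output subdirectories can be created alongside them.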
+ let work_root = Path::new(&work_dir); + let relative = full_parent_path.strip_prefix(work_root) + .map_err(|_| make_err!( + Code::Internal, + "Output path {} not under work dir {}", + full_parent_path.display(), + work_root.display() + ))?; + + let mut current = work_root.to_path_buf(); + for component in relative.components() { + let component_name = component.as_os_str(); + let next = current.join(component_name); + + match fs::symlink_metadata(&next).await { + Ok(meta) => { + #[cfg(target_family = "unix")] + if meta.is_symlink() { + // Check if resolved target is a read-only directory + let needs_replace = match fs::canonicalize(&next).await { + Ok(resolved) => { + match fs::metadata(&resolved).await { + Ok(m) => m.is_dir() && (m.mode() & 0o200 == 0), + Err(_) => false, + } + } + Err(_) => false, + }; + + if needs_replace { + let resolved = fs::canonicalize(&next).await + .err_tip(|| format!("Failed to resolve: {}", next.display()))?; + + // Replace symlink with a writable shallow-copy directory. + // Each entry in the original target gets an absolute symlink, + // except for self-referential entries (e.g., bazel-out -> .). + fs::remove_file(&next).await + .err_tip(|| format!("Failed to remove symlink: {}", next.display()))?; + fs::create_dir(&next).await + .err_tip(|| format!("Failed to create dir: {}", next.display()))?; + + let rd = fs::read_dir(&resolved).await + .err_tip(|| format!("Failed to read dir: {}", resolved.display()))?; + let (_permit, mut inner_rd) = rd.into_inner(); + while let Some(entry) = inner_rd.next_entry().await + .err_tip(|| format!("Failed to iterate: {}", resolved.display()))? + { + let entry_name = entry.file_name(); + // Skip self-referential entries (bazel-out -> . creates + // an entry pointing back to the replaced dir itself). + if entry_name == component_name { + continue; + } + let abs_target = resolved.join(&entry_name); + let link = next.join(&entry_name); + if let Err(e) = fs::symlink(&abs_target, &link).await { + warn!( + link = %link.display(), + target = %abs_target.display(), + ?e, + "prepare_output_dirs: failed to create shallow-copy symlink", + ); + } + } + + // Retry — the fix at this level may be sufficient. + if fs::create_dir_all(full_parent_path).await.is_ok() { + return Ok(()); + } + } + } + + #[cfg(target_family = "unix")] + if meta.is_dir() && (meta.mode() & 0o200 == 0) { + // Read-only directory in the work tree (not through symlink). + // Safe to make writable since work dirs are independent copies. + let mut perms = meta.permissions(); + perms.set_mode(meta.mode() | 0o200); + drop(fs::set_permissions(&next, perms).await); + } + } + Err(_) => { + // Path doesn't exist — create remaining dirs. + fs::create_dir_all(full_parent_path).await + .err_tip(|| format!( + "Error creating output directory {}", + full_parent_path.display() + ))?; + return Ok(()); + } + } + + current = next; + } + + // Final attempt after all fixes applied. + fs::create_dir_all(full_parent_path).await + .err_tip(|| format!( + "Error creating output directory {} (after symlink fixes)", full_parent_path.display() - ) - })?; + ))?; Result::<(), Error>::Ok(()) } }; @@ -928,7 +2294,70 @@ impl RunningActionImpl { // figure out toolchain misconfiguration issues. // De-bloat the `debug` level by using the `trace` // level more effectively and adjust this. - info!(?args, "Executing command",); + debug!(?args, "Executing command",); + + // Diagnostic: log permissions of .sh files in the work directory tree + // to debug EACCES errors on remote workers. 
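+        // The scan walks the full work tree once per executed action, so it
+        // logs under the dedicated `nativelink::diag` target (see the
+        // `event!` calls below); deployments can filter that target out of
+        // production logging without losing other debug output.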
+ #[cfg(target_family = "unix")] + { + use std::os::unix::fs::{MetadataExt, PermissionsExt}; + let work_dir = format!( + "{}/{}", + self.work_directory, command_proto.working_directory + ); + let mut check_dirs = vec![work_dir.clone()]; + let mut sh_count = 0u32; + let mut bad_count = 0u32; + while let Some(dir) = check_dirs.pop() { + if let Ok(mut entries) = tokio::fs::read_dir(&dir).await { + while let Ok(Some(entry)) = entries.next_entry().await { + let path = entry.path(); + if let Ok(meta) = tokio::fs::symlink_metadata(&path).await { + if meta.is_dir() { + check_dirs.push(path.to_string_lossy().to_string()); + } else if path.extension().is_some_and(|e| e == "sh") { + sh_count += 1; + let mode = meta.permissions().mode(); + let nlink = meta.nlink(); + let is_symlink = meta.file_type().is_symlink(); + if mode & 0o111 == 0 { + bad_count += 1; + event!( + target: "nativelink::diag", + Level::WARN, + path = %path.display(), + mode = format!("{mode:04o}"), + nlink, + is_symlink, + "NON-EXEC .sh file in work dir" + ); + } else { + event!( + target: "nativelink::diag", + Level::INFO, + path = %path.display(), + mode = format!("{mode:04o}"), + nlink, + is_symlink, + "OK .sh file in work dir" + ); + } + } + } + } + } + } + if sh_count > 0 { + event!( + target: "nativelink::diag", + Level::INFO, + sh_count, + bad_count, + "sh file permission scan complete" + ); + } + } + let mut command_builder = process::Command::new(args[0]); command_builder .args(&args[1..]) @@ -1086,7 +2515,7 @@ impl RunningActionImpl { { let joined_command = args.join(OsStr::new(" ")); let command = joined_command.to_string_lossy(); - info!( + debug!( seconds = self.action_info.timeout.as_secs_f32(), %command, "Command timed out" @@ -1131,7 +2560,7 @@ impl RunningActionImpl { exit_code }); - info!(?args, "Command complete"); + debug!(?args, "Command complete"); let maybe_error_override = if let Some(side_channel_file) = maybe_side_channel_file { process_side_channel_file(side_channel_file.clone(), &args, requested_timeout).await @@ -1208,7 +2637,10 @@ impl RunningActionImpl { state.execution_metadata.clone(), ) }; - let cas_store = self.running_actions_manager.cas_store.as_ref(); + // Upload outputs to the fast store (local FilesystemStore) only. + // The slow store (remote CAS) upload is deferred to the background + // after the execution result is reported, reducing latency. 
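+        // See `spawn_upload_to_remote` below, which performs the deferred
+        // slow-store upload in a background task. Until that task finishes,
+        // the outputs are only guaranteed to be present in the local fast
+        // store.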
+ let cas_store = self.running_actions_manager.cas_store.fast_store(); let hasher = self.action_info.unique_qualifier.digest_function(); let mut output_path_futures = FuturesUnordered::new(); @@ -1359,10 +2791,12 @@ impl RunningActionImpl { .update_oneshot(digest, data) .await .err_tip(|| "Uploading stdout")?; + let elapsed = start.elapsed(); debug!( ?digest, - data_len, - elapsed_ms = start.elapsed().as_millis(), + size_bytes = data_len, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(data_len as u64, elapsed)), "upload_results: stdout upload completed", ); Result::::Ok(digest) @@ -1376,10 +2810,12 @@ impl RunningActionImpl { .update_oneshot(digest, data) .await .err_tip(|| "Uploading stderr")?; + let elapsed = start.elapsed(); debug!( ?digest, - data_len, - elapsed_ms = start.elapsed().as_millis(), + size_bytes = data_len, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(data_len as u64, elapsed)), "upload_results: stderr upload completed", ); Result::::Ok(digest) @@ -1431,6 +2867,25 @@ impl RunningActionImpl { let mut state = self.state.lock(); execution_metadata.worker_completed_timestamp = (self.running_actions_manager.callbacks.now_fn)(); + + // Log phase durations for every action so we can diagnose latency. + let duration_ms = |start: SystemTime, end: SystemTime| -> i64 { + end.duration_since(start) + .map(|d| d.as_millis() as i64) + .unwrap_or_else(|e| -(e.duration().as_millis() as i64)) + }; + let em = &execution_metadata; + debug!( + operation_id = ?self.operation_id, + queue_ms = duration_ms(em.queued_timestamp, em.worker_start_timestamp), + input_fetch_ms = duration_ms(em.input_fetch_start_timestamp, em.input_fetch_completed_timestamp), + execution_ms = duration_ms(em.execution_start_timestamp, em.execution_completed_timestamp), + output_upload_ms = duration_ms(em.output_upload_start_timestamp, em.output_upload_completed_timestamp), + worker_overhead_ms = duration_ms(em.worker_start_timestamp, em.input_fetch_start_timestamp), + total_worker_ms = duration_ms(em.worker_start_timestamp, em.worker_completed_timestamp), + "Action phase timing", + ); + state.action_result = Some(ActionResult { output_files, output_folders, @@ -1532,7 +2987,7 @@ impl RunningAction for RunningActionImpl { async fn upload_results(self: Arc) -> Result, Error> { let upload_timeout = self.running_actions_manager.max_upload_timeout; let operation_id = self.operation_id.clone(); - info!( + debug!( ?operation_id, upload_timeout_s = upload_timeout.as_secs(), "upload_results: starting with timeout", @@ -1542,11 +2997,13 @@ impl RunningAction for RunningActionImpl { .upload_results .wrap(Self::inner_upload_results(self)); + let stall_warned = AtomicBool::new(false); let stall_warn_fut = async { let mut elapsed_secs = 0u64; loop { tokio::time::sleep(Duration::from_secs(60)).await; elapsed_secs += 60; + stall_warned.store(true, Ordering::Relaxed); warn!( ?operation_id, elapsed_s = elapsed_secs, @@ -1556,6 +3013,7 @@ impl RunningAction for RunningActionImpl { } }; + let upload_start = Instant::now(); let res = tokio::time::timeout(upload_timeout, async { tokio::pin!(upload_fut); tokio::pin!(stall_warn_fut); @@ -1573,8 +3031,18 @@ impl RunningAction for RunningActionImpl { operation_id, ) })?; - if let Err(ref e) = res { - warn!(?operation_id, ?e, "Error during upload_results"); + match &res { + Ok(_) if stall_warned.load(Ordering::Relaxed) => { + debug!( + ?operation_id, + elapsed_s = upload_start.elapsed().as_secs(), + 
"upload_results: completed after stall", + ); + } + Err(e) => { + warn!(?operation_id, ?e, "Error during upload_results"); + } + _ => {} } res } @@ -1638,7 +3106,25 @@ pub trait RunningActionsManager: Sync + Send + Sized + Unpin + 'static { operation_id: &OperationId, ) -> impl Future> + Send; + /// Spawn a background task to upload action output blobs from the local + /// fast store to the remote slow store. No-op by default. + fn spawn_upload_to_remote(self: &Arc, _action_result: &ActionResult) {} + fn metrics(&self) -> &Arc; + + /// Returns the digests of input root directories cached in the worker's + /// directory cache. Returns an empty Vec if no directory cache is configured. + fn cached_directory_digests(&self) -> impl Future> + Send; + + /// Returns ALL subtree digests across all cached directory entries. + /// Used for the initial full snapshot on (re)connect. + fn all_subtree_digests(&self) -> impl Future> + Send; + + /// Atomically takes the pending subtree digest changes since the last call. + /// Returns (added, removed) digest lists and clears the internal state. + fn take_pending_subtree_changes( + &self, + ) -> impl Future, Vec)> + Send; } /// A function to get the current system time, used to allow mocking for tests @@ -1803,11 +3289,22 @@ impl UploadActionResults { results_cache_policy: None, digest_function: hasher.proto_digest_func().into(), }; - return grpc_store + let size_bytes = update_action_request.encoded_len() as u64; + let start = std::time::Instant::now(); + grpc_store .update_action_result(Request::new(update_action_request)) .await .map(|_| ()) - .err_tip(|| "Caching ActionResult"); + .err_tip(|| "Caching ActionResult")?; + let elapsed = start.elapsed(); + debug!( + ?action_digest, + size_bytes, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes, elapsed)), + "AC write completed (grpc)", + ); + return Ok(()); } let mut store_data = BytesMut::with_capacity(ESTIMATED_DIGEST_SIZE); @@ -1815,10 +3312,21 @@ impl UploadActionResults { .encode(&mut store_data) .err_tip(|| "Encoding ActionResult for caching")?; + let size_bytes = store_data.len() as u64; + let start = std::time::Instant::now(); ac_store .update_oneshot(action_digest, store_data.split().freeze()) .await - .err_tip(|| "Caching ActionResult") + .err_tip(|| "Caching ActionResult")?; + let elapsed = start.elapsed(); + debug!( + ?action_digest, + size_bytes, + elapsed_ms = elapsed.as_millis() as u64, + throughput_mbps = format!("{:.1}", throughput_mbps(size_bytes, elapsed)), + "AC write completed", + ); + Ok(()) } async fn upload_historical_results_with_message( @@ -1863,7 +3371,7 @@ impl UploadActionResults { return Ok(()); } - let mut execute_response = to_execute_response(action_result.clone()); + let execute_response = to_execute_response(action_result.clone()); // In theory exit code should always be != 0 if there's an error, but for safety we // catch both. @@ -1873,51 +3381,66 @@ impl UploadActionResults { self.failure_message_template.clone() }; - let upload_historical_results_with_message_result = if should_upload_historical_results { - let maybe_message = self - .upload_historical_results_with_message( - action_info, - execute_response.clone(), + // Extract AC result proto before concurrent uploads (independent of message). 
+ let ac_result_proto = if should_upload_ac_results { + Some( + execute_response + .result + .clone() + .err_tip(|| "No result set in cache_action_result")?, + ) + } else { + None + }; + + // Run historical + AC uploads concurrently — they are independent. + let historical_fut = async { + if should_upload_historical_results { + match self + .upload_historical_results_with_message( + action_info, + execute_response, + message_template, + hasher, + ) + .await + { + Ok(message) => Ok(Some(message)), + Err(e) => Err(e), + } + } else { + match Self::format_execute_response_message( message_template, + action_info, + None, hasher, - ) - .await; - match maybe_message { - Ok(message) => { - action_result.message.clone_from(&message); - execute_response.message = message; - Ok(()) - } - Err(e) => Result::<(), Error>::Err(e), - } - } else { - match Self::format_execute_response_message(message_template, action_info, None, hasher) - { - Ok(message) => { - action_result.message.clone_from(&message); - execute_response.message = message; - Ok(()) + ) { + Ok(message) => Ok(Some(message)), + Err(e) => { + Err(e).err_tip(|| "Could not format message in cache_action_result") + } } - Err(e) => Err(e).err_tip(|| "Could not format message in cache_action_result"), } }; - // Note: Done in this order because we assume most results will succeed and most configs will - // either always upload upload historical results or only upload on filure. In which case - // we can avoid an extra clone of the protos by doing this last with the above assumption. - let ac_upload_results = if should_upload_ac_results { - self.upload_ac_results( - action_info, - execute_response - .result - .err_tip(|| "No result set in cache_action_result")?, - hasher, - ) - .await - } else { - Ok(()) + let ac_fut = async { + if let Some(proto) = ac_result_proto { + self.upload_ac_results(action_info, proto, hasher).await + } else { + Ok(()) + } }; - upload_historical_results_with_message_result.merge(ac_upload_results) + + let (historical_result, ac_result) = futures::future::join(historical_fut, ac_fut).await; + + // Apply message from historical upload. + if let Ok(Some(message)) = &historical_result { + action_result.message.clone_from(message); + } + + historical_result + .map(|_| ()) + .merge(ac_result) } } @@ -1933,6 +3456,10 @@ pub struct RunningActionsManagerArgs<'a> { pub max_upload_timeout: Duration, pub timeout_handled_externally: bool, pub directory_cache: Option>, + /// Worker-local locality map for registering peer hints from StartExecute. + /// When present, peer_hints from the scheduler are registered here so that + /// WorkerProxyStore can fetch blobs from peer workers. + pub peer_locality_map: Option, } struct CleanupGuard { @@ -1980,6 +3507,8 @@ pub struct RunningActionsManagerImpl { /// Optional directory cache for improving performance by caching reconstructed /// input directories and using hardlinks. directory_cache: Option>, + /// Worker-local locality map for registering peer hints from StartExecute. + peer_locality_map: Option, } impl RunningActionsManagerImpl { @@ -2024,6 +3553,7 @@ impl RunningActionsManagerImpl { cleaning_up_operations: Mutex::new(HashSet::new()), cleanup_complete_notify: Arc::new(Notify::new()), directory_cache: args.directory_cache, + peer_locality_map: args.peer_locality_map, }) } @@ -2037,6 +3567,243 @@ impl RunningActionsManagerImpl { ) } + /// Spawn a background task that uploads all action output blobs from the + /// fast store (local FilesystemStore) to the slow store (remote CAS). 
+ /// This is called after the execution result has been reported to the + /// scheduler, so it does not block action completion latency. + /// + /// To prevent a race condition where the EvictingMap evicts small blobs + /// before the background task can read them, we pre-read all small blobs + /// (<=1 MiB) from the fast store *before* spawning the background task. + /// The pre-read data is passed into the spawned task via a HashMap, so + /// the background upload never needs to re-read small blobs from the + /// store. Large blobs are streamed directly from the store as before + /// (they are much less likely to be evicted quickly due to their size). + pub fn spawn_upload_to_remote(self: &Arc, action_result: &ActionResult) { + let slow_store = self.cas_store.slow_store(); + if slow_store + .inner_store(None::>) + .optimized_for(StoreOptimizations::NoopUpdates) + { + return; + } + // Respect slow_direction config — when set to Get or ReadOnly, + // the slow store should not receive writes (same check as + // FastSlowStore::update). + let dir = self.cas_store.slow_direction(); + if dir == StoreDirection::Get || dir == StoreDirection::ReadOnly { + return; + } + + let mut digests = Vec::new(); + let mut tree_digests = Vec::new(); + for file in &action_result.output_files { + if file.digest.size_bytes() > 0 { + digests.push(file.digest); + } + } + for folder in &action_result.output_folders { + if folder.tree_digest.size_bytes() > 0 { + digests.push(folder.tree_digest); + tree_digests.push(folder.tree_digest); + } + } + if action_result.stdout_digest.size_bytes() > 0 { + digests.push(action_result.stdout_digest); + } + if action_result.stderr_digest.size_bytes() > 0 { + digests.push(action_result.stderr_digest); + } + if digests.is_empty() { + return; + } + + let cas_store = self.cas_store.clone(); + tokio::spawn(async move { + let fast_store = cas_store.fast_store(); + let slow_store = cas_store.slow_store(); + let start = std::time::Instant::now(); + + // Small blobs use update_oneshot which routes through + // BatchUpdateBlobs for efficient coalescing. Large blobs + // stream through a channel to avoid loading into memory. + const BATCH_THRESHOLD: u64 = 1024 * 1024; // 1 MiB + + // Phase 1: Pre-read all known small blobs into memory to + // prevent the eviction race condition. The EvictingMap can + // evict tiny blobs (e.g. 4-byte tree blobs, stdout, stderr) + // before the background task gets a chance to read them. + // By reading them eagerly at the start of the spawned task + // (which runs immediately), we capture the data before any + // subsequent action's uploads can trigger eviction. + let mut preread_data: HashMap = + HashMap::with_capacity(digests.len()); + + // Pre-read initial small digests (stdout, stderr, tree blobs, + // small output files). + let preread_futures: FuturesUnordered<_> = digests + .iter() + .filter(|d| d.size_bytes() <= BATCH_THRESHOLD) + .copied() + .map(|digest| async move { + let result = fast_store.get_part_unchunked(digest, 0, None).await; + (digest, result) + }) + .collect(); + let preread_results: Vec<_> = preread_futures.collect().await; + for (digest, result) in preread_results { + match result { + Ok(data) => { + preread_data.insert(digest, data); + } + Err(e) => { + warn!( + ?digest, + ?e, + "upload_to_remote: failed to pre-read small blob from fast store", + ); + } + } + } + + // Extract file digests from output directory trees. Use + // pre-read data if available (avoids re-reading from store). 
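+            // A REAPI `Tree` proto bundles the root `Directory` with every
+            // transitive child directory, so chaining
+            //     tree.children.into_iter().chain(tree.root)
+            // and flat-mapping over each directory's `files` (as done below)
+            // visits every file node of the output directory exactly once.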
+ for tree_digest in &tree_digests { + let tree_result = if let Some(data) = preread_data.get(tree_digest) { + ProtoTree::decode(data.clone()) + .map_err(|e| make_err!(Code::Internal, "Failed to decode Tree proto: {e}")) + } else { + get_and_decode_digest::(fast_store, (*tree_digest).into()).await + }; + match tree_result { + Ok(tree) => { + let file_digests: Vec = tree + .children + .into_iter() + .chain(tree.root) + .flat_map(|dir| dir.files) + .filter_map(|f| f.digest.and_then(|d| DigestInfo::try_from(d).ok())) + .filter(|d| d.size_bytes() > 0) + .collect(); + debug!( + ?tree_digest, + file_count = file_digests.len(), + "upload_to_remote: extracted file digests from output directory tree", + ); + // Pre-read any newly-discovered small file digests. + let new_preread_futures: FuturesUnordered<_> = file_digests + .iter() + .filter(|d| { + d.size_bytes() <= BATCH_THRESHOLD + && !preread_data.contains_key(d) + }) + .copied() + .map(|digest| async move { + let result = + fast_store.get_part_unchunked(digest, 0, None).await; + (digest, result) + }) + .collect(); + let new_results: Vec<_> = new_preread_futures.collect().await; + for (digest, result) in new_results { + match result { + Ok(data) => { + preread_data.insert(digest, data); + } + Err(e) => { + warn!( + ?digest, + ?e, + "upload_to_remote: failed to pre-read tree file blob", + ); + } + } + } + digests.extend(file_digests); + } + Err(e) => { + warn!( + ?tree_digest, + ?e, + "upload_to_remote: failed to decode tree for file digest extraction", + ); + } + } + } + + let total = digests.len(); + let preread_count = preread_data.len(); + debug!( + total_digests = total, + preread_count, + tree_count = tree_digests.len(), + "upload_to_remote: starting background CAS upload", + ); + + // Phase 2: Upload all digests to the slow store. Small blobs + // use pre-read data; large blobs stream from the fast store. + let mut success_count = 0u64; + let mut fail_count = 0u64; + let mut uploads = FuturesUnordered::new(); + for digest in digests { + // Use pre-read data for small blobs that were captured + // eagerly. This avoids the eviction race where EvictingMap + // removes the blob before we can read it. + let cached_data = preread_data.remove(&digest); + uploads.push(async move { + let result = if let Some(data) = cached_data { + // Data was pre-read -- upload directly without + // touching the fast store. + slow_store.update_oneshot(digest, data).await + } else if digest.size_bytes() <= BATCH_THRESHOLD { + // Small blob that wasn't pre-read (e.g. pre-read + // failed). Try reading from the store as fallback. 
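+                        // (This fallback can still lose the race against
+                        // eviction; the eager pre-read phase above exists
+                        // precisely so that such misses stay rare.)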
+ match fast_store.get_part_unchunked(digest, 0, None).await { + Ok(data) => slow_store.update_oneshot(digest, data).await, + Err(e) => Err(e), + } + } else { + let (tx, rx) = make_buf_channel_pair(); + let read_fut = fast_store.get(digest, tx); + let write_fut = slow_store.update( + digest, + rx, + UploadSizeInfo::ExactSize(digest.size_bytes()), + ); + let (read_res, write_res) = tokio::join!(read_fut, write_fut); + read_res.merge(write_res) + }; + match result { + Ok(()) => true, + Err(e) => { + warn!( + ?digest, + ?e, + "upload_to_remote: failed to upload digest", + ); + false + } + } + }); + } + while let Some(ok) = uploads.next().await { + if ok { + success_count += 1; + } else { + fail_count += 1; + } + } + + debug!( + total_digests = total, + success_count, + fail_count, + elapsed_ms = start.elapsed().as_millis() as u64, + "upload_to_remote: background CAS upload completed", + ); + }); + } + /// Fixes a race condition that occurs when an action fails to execute on a worker, and the same worker /// attempts to re-execute the same action before the physical cleanup (file is removed) completes. /// See this issue for additional details: @@ -2232,6 +3999,30 @@ impl RunningActionsManager for RunningActionsManagerImpl { self.metrics .create_and_add_action .wrap(async move { + // Extract peer hints BEFORE consuming start_execute. + let peer_hints = start_execute.peer_hints.clone(); + if !peer_hints.is_empty() { + if let Some(ref locality_map) = self.peer_locality_map { + let mut map = locality_map.write(); + let mut total_registered = 0usize; + for hint in &peer_hints { + if let Some(ref digest_proto) = hint.digest { + if let Ok(digest) = DigestInfo::try_from(digest_proto) { + for endpoint in &hint.peer_endpoints { + map.register_blobs(endpoint, &[digest]); + total_registered += 1; + } + } + } + } + debug!( + hints = peer_hints.len(), + registrations = total_registered, + "Registered peer hints from scheduler into worker locality map" + ); + } + } + let queued_timestamp = start_execute .queued_timestamp .and_then(|time| time.try_into().ok()) @@ -2356,10 +4147,35 @@ impl RunningActionsManager for RunningActionsManagerImpl { ); } + fn spawn_upload_to_remote(self: &Arc, action_result: &ActionResult) { + RunningActionsManagerImpl::spawn_upload_to_remote(self, action_result); + } + #[inline] fn metrics(&self) -> &Arc { &self.metrics } + + async fn cached_directory_digests(&self) -> Vec { + match &self.directory_cache { + Some(cache) => cache.cached_digests().await, + None => Vec::new(), + } + } + + async fn all_subtree_digests(&self) -> Vec { + match &self.directory_cache { + Some(cache) => cache.all_subtree_digests().await, + None => Vec::new(), + } + } + + async fn take_pending_subtree_changes(&self) -> (Vec, Vec) { + match &self.directory_cache { + Some(cache) => cache.take_pending_subtree_changes().await, + None => (Vec::new(), Vec::new()), + } + } } #[derive(Debug, Default, MetricsComponent)] diff --git a/nativelink-worker/src/worker_api_client_wrapper.rs b/nativelink-worker/src/worker_api_client_wrapper.rs index 1e2791fc0..364c60275 100644 --- a/nativelink-worker/src/worker_api_client_wrapper.rs +++ b/nativelink-worker/src/worker_api_client_wrapper.rs @@ -19,7 +19,8 @@ use nativelink_error::{make_err, Error, ResultExt}; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::update_for_scheduler::Update; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::worker_api_client::WorkerApiClient; use 
nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{ - ConnectWorkerRequest, ExecuteComplete, ExecuteResult, GoingAwayRequest, KeepAliveRequest, UpdateForScheduler, UpdateForWorker + BlobsAvailableNotification, ConnectWorkerRequest, ExecuteComplete, + ExecuteResult, GoingAwayRequest, KeepAliveRequest, UpdateForScheduler, UpdateForWorker, }; use tokio::sync::mpsc::Sender; use tonic::codec::Streaming; @@ -53,6 +54,11 @@ pub trait WorkerApiClientTrait: Clone + Sync + Send + Sized + Unpin { &mut self, request: ExecuteComplete, ) -> impl Future> + Send; + + fn blobs_available( + &mut self, + request: BlobsAvailableNotification, + ) -> impl Future> + Send; } #[derive(Debug, Clone)] @@ -133,4 +139,11 @@ impl WorkerApiClientTrait for WorkerApiClientWrapper { async fn execution_complete(&mut self, request: ExecuteComplete) -> Result<(), Error> { self.send_update(Update::ExecuteComplete(request)).await } + + async fn blobs_available( + &mut self, + request: BlobsAvailableNotification, + ) -> Result<(), Error> { + self.send_update(Update::BlobsAvailable(request)).await + } } diff --git a/nativelink-worker/src/worker_utils.rs b/nativelink-worker/src/worker_utils.rs index 3135e0be3..b07a91abb 100644 --- a/nativelink-worker/src/worker_utils.rs +++ b/nativelink-worker/src/worker_utils.rs @@ -32,6 +32,7 @@ pub async fn make_connect_worker_request( worker_properties: &HashMap, extra_envs: &HashMap, max_inflight_tasks: u64, + cas_endpoint: String, ) -> Result { let mut futures = vec![]; for (property_name, worker_property) in worker_properties { @@ -106,5 +107,6 @@ pub async fn make_connect_worker_request( worker_id_prefix, properties: try_join_all(futures).await?.into_iter().flatten().collect(), max_inflight_tasks, + cas_endpoint, }) } diff --git a/nativelink-worker/tests/local_worker_test.rs b/nativelink-worker/tests/local_worker_test.rs index efc3a61fa..49af0b124 100644 --- a/nativelink-worker/tests/local_worker_test.rs +++ b/nativelink-worker/tests/local_worker_test.rs @@ -35,12 +35,12 @@ use nativelink_config::stores::{ }; use nativelink_error::{Code, Error, make_err, make_input_err}; use nativelink_macro::nativelink_test; -use nativelink_proto::build::bazel::remote::execution::v2::Platform; +use nativelink_proto::build::bazel::remote::execution::v2::{Digest, Platform}; use nativelink_proto::build::bazel::remote::execution::v2::platform::Property; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::update_for_worker::Update; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{ - ConnectWorkerRequest, ConnectionResult, ExecuteResult, KillOperationRequest, StartExecute, - UpdateForWorker, execute_result, + ConnectWorkerRequest, ConnectionResult, ExecuteResult, KillOperationRequest, PeerHint, + StartExecute, UpdateForWorker, execute_result, }; use nativelink_store::fast_slow_store::FastSlowStore; use nativelink_store::filesystem_store::FilesystemStore; @@ -58,7 +58,6 @@ use nativelink_worker::local_worker::preconditions_met; use pretty_assertions::assert_eq; use prost::Message; use rand::Rng; -use tokio::io::AsyncWriteExt; use utils::local_worker_test_utils::{ setup_grpc_stream, setup_local_worker, setup_local_worker_with_config, }; @@ -128,6 +127,7 @@ async fn platform_properties_smoke_test() -> Result<(), Error> { } ], max_inflight_tasks: 0, + cas_endpoint: String::new(), } ); @@ -262,6 +262,7 @@ async fn blake3_digest_function_registered_properly() -> Result<(), Error> { queued_timestamp: None, platform: 
Some(Platform::default()), worker_id: expected_worker_id.clone(), + peer_hints: Vec::new(), })), }) .unwrap(), @@ -352,6 +353,7 @@ async fn simple_worker_start_action_test() -> Result<(), Error> { queued_timestamp: None, platform: Some(Platform::default()), worker_id: expected_worker_id.clone(), + peer_hints: Vec::new(), })), }) .unwrap(), @@ -490,8 +492,10 @@ async fn new_local_worker_removes_work_directory_before_start_test() -> Result<( fs::create_dir_all(format!("{}/{}", work_directory, "another_dir")).await?; let mut file = fs::create_file(OsString::from(format!("{}/{}", work_directory, "foo.txt"))).await?; - file.write_all(b"Hello, world!").await?; - file.as_mut().sync_all().await?; + Write::write_all(file.as_std_mut(), b"Hello, world!") + .map_err(|e| Into::::into(e))?; + file.as_std().sync_all() + .map_err(|e| Into::::into(e))?; drop(file); new_local_worker( Arc::new(LocalWorkerConfig { @@ -627,6 +631,7 @@ async fn experimental_precondition_script_fails() -> Result<(), Error> { queued_timestamp: None, platform: Some(Platform::default()), worker_id: expected_worker_id.clone(), + peer_hints: Vec::new(), })), }) .unwrap(), @@ -714,6 +719,7 @@ async fn kill_action_request_kills_action() -> Result<(), Error> { queued_timestamp: None, platform: Some(Platform::default()), worker_id: expected_worker_id.clone(), + peer_hints: Vec::new(), })), }) .unwrap(), @@ -765,3 +771,494 @@ async fn preconditions_met_extra_envs() -> Result<(), Error> { assert!(logs_contain("test_value_for_demo_env")); Ok(()) } + +#[nativelink_test] +async fn worker_translates_not_found_to_failed_precondition_test() -> Result<(), Error> { + let mut test_context = setup_local_worker(HashMap::new()).await; + let streaming_response = test_context.maybe_streaming_response.take().unwrap(); + + { + // Ensure our worker connects and properties were sent. + let props = test_context + .client + .expect_connect_worker(Ok(streaming_response)) + .await; + assert_eq!(props, ConnectWorkerRequest::default()); + } + + let expected_worker_id = "foobar".to_string(); + + let tx_stream = test_context.maybe_tx_stream.take().unwrap(); + { + // First initialize our worker by sending the response to the connection request. + tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::ConnectionResult(ConnectionResult { + worker_id: expected_worker_id.clone(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let action_digest = DigestInfo::new([3u8; 32], 10); + let action_info = ActionInfo { + command_digest: DigestInfo::new([1u8; 32], 10), + input_root_digest: DigestInfo::new([2u8; 32], 10), + timeout: Duration::from_secs(1), + platform_properties: HashMap::new(), + priority: 0, + load_timestamp: SystemTime::UNIX_EPOCH, + insert_timestamp: SystemTime::UNIX_EPOCH, + unique_qualifier: ActionUniqueQualifier::Uncacheable(ActionUniqueKey { + instance_name: INSTANCE_NAME.to_string(), + digest_function: DigestHasherFunc::Sha256, + digest: action_digest, + }), + }; + + { + // Send execution request. 
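+        // (The mock action below then fails with a synthetic NotFound; per
+        // the REAPI, missing input blobs must surface to the client as
+        // FAILED_PRECONDITION, which is what this test asserts at the end.)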
+ tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::StartAction(StartExecute { + execute_request: Some((&action_info).into()), + operation_id: String::new(), + queued_timestamp: None, + platform: Some(Platform::default()), + worker_id: expected_worker_id.clone(), + peer_hints: Vec::new(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let running_action = Arc::new(MockRunningAction::new()); + + // Send and wait for response from create_and_add_action to RunningActionsManager. + test_context + .actions_manager + .expect_create_and_add_action(Ok(running_action.clone())) + .await; + + // Make the action fail with a NotFound error during get_finished_result. + // This simulates a missing input blob scenario. + running_action + .simple_expect_get_finished_result(Err(make_err!(Code::NotFound, "Object not found"))) + .await?; + + // Now our client should be notified that our runner finished. + let execution_response = test_context.client.expect_execution_response(Ok(())).await; + + // The worker should have translated NotFound into FailedPrecondition per the REAPI spec. + let error_status = match execution_response.result { + Some(execute_result::Result::InternalError(status)) => status, + other => panic!( + "Expected InternalError result, got: {:?}", + other + ), + }; + + assert_eq!( + error_status.code, + Code::FailedPrecondition as i32, + "Expected NotFound to be translated to FailedPrecondition" + ); + assert!( + error_status.message.contains("One or more input blobs missing"), + "Expected error message to contain 'One or more input blobs missing', got: {}", + error_status.message + ); + + Ok(()) +} + +#[nativelink_test] +async fn peer_hints_passed_to_action_manager_test() -> Result<(), Error> { + let mut test_context = setup_local_worker(HashMap::new()).await; + let streaming_response = test_context.maybe_streaming_response.take().unwrap(); + + { + // Ensure our worker connects and properties were sent. + let props = test_context + .client + .expect_connect_worker(Ok(streaming_response)) + .await; + assert_eq!(props, ConnectWorkerRequest::default()); + } + + let expected_worker_id = "foobar".to_string(); + + let tx_stream = test_context.maybe_tx_stream.take().unwrap(); + { + // First initialize our worker by sending the response to the connection request. + tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::ConnectionResult(ConnectionResult { + worker_id: expected_worker_id.clone(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let action_digest = DigestInfo::new([3u8; 32], 10); + let action_info = ActionInfo { + command_digest: DigestInfo::new([1u8; 32], 10), + input_root_digest: DigestInfo::new([2u8; 32], 10), + timeout: Duration::from_secs(1), + platform_properties: HashMap::new(), + priority: 0, + load_timestamp: SystemTime::UNIX_EPOCH, + insert_timestamp: SystemTime::UNIX_EPOCH, + unique_qualifier: ActionUniqueQualifier::Uncacheable(ActionUniqueKey { + instance_name: INSTANCE_NAME.to_string(), + digest_function: DigestHasherFunc::Sha256, + digest: action_digest, + }), + }; + + // Create peer hints: digest D1 is available on "worker-a:50081". + let d1 = DigestInfo::new([10u8; 32], 500); + let peer_hints = vec![PeerHint { + digest: Some(Digest::from(d1)), + peer_endpoints: vec!["worker-a:50081".to_string()], + }]; + + { + // Send execution request with peer_hints populated. 
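+        // (Each PeerHint pairs one digest with the worker endpoints believed
+        // to hold it; create_and_add_action registers these into the
+        // worker-local locality map so a WorkerProxyStore can attempt peer
+        // fetches instead of always going to the remote CAS.)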
+ tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::StartAction(StartExecute { + execute_request: Some((&action_info).into()), + operation_id: String::new(), + queued_timestamp: None, + platform: Some(Platform::default()), + worker_id: expected_worker_id.clone(), + peer_hints: peer_hints.clone(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let running_action = Arc::new(MockRunningAction::new()); + + // Send and wait for response from create_and_add_action to RunningActionsManager. + // This returns the (worker_id, StartExecute) that was passed to the mock. + let (received_worker_id, received_start_execute) = test_context + .actions_manager + .expect_create_and_add_action(Ok(running_action.clone())) + .await; + + // Verify worker_id is passed correctly. + assert_eq!(received_worker_id, expected_worker_id); + + // Verify peer_hints arrived intact at the mock RunningActionsManager. + assert_eq!( + received_start_execute.peer_hints.len(), + 1, + "Expected exactly one peer hint" + ); + assert_eq!( + received_start_execute.peer_hints[0].digest, + Some(Digest::from(d1)), + "Peer hint digest should match the one we sent" + ); + assert_eq!( + received_start_execute.peer_hints[0].peer_endpoints, + vec!["worker-a:50081".to_string()], + "Peer hint endpoint should match the one we sent" + ); + + // Complete the action normally so the test can clean up. + running_action + .simple_expect_get_finished_result(Ok(ActionResult::default())) + .await?; + + // Expect the action result to be cached. + let _cached = test_context + .actions_manager + .expect_cache_action_result() + .await; + + Ok(()) +} + +#[nativelink_test] +async fn empty_peer_hints_action_starts_normally_test() -> Result<(), Error> { + let mut test_context = setup_local_worker(HashMap::new()).await; + let streaming_response = test_context.maybe_streaming_response.take().unwrap(); + + { + let props = test_context + .client + .expect_connect_worker(Ok(streaming_response)) + .await; + assert_eq!(props, ConnectWorkerRequest::default()); + } + + let expected_worker_id = "foobar".to_string(); + + let tx_stream = test_context.maybe_tx_stream.take().unwrap(); + { + tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::ConnectionResult(ConnectionResult { + worker_id: expected_worker_id.clone(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let action_digest = DigestInfo::new([3u8; 32], 10); + let action_info = ActionInfo { + command_digest: DigestInfo::new([1u8; 32], 10), + input_root_digest: DigestInfo::new([2u8; 32], 10), + timeout: Duration::from_secs(1), + platform_properties: HashMap::new(), + priority: 0, + load_timestamp: SystemTime::UNIX_EPOCH, + insert_timestamp: SystemTime::UNIX_EPOCH, + unique_qualifier: ActionUniqueQualifier::Uncacheable(ActionUniqueKey { + instance_name: INSTANCE_NAME.to_string(), + digest_function: DigestHasherFunc::Sha256, + digest: action_digest, + }), + }; + + { + // Send execution request with empty peer_hints. 
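+        // (With no hints, the `!peer_hints.is_empty()` guard in
+        // create_and_add_action skips locality-map registration entirely, so
+        // this exercises the plain execution path.)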
+ tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::StartAction(StartExecute { + execute_request: Some((&action_info).into()), + operation_id: String::new(), + queued_timestamp: None, + platform: Some(Platform::default()), + worker_id: expected_worker_id.clone(), + peer_hints: Vec::new(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let running_action = Arc::new(MockRunningAction::new()); + + let (received_worker_id, received_start_execute) = test_context + .actions_manager + .expect_create_and_add_action(Ok(running_action.clone())) + .await; + + // Verify worker_id is passed correctly. + assert_eq!(received_worker_id, expected_worker_id); + + // Verify empty peer_hints doesn't cause any issues. + assert!( + received_start_execute.peer_hints.is_empty(), + "Expected peer_hints to be empty" + ); + + let action_result = ActionResult { + output_files: vec![], + output_folders: vec![], + output_file_symlinks: vec![], + output_directory_symlinks: vec![], + exit_code: 0, + stdout_digest: DigestInfo::new([21u8; 32], 10), + stderr_digest: DigestInfo::new([22u8; 32], 10), + execution_metadata: ExecutionMetadata { + worker: expected_worker_id.clone(), + queued_timestamp: SystemTime::UNIX_EPOCH, + worker_start_timestamp: SystemTime::UNIX_EPOCH, + worker_completed_timestamp: SystemTime::UNIX_EPOCH, + input_fetch_start_timestamp: SystemTime::UNIX_EPOCH, + input_fetch_completed_timestamp: SystemTime::UNIX_EPOCH, + execution_start_timestamp: SystemTime::UNIX_EPOCH, + execution_completed_timestamp: SystemTime::UNIX_EPOCH, + output_upload_start_timestamp: SystemTime::UNIX_EPOCH, + output_upload_completed_timestamp: SystemTime::UNIX_EPOCH, + }, + server_logs: HashMap::new(), + error: None, + message: String::new(), + }; + + // Complete the action normally. + running_action + .simple_expect_get_finished_result(Ok(action_result.clone())) + .await?; + + // Expect the action result to be cached. + let (stored_digest, stored_result, _digest_hasher) = test_context + .actions_manager + .expect_cache_action_result() + .await; + assert_eq!(stored_digest, action_digest); + assert_eq!(stored_result, action_result); + + // Verify we get the execution response back. 
+ let execution_response = test_context.client.expect_execution_response(Ok(())).await; + assert_eq!( + execution_response, + ExecuteResult { + instance_name: INSTANCE_NAME.to_string(), + operation_id: String::new(), + result: Some(execute_result::Result::ExecuteResponse( + ActionStage::Completed(action_result).into() + )), + } + ); + + Ok(()) +} + +#[nativelink_test] +async fn multiple_peer_hints_with_multiple_endpoints_test() -> Result<(), Error> { + let mut test_context = setup_local_worker(HashMap::new()).await; + let streaming_response = test_context.maybe_streaming_response.take().unwrap(); + + { + let props = test_context + .client + .expect_connect_worker(Ok(streaming_response)) + .await; + assert_eq!(props, ConnectWorkerRequest::default()); + } + + let expected_worker_id = "foobar".to_string(); + + let tx_stream = test_context.maybe_tx_stream.take().unwrap(); + { + tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::ConnectionResult(ConnectionResult { + worker_id: expected_worker_id.clone(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let action_digest = DigestInfo::new([3u8; 32], 10); + let action_info = ActionInfo { + command_digest: DigestInfo::new([1u8; 32], 10), + input_root_digest: DigestInfo::new([2u8; 32], 10), + timeout: Duration::from_secs(1), + platform_properties: HashMap::new(), + priority: 0, + load_timestamp: SystemTime::UNIX_EPOCH, + insert_timestamp: SystemTime::UNIX_EPOCH, + unique_qualifier: ActionUniqueQualifier::Uncacheable(ActionUniqueKey { + instance_name: INSTANCE_NAME.to_string(), + digest_function: DigestHasherFunc::Sha256, + digest: action_digest, + }), + }; + + // Create multiple peer hints with multiple endpoints. + let d1 = DigestInfo::new([10u8; 32], 500); + let d2 = DigestInfo::new([11u8; 32], 1000); + let peer_hints = vec![ + PeerHint { + digest: Some(Digest::from(d1)), + peer_endpoints: vec![ + "worker-a:50081".to_string(), + "worker-b:50081".to_string(), + ], + }, + PeerHint { + digest: Some(Digest::from(d2)), + peer_endpoints: vec!["worker-c:50081".to_string()], + }, + ]; + + { + tx_stream + .send(Frame::data( + encode_stream_proto(&UpdateForWorker { + update: Some(Update::StartAction(StartExecute { + execute_request: Some((&action_info).into()), + operation_id: String::new(), + queued_timestamp: None, + platform: Some(Platform::default()), + worker_id: expected_worker_id.clone(), + peer_hints: peer_hints.clone(), + })), + }) + .unwrap(), + )) + .await + .map_err(|e| make_input_err!("Could not send : {:?}", e))?; + } + + let running_action = Arc::new(MockRunningAction::new()); + + let (_received_worker_id, received_start_execute) = test_context + .actions_manager + .expect_create_and_add_action(Ok(running_action.clone())) + .await; + + // Verify all peer_hints arrived intact. + assert_eq!( + received_start_execute.peer_hints.len(), + 2, + "Expected exactly two peer hints" + ); + + // Verify first hint: d1 available on worker-a and worker-b. + assert_eq!( + received_start_execute.peer_hints[0].digest, + Some(Digest::from(d1)), + ); + assert_eq!( + received_start_execute.peer_hints[0].peer_endpoints, + vec!["worker-a:50081".to_string(), "worker-b:50081".to_string()], + ); + + // Verify second hint: d2 available on worker-c. 
+ assert_eq!( + received_start_execute.peer_hints[1].digest, + Some(Digest::from(d2)), + ); + assert_eq!( + received_start_execute.peer_hints[1].peer_endpoints, + vec!["worker-c:50081".to_string()], + ); + + // Complete the action normally. + running_action + .simple_expect_get_finished_result(Ok(ActionResult::default())) + .await?; + + let _cached = test_context + .actions_manager + .expect_cache_action_result() + .await; + + Ok(()) +} diff --git a/nativelink-worker/tests/running_actions_manager_test.rs b/nativelink-worker/tests/running_actions_manager_test.rs index 0c630bc41..5d1b56a31 100644 --- a/nativelink-worker/tests/running_actions_manager_test.rs +++ b/nativelink-worker/tests/running_actions_manager_test.rs @@ -41,12 +41,12 @@ mod tests { use nativelink_proto::build::bazel::remote::execution::v2::command::EnvironmentVariable; #[cfg_attr(target_family = "windows", allow(unused_imports))] use nativelink_proto::build::bazel::remote::execution::v2::{ - Action, ActionResult as ProtoActionResult, Command, Directory, DirectoryNode, + Action, ActionResult as ProtoActionResult, Command, Digest, Directory, DirectoryNode, ExecuteRequest, ExecuteResponse, FileNode, NodeProperties, Platform, SymlinkNode, Tree, digest_function::Value as ProtoDigestFunction, platform::Property, }; use nativelink_proto::com::github::trace_machina::nativelink::remote_execution::{ - HistoricalExecuteResponse, StartExecute, + HistoricalExecuteResponse, PeerHint, StartExecute, }; use nativelink_proto::google::rpc::Status; use nativelink_store::ac_utils::{get_and_decode_digest, serialize_and_upload_message}; @@ -60,6 +60,7 @@ mod tests { use nativelink_util::action_messages::{ ActionResult, ExecutionMetadata, FileInfo, NameOrPath, OperationId, }; + use nativelink_util::blob_locality_map::new_shared_blob_locality_map; use nativelink_util::common::{DigestInfo, fs}; use nativelink_util::digest_hasher::{DigestHasher, DigestHasherFunc}; use nativelink_util::store_trait::{Store, StoreLike}; @@ -429,6 +430,506 @@ mod tests { Ok(()) } + #[nativelink_test] + async fn download_to_directory_batch_existence_check_test( + ) -> Result<(), Box> { + // Verifies that files already in the fast store are hardlinked + // without being re-fetched from the slow store. + const FILE1_NAME: &str = "cached_file.txt"; + const FILE1_CONTENT: &str = "ALREADY_IN_FAST"; + const FILE2_NAME: &str = "uncached_file.txt"; + const FILE2_CONTENT: &str = "ONLY_IN_SLOW"; + + let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?; + + let root_directory_digest = { + let file1_content_digest = DigestInfo::new([10u8; 32], FILE1_CONTENT.len() as u64); + let file2_content_digest = DigestInfo::new([11u8; 32], FILE2_CONTENT.len() as u64); + + // Put file1 in BOTH slow and fast store (simulates a cached blob). + slow_store + .as_ref() + .update_oneshot(file1_content_digest, FILE1_CONTENT.into()) + .await?; + fast_store + .as_ref() + .update_oneshot(file1_content_digest, FILE1_CONTENT.into()) + .await?; + + // Put file2 ONLY in slow store (simulates a cache miss). 
+ slow_store + .as_ref() + .update_oneshot(file2_content_digest, FILE2_CONTENT.into()) + .await?; + + let root_directory_digest = DigestInfo::new([12u8; 32], 32); + let root_directory = Directory { + files: vec![ + FileNode { + name: FILE1_NAME.to_string(), + digest: Some(file1_content_digest.into()), + ..Default::default() + }, + FileNode { + name: FILE2_NAME.to_string(), + digest: Some(file2_content_digest.into()), + ..Default::default() + }, + ], + ..Default::default() + }; + + slow_store + .as_ref() + .update_oneshot(root_directory_digest, root_directory.encode_to_vec().into()) + .await?; + root_directory_digest + }; + + let download_dir = make_temp_path("download_dir_batch_check"); + fs::create_dir_all(&download_dir).await?; + download_to_directory( + cas_store.as_ref(), + fast_store.as_pin(), + &root_directory_digest, + &download_dir, + ) + .await?; + + // Both files should be present with correct content. + let file1_content = fs::read(format!("{download_dir}/{FILE1_NAME}")).await?; + assert_eq!(from_utf8(&file1_content)?, FILE1_CONTENT); + + let file2_content = fs::read(format!("{download_dir}/{FILE2_NAME}")).await?; + assert_eq!(from_utf8(&file2_content)?, FILE2_CONTENT); + + Ok(()) + } + + #[nativelink_test] + async fn download_to_directory_dedup_digests_test( + ) -> Result<(), Box> { + // Verifies that multiple files sharing the same digest content + // are all materialized correctly (the digest is only downloaded once + // but hardlinked to multiple destinations). + const SHARED_CONTENT: &str = "SHARED_CONTENT_DATA"; + const FILE_A_NAME: &str = "file_a.txt"; + const FILE_B_NAME: &str = "file_b.txt"; + const FILE_C_NAME: &str = "file_c.txt"; + + let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?; + + let root_directory_digest = { + let shared_digest = DigestInfo::new([20u8; 32], SHARED_CONTENT.len() as u64); + slow_store + .as_ref() + .update_oneshot(shared_digest, SHARED_CONTENT.into()) + .await?; + + let root_directory_digest = DigestInfo::new([21u8; 32], 32); + let root_directory = Directory { + files: vec![ + FileNode { + name: FILE_A_NAME.to_string(), + digest: Some(shared_digest.into()), + ..Default::default() + }, + FileNode { + name: FILE_B_NAME.to_string(), + digest: Some(shared_digest.into()), + ..Default::default() + }, + FileNode { + name: FILE_C_NAME.to_string(), + digest: Some(shared_digest.into()), + ..Default::default() + }, + ], + ..Default::default() + }; + + slow_store + .as_ref() + .update_oneshot(root_directory_digest, root_directory.encode_to_vec().into()) + .await?; + root_directory_digest + }; + + let download_dir = make_temp_path("download_dir_dedup"); + fs::create_dir_all(&download_dir).await?; + download_to_directory( + cas_store.as_ref(), + fast_store.as_pin(), + &root_directory_digest, + &download_dir, + ) + .await?; + + // All three files should exist with the same content. + for name in &[FILE_A_NAME, FILE_B_NAME, FILE_C_NAME] { + let content = fs::read(format!("{download_dir}/{name}")).await?; + assert_eq!(from_utf8(&content)?, SHARED_CONTENT, "Mismatch for {name}"); + } + + Ok(()) + } + + #[nativelink_test] + async fn download_to_directory_deep_nested_tree_test( + ) -> Result<(), Box> { + // Verifies that deeply nested directory trees (3 levels) are resolved + // correctly via the recursive fallback path (MemoryStore). 
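+    // (Nested Directory protos can only be resolved level by level: a
+    // parent must be fetched and decoded before its children's digests are
+    // known, so a three-level tree forces repeated store round-trips.)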
+ const LEAF_FILE_NAME: &str = "leaf.txt"; + const LEAF_CONTENT: &str = "DEEP_LEAF_DATA"; + + let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?; + + let root_directory_digest = { + let leaf_content_digest = DigestInfo::new([30u8; 32], LEAF_CONTENT.len() as u64); + slow_store + .as_ref() + .update_oneshot(leaf_content_digest, LEAF_CONTENT.into()) + .await?; + + // Level 3 (deepest): directory containing a file + let level3_digest = DigestInfo::new([31u8; 32], 32); + let level3_dir = Directory { + files: vec![FileNode { + name: LEAF_FILE_NAME.to_string(), + digest: Some(leaf_content_digest.into()), + ..Default::default() + }], + ..Default::default() + }; + slow_store + .as_ref() + .update_oneshot(level3_digest, level3_dir.encode_to_vec().into()) + .await?; + + // Level 2: directory containing level3 + let level2_digest = DigestInfo::new([32u8; 32], 32); + let level2_dir = Directory { + directories: vec![DirectoryNode { + name: "level3".to_string(), + digest: Some(level3_digest.into()), + }], + ..Default::default() + }; + slow_store + .as_ref() + .update_oneshot(level2_digest, level2_dir.encode_to_vec().into()) + .await?; + + // Level 1 (root): directory containing level2 + let root_digest = DigestInfo::new([33u8; 32], 32); + let root_dir = Directory { + directories: vec![DirectoryNode { + name: "level2".to_string(), + digest: Some(level2_digest.into()), + }], + ..Default::default() + }; + slow_store + .as_ref() + .update_oneshot(root_digest, root_dir.encode_to_vec().into()) + .await?; + root_digest + }; + + let download_dir = make_temp_path("download_dir_deep"); + fs::create_dir_all(&download_dir).await?; + download_to_directory( + cas_store.as_ref(), + fast_store.as_pin(), + &root_directory_digest, + &download_dir, + ) + .await?; + + // Verify the deeply nested file exists with correct content. + let leaf_path = format!("{download_dir}/level2/level3/{LEAF_FILE_NAME}"); + let leaf_content = fs::read(&leaf_path).await?; + assert_eq!(from_utf8(&leaf_content)?, LEAF_CONTENT); + + // Verify intermediate directories exist. + let level2_meta = fs::metadata(format!("{download_dir}/level2")).await?; + assert!(level2_meta.is_dir()); + let level3_meta = fs::metadata(format!("{download_dir}/level2/level3")).await?; + assert!(level3_meta.is_dir()); + + Ok(()) + } + + #[nativelink_test] + async fn download_to_directory_empty_directory_test( + ) -> Result<(), Box> { + // Verifies that an empty root directory is handled correctly. + let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?; + + let root_directory_digest = { + let root_digest = DigestInfo::new([40u8; 32], 32); + let root_dir = Directory::default(); + slow_store + .as_ref() + .update_oneshot(root_digest, root_dir.encode_to_vec().into()) + .await?; + root_digest + }; + + let download_dir = make_temp_path("download_dir_empty"); + fs::create_dir_all(&download_dir).await?; + download_to_directory( + cas_store.as_ref(), + fast_store.as_pin(), + &root_directory_digest, + &download_dir, + ) + .await?; + + // Directory should exist and be empty. + let meta = fs::metadata(&download_dir).await?; + assert!(meta.is_dir()); + + Ok(()) + } + + #[nativelink_test] + async fn download_to_directory_many_files_test( + ) -> Result<(), Box> { + // Verifies that a directory with many files (simulating a real build + // with many inputs) is handled correctly by the batch existence check + // and parallel download paths. 
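+    // (Pre-populating every third file in the fast store below splits the
+    // inputs into already-present digests, which only need a hardlink, and
+    // missing digests, which must be fetched first, mirroring a worker with
+    // a warm cache.)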
+
+    #[nativelink_test]
+    async fn download_to_directory_many_files_test(
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        // Verifies that a directory with many files (simulating a real build
+        // with many inputs) is handled correctly by the batch existence check
+        // and parallel download paths.
+        const FILE_COUNT: usize = 50;
+
+        let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?;
+
+        let root_directory_digest = {
+            let mut file_nodes = Vec::with_capacity(FILE_COUNT);
+            for i in 0..FILE_COUNT {
+                let content = format!("content_of_file_{i}");
+                // Create unique digests using the index.
+                let mut hash = [0u8; 32];
+                hash[0] = 50;
+                hash[1] = (i >> 8) as u8;
+                hash[2] = (i & 0xff) as u8;
+                let digest = DigestInfo::new(hash, content.len() as u64);
+
+                slow_store
+                    .as_ref()
+                    .update_oneshot(digest, content.into())
+                    .await?;
+
+                // Pre-populate every 3rd file in the fast store to test
+                // the mixed cached/uncached path.
+                if i % 3 == 0 {
+                    let content_again = format!("content_of_file_{i}");
+                    fast_store
+                        .as_ref()
+                        .update_oneshot(digest, content_again.into())
+                        .await?;
+                }
+
+                file_nodes.push(FileNode {
+                    name: format!("file_{i:04}.txt"),
+                    digest: Some(digest.into()),
+                    ..Default::default()
+                });
+            }
+
+            let root_digest = DigestInfo::new([51u8; 32], 32);
+            let root_dir = Directory {
+                files: file_nodes,
+                ..Default::default()
+            };
+            slow_store
+                .as_ref()
+                .update_oneshot(root_digest, root_dir.encode_to_vec().into())
+                .await?;
+            root_digest
+        };
+
+        let download_dir = make_temp_path("download_dir_many");
+        fs::create_dir_all(&download_dir).await?;
+        download_to_directory(
+            cas_store.as_ref(),
+            fast_store.as_pin(),
+            &root_directory_digest,
+            &download_dir,
+        )
+        .await?;
+
+        // Verify all files.
+        for i in 0..FILE_COUNT {
+            let expected = format!("content_of_file_{i}");
+            let path = format!("{download_dir}/file_{i:04}.txt");
+            let content = fs::read(&path).await?;
+            assert_eq!(
+                from_utf8(&content)?,
+                expected,
+                "Content mismatch for file {i}"
+            );
+        }
+
+        Ok(())
+    }
+
+    #[nativelink_test]
+    async fn download_to_directory_missing_blob_returns_error_test(
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        // Verifies that a reference to a missing blob in the slow store
+        // propagates an error (not silently ignored).
+        const FILE_NAME: &str = "missing.txt";
+
+        let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?;
+
+        let root_directory_digest = {
+            // Reference a file content digest that does NOT exist in any store.
+            let missing_content_digest = DigestInfo::new([60u8; 32], 100);
+
+            let root_digest = DigestInfo::new([61u8; 32], 32);
+            let root_directory = Directory {
+                files: vec![FileNode {
+                    name: FILE_NAME.to_string(),
+                    digest: Some(missing_content_digest.into()),
+                    ..Default::default()
+                }],
+                ..Default::default()
+            };
+
+            slow_store
+                .as_ref()
+                .update_oneshot(root_digest, root_directory.encode_to_vec().into())
+                .await?;
+            root_digest
+        };
+
+        let download_dir = make_temp_path("download_dir_missing_blob");
+        fs::create_dir_all(&download_dir).await?;
+        let result = download_to_directory(
+            cas_store.as_ref(),
+            fast_store.as_pin(),
+            &root_directory_digest,
+            &download_dir,
+        )
+        .await;
+
+        assert!(result.is_err(), "Expected error for missing blob");
+        Ok(())
+    }
+
+    #[nativelink_test]
+    async fn download_to_directory_missing_directory_digest_returns_error_test(
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        // Verifies that a DirectoryNode referencing a non-existent directory
+        // digest propagates an error during tree resolution.
+        let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?;
+
+        let root_directory_digest = {
+            // Reference a child directory digest that does NOT exist.
+            let missing_child_digest = DigestInfo::new([70u8; 32], 32);
+
+            let root_digest = DigestInfo::new([71u8; 32], 32);
+            let root_directory = Directory {
+                directories: vec![DirectoryNode {
+                    name: "missing_dir".to_string(),
+                    digest: Some(missing_child_digest.into()),
+                }],
+                ..Default::default()
+            };
+
+            slow_store
+                .as_ref()
+                .update_oneshot(root_digest, root_directory.encode_to_vec().into())
+                .await?;
+            root_digest
+        };
+
+        let download_dir = make_temp_path("download_dir_missing_dir");
+        fs::create_dir_all(&download_dir).await?;
+        let result = download_to_directory(
+            cas_store.as_ref(),
+            fast_store.as_pin(),
+            &root_directory_digest,
+            &download_dir,
+        )
+        .await;
+
+        assert!(result.is_err(), "Expected error for missing directory digest");
+        Ok(())
+    }
+
+    #[nativelink_test]
+    async fn download_to_directory_zero_digest_file_test(
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        // Verifies that zero-digest (empty) files are created correctly.
+        // Zero-digest files have special handling and skip batch existence checks.
+        const EMPTY_FILE_NAME: &str = "empty.txt";
+        const NORMAL_FILE_NAME: &str = "normal.txt";
+        const NORMAL_CONTENT: &str = "NORMAL_DATA";
+
+        // SHA-256 of zero bytes.
+        const ZERO_HASH: [u8; 32] = [
+            0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f,
+            0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b,
+            0x78, 0x52, 0xb8, 0x55,
+        ];
+
+        let (fast_store, slow_store, cas_store, _ac_store) = setup_stores().await?;
+
+        let root_directory_digest = {
+            let zero_digest = DigestInfo::new(ZERO_HASH, 0);
+            let normal_digest = DigestInfo::new([80u8; 32], NORMAL_CONTENT.len() as u64);
+            slow_store
+                .as_ref()
+                .update_oneshot(normal_digest, NORMAL_CONTENT.into())
+                .await?;
+
+            let root_digest = DigestInfo::new([81u8; 32], 32);
+            let root_directory = Directory {
+                files: vec![
+                    FileNode {
+                        name: EMPTY_FILE_NAME.to_string(),
+                        digest: Some(zero_digest.into()),
+                        ..Default::default()
+                    },
+                    FileNode {
+                        name: NORMAL_FILE_NAME.to_string(),
+                        digest: Some(normal_digest.into()),
+                        ..Default::default()
+                    },
+                ],
+                ..Default::default()
+            };
+
+            slow_store
+                .as_ref()
+                .update_oneshot(root_digest, root_directory.encode_to_vec().into())
+                .await?;
+            root_digest
+        };
+
+        let download_dir = make_temp_path("download_dir_zero");
+        fs::create_dir_all(&download_dir).await?;
+        download_to_directory(
+            cas_store.as_ref(),
+            fast_store.as_pin(),
+            &root_directory_digest,
+            &download_dir,
+        )
+        .await?;
+
+        // Zero-digest file should exist and be empty.
+        let empty_path = format!("{download_dir}/{EMPTY_FILE_NAME}");
+        let empty_content = fs::read(&empty_path).await?;
+        assert_eq!(empty_content.len(), 0, "Zero-digest file should be empty");
+
+        // Normal file should also exist.
+        let normal_content = fs::read(format!("{download_dir}/{NORMAL_FILE_NAME}")).await?;
+        assert_eq!(from_utf8(&normal_content)?, NORMAL_CONTENT);
+
+        Ok(())
+    }
+
     #[nativelink_test]
     async fn ensure_output_files_full_directories_are_created_no_working_directory_test()
     -> Result<(), Box<dyn std::error::Error>> {
@@ -460,6 +961,7 @@ mod tests {
             max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
             timeout_handled_externally: false,
             directory_cache: None,
+            peer_locality_map: None,
         },
         Callbacks {
             now_fn: test_monotonic_clock,
@@ -529,6 +1031,7 @@ mod tests {
                     queued_timestamp: None,
                     platform: action.platform.clone(),
                     worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                 },
             )
             .await?;
@@ -584,6 +1087,7 @@ mod tests {
             max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
             timeout_handled_externally: false,
             directory_cache: None,
+            peer_locality_map: None,
         },
         Callbacks {
            now_fn: test_monotonic_clock,
@@ -655,6 +1159,7 @@ mod tests {
                    queued_timestamp: None,
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -689,7 +1194,7 @@ mod tests {
            monotonic_clock(&CLOCK)
        }
 
-        let (_, slow_store, cas_store, ac_store) = setup_stores().await?;
+        let (_, _slow_store, cas_store, ac_store) = setup_stores().await?;
 
        let root_action_directory = make_temp_path("root_action_directory");
        fs::create_dir_all(&root_action_directory).await?;
@@ -710,6 +1215,7 @@ mod tests {
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -797,23 +1303,24 @@ mod tests {
                    queued_timestamp: None,
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
            run_action(running_action_impl.clone()).await?
        };
-        let file_content = slow_store
+        let file_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.output_files[0].digest, 0, None)
            .await?;
        assert_eq!(from_utf8(&file_content)?, "123
 ");
-        let stdout_content = slow_store
+        let stdout_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.stdout_digest, 0, None)
            .await?;
        assert_eq!(from_utf8(&stdout_content)?, "foo-stdout
 ");
-        let stderr_content = slow_store
+        let stderr_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.stderr_digest, 0, None)
            .await?;
@@ -871,7 +1378,7 @@ mod tests {
            monotonic_clock(&CLOCK)
        }
 
-        let (_, slow_store, cas_store, ac_store) = setup_stores().await?;
+        let (_, _slow_store, cas_store, ac_store) = setup_stores().await?;
 
        let root_action_directory = make_temp_path("root_action_directory");
        fs::create_dir_all(&root_action_directory).await?;
@@ -892,6 +1399,7 @@ mod tests {
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -978,23 +1486,24 @@ mod tests {
                    queued_timestamp: None,
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
            run_action(running_action_impl.clone()).await?
        };
-        let file_content = slow_store
+        let file_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.output_files[0].digest, 0, None)
            .await?;
        assert_eq!(from_utf8(&file_content)?, "123
 ");
-        let stdout_content = slow_store
+        let stdout_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.stdout_digest, 0, None)
            .await?;
        assert_eq!(from_utf8(&stdout_content)?, "foo-stdout
 ");
-        let stderr_content = slow_store
+        let stderr_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.stderr_digest, 0, None)
            .await?;
@@ -1054,7 +1563,7 @@ mod tests {
            monotonic_clock(&CLOCK)
        }
 
-        let (_, slow_store, cas_store, ac_store) = setup_stores().await?;
+        let (_, _slow_store, cas_store, ac_store) = setup_stores().await?;
 
        let root_action_directory = make_temp_path("root_action_directory");
        fs::create_dir_all(&root_action_directory).await?;
@@ -1075,6 +1584,7 @@ mod tests {
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -1143,6 +1653,7 @@
                    queued_timestamp: Some(queued_timestamp.into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -1150,7 +1661,7 @@
            run_action(running_action_impl.clone()).await?
        };
        let tree = get_and_decode_digest::<Tree>(
-            slow_store.as_ref(),
+            cas_store.as_ref(),
            action_result.output_folders[0].tree_digest.into(),
        )
        .await?;
@@ -1284,6 +1795,7 @@
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -1347,6 +1859,7 @@
                    queued_timestamp: Some(queued_timestamp.into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -1420,6 +1933,7 @@
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        #[cfg(target_family = "unix")]
@@ -1497,6 +2011,7 @@
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -1624,6 +2139,7 @@ exit 0
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
        #[cfg(target_family = "unix")]
        let arguments = vec!["printf".to_string(), EXPECTED_STDOUT.to_string()];
@@ -1678,6 +2194,7 @@ exit 0
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -1801,6 +2318,7 @@ exit 0
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
        #[cfg(target_family = "unix")]
        let arguments = vec!["printf".to_string(), EXPECTED_STDOUT.to_string()];
@@ -1865,6 +2383,7 @@ exit 0
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -1972,6 +2491,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let arguments = vec!["true".to_string()];
 
        let command = Command {
@@ -2023,6 +2543,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -2057,6 +2578,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let action_digest = DigestInfo::new([2u8; 32], 32);
@@ -2133,6 +2655,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let action_digest = DigestInfo::new([2u8; 32], 32);
@@ -2215,6 +2738,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let action_digest = DigestInfo::new([2u8; 32], 32);
@@ -2318,6 +2842,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let action_digest = DigestInfo::new([2u8; 32], 32);
@@ -2365,6 +2890,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let action_digest = DigestInfo::new([2u8; 32], 32);
@@ -2434,6 +2960,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let action_digest = DigestInfo::new([2u8; 32], 32);
@@ -2554,6 +3081,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -2582,6 +3110,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .and_then(|action| {
@@ -2642,6 +3171,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -2670,6 +3200,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .and_then(|action| {
@@ -2730,6 +3261,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -2758,6 +3290,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .and_then(|action| {
@@ -2815,6 +3348,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -2891,6 +3425,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .and_then(|action| {
@@ -2968,6 +3503,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -3041,6 +3577,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -3138,6 +3675,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -3239,6 +3777,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        let queued_timestamp = make_system_time(1000);
@@ -3296,6 +3835,7 @@ exit 1
                    queued_timestamp: Some(queued_timestamp.into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -3326,7 +3866,7 @@ exit 1
            monotonic_clock(&CLOCK)
        }
 
-        let (_, slow_store, cas_store, ac_store) = setup_stores().await?;
+        let (_, _slow_store, cas_store, ac_store) = setup_stores().await?;
 
        let root_action_directory = make_temp_path("root_action_directory");
        fs::create_dir_all(&root_action_directory).await?;
@@ -3354,6 +3894,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -3431,23 +3972,24 @@ exit 1
                    queued_timestamp: None,
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
            run_action(running_action_impl.clone()).await?
        };
-        let file_content = slow_store
+        let file_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.output_files[0].digest, 0, None)
            .await?;
        assert_eq!(from_utf8(&file_content)?, "123
 ");
-        let stdout_content = slow_store
+        let stdout_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.stdout_digest, 0, None)
            .await?;
        assert_eq!(from_utf8(&stdout_content)?, "foo-stdout
 ");
-        let stderr_content = slow_store
+        let stderr_content = cas_store
            .as_ref()
            .get_part_unchunked(action_result.stderr_digest, 0, None)
            .await?;
@@ -3535,6 +4077,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        },
        Callbacks {
            now_fn: test_monotonic_clock,
@@ -3614,6 +4157,7 @@ exit 1
                    queued_timestamp: Some(make_system_time(1000).into()),
                    platform: action.platform.clone(),
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -3656,6 +4200,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        // Create a simple action
@@ -3734,6 +4279,7 @@ exit 1
                    queued_timestamp: Some(SystemTime::now().into()),
                    platform: None,
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await;
@@ -3798,6 +4344,7 @@ exit 1
            max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
            timeout_handled_externally: false,
            directory_cache: None,
+            peer_locality_map: None,
        })?);
 
        // Create a simple action
@@ -3846,6 +4393,7 @@ exit 1
                    queued_timestamp: Some(SystemTime::now().into()),
                    platform: None,
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await?;
@@ -3867,6 +4415,7 @@ exit 1
                    queued_timestamp: Some(SystemTime::now().into()),
                    platform: None,
                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
                },
            )
            .await;
@@ -3884,4 +4433,273 @@ exit 1
        fs::remove_dir_all(&root_action_directory).await?;
        Ok(())
    }
+
+    /// Helper: set up a RunningActionsManagerImpl with stores, a root directory,
+    /// and a minimal action (empty command + empty input root) uploaded to the CAS.
+    /// Returns (manager, execute_request, action, root_action_directory) for use
+    /// in peer hint tests.
+    async fn setup_peer_hint_test(
+        peer_locality_map: Option<SharedBlobLocalityMap>,
+    ) -> Result<
+        (
+            Arc<RunningActionsManagerImpl>,
+            ExecuteRequest,
+            Action,
+            String,
+        ),
+        Box<dyn std::error::Error>,
+    > {
+        let (_, _, cas_store, ac_store) = setup_stores().await?;
+        let root_action_directory = make_temp_path("root_action_directory");
+        fs::create_dir_all(&root_action_directory).await?;
+
+        let running_actions_manager =
+            Arc::new(RunningActionsManagerImpl::new(RunningActionsManagerArgs {
+                root_action_directory: root_action_directory.clone(),
+                execution_configuration: ExecutionConfiguration::default(),
+                cas_store: cas_store.clone(),
+                ac_store: Some(Store::new(ac_store.clone())),
+                historical_store: Store::new(cas_store.clone()),
+                upload_action_result_config:
+                    &nativelink_config::cas_server::UploadActionResultConfig {
+                        upload_ac_results_strategy:
+                            nativelink_config::cas_server::UploadCacheResultsStrategy::Never,
+                        ..Default::default()
+                    },
+                max_action_timeout: Duration::MAX,
+                max_upload_timeout: Duration::from_secs(DEFAULT_MAX_UPLOAD_TIMEOUT),
+                timeout_handled_externally: false,
+                directory_cache: None,
+                peer_locality_map,
+            })?);
+
+        // Upload a minimal command + empty input root + action to CAS.
+        #[cfg(target_family = "unix")]
+        let arguments = vec![
+            "sh".to_string(),
+            "-c".to_string(),
+            "true".to_string(),
+        ];
+        #[cfg(target_family = "windows")]
+        let arguments = vec![
+            "cmd".to_string(),
+            "/C".to_string(),
+            "echo ok".to_string(),
+        ];
+
+        let command = Command {
+            arguments,
+            output_paths: vec![],
+            working_directory: ".".to_string(),
+            environment_variables: vec![EnvironmentVariable {
+                name: "PATH".to_string(),
+                value: env::var("PATH").unwrap(),
+            }],
+            ..Default::default()
+        };
+        let command_digest = serialize_and_upload_message(
+            &command,
+            cas_store.as_pin(),
+            &mut DigestHasherFunc::Sha256.hasher(),
+        )
+        .await?;
+        let input_root_digest = serialize_and_upload_message(
+            &Directory::default(),
+            cas_store.as_pin(),
+            &mut DigestHasherFunc::Sha256.hasher(),
+        )
+        .await?;
+        let action = Action {
+            command_digest: Some(command_digest.into()),
+            input_root_digest: Some(input_root_digest.into()),
+            ..Default::default()
+        };
+        let action_digest = serialize_and_upload_message(
+            &action,
+            cas_store.as_pin(),
+            &mut DigestHasherFunc::Sha256.hasher(),
+        )
+        .await?;
+
+        let execute_request = ExecuteRequest {
+            action_digest: Some(action_digest.into()),
+            ..Default::default()
+        };
+
+        Ok((
+            running_actions_manager,
+            execute_request,
+            action,
+            root_action_directory,
+        ))
+    }
+
+    #[nativelink_test]
+    async fn test_peer_hints_registered_in_locality_map(
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        const WORKER_ID: &str = "peer_hint_worker";
+
+        let locality_map = new_shared_blob_locality_map();
+        let (running_actions_manager, execute_request, action, root_action_directory) =
+            setup_peer_hint_test(Some(locality_map.clone())).await?;
+
+        let d1 = DigestInfo::new([0xAA; 32], 1000);
+        let d1_proto: Digest = d1.into();
+
+        let running_action = running_actions_manager
+            .clone()
+            .create_and_add_action(
+                WORKER_ID.to_string(),
+                StartExecute {
+                    execute_request: Some(execute_request),
+                    operation_id: OperationId::default().to_string(),
+                    queued_timestamp: Some(make_system_time(1000).into()),
+                    platform: action.platform.clone(),
+                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: vec![PeerHint {
+                        digest: Some(d1_proto),
+                        peer_endpoints: vec!["worker-a:50081".to_string()],
+                    }],
+                },
+            )
+            .await?;
+
+        // Verify the locality map was populated.
+        {
+            let map = locality_map.read();
+            let workers = map.lookup_workers(&d1);
+            assert_eq!(workers.len(), 1, "Expected 1 endpoint for d1");
+            assert_eq!(&*workers[0], "worker-a:50081");
+        }
+
+        // Clean up.
+        running_action.cleanup().await?;
+        fs::remove_dir_all(&root_action_directory).await?;
+        Ok(())
+    }
+
+    #[nativelink_test]
+    async fn test_empty_peer_hints_no_error() -> Result<(), Box<dyn std::error::Error>> {
+        const WORKER_ID: &str = "empty_hints_worker";
+
+        let locality_map = new_shared_blob_locality_map();
+        let (running_actions_manager, execute_request, action, root_action_directory) =
+            setup_peer_hint_test(Some(locality_map.clone())).await?;
+
+        let running_action = running_actions_manager
+            .clone()
+            .create_and_add_action(
+                WORKER_ID.to_string(),
+                StartExecute {
+                    execute_request: Some(execute_request),
+                    operation_id: OperationId::default().to_string(),
+                    queued_timestamp: Some(make_system_time(1000).into()),
+                    platform: action.platform.clone(),
+                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: Vec::new(),
+                },
+            )
+            .await?;
+
+        // Locality map should be empty.
+        {
+            let map = locality_map.read();
+            assert_eq!(map.digest_count(), 0, "Expected no digests in locality map");
+            assert_eq!(
+                map.endpoint_count(),
+                0,
+                "Expected no endpoints in locality map"
+            );
+        }
+
+        running_action.cleanup().await?;
+        fs::remove_dir_all(&root_action_directory).await?;
+        Ok(())
+    }
+
+    #[nativelink_test]
+    async fn test_peer_hints_without_locality_map() -> Result<(), Box<dyn std::error::Error>> {
+        const WORKER_ID: &str = "no_map_worker";
+
+        // Pass None for peer_locality_map.
+        let (running_actions_manager, execute_request, action, root_action_directory) =
+            setup_peer_hint_test(None).await?;
+
+        let d1 = DigestInfo::new([0xBB; 32], 500);
+        let d1_proto: Digest = d1.into();
+
+        // Should not panic or error even though peer_hints are provided.
+        let running_action = running_actions_manager
+            .clone()
+            .create_and_add_action(
+                WORKER_ID.to_string(),
+                StartExecute {
+                    execute_request: Some(execute_request),
+                    operation_id: OperationId::default().to_string(),
+                    queued_timestamp: Some(make_system_time(1000).into()),
+                    platform: action.platform.clone(),
+                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: vec![PeerHint {
+                        digest: Some(d1_proto),
+                        peer_endpoints: vec!["worker-x:50081".to_string()],
+                    }],
+                },
+            )
+            .await?;
+
+        running_action.cleanup().await?;
+        fs::remove_dir_all(&root_action_directory).await?;
+        Ok(())
+    }
+
+    #[nativelink_test]
+    async fn test_multiple_endpoints_per_hint() -> Result<(), Box<dyn std::error::Error>> {
+        const WORKER_ID: &str = "multi_endpoint_worker";
+
+        let locality_map = new_shared_blob_locality_map();
+        let (running_actions_manager, execute_request, action, root_action_directory) =
+            setup_peer_hint_test(Some(locality_map.clone())).await?;
+
+        let d1 = DigestInfo::new([0xCC; 32], 2000);
+        let d1_proto: Digest = d1.into();
+
+        let running_action = running_actions_manager
+            .clone()
+            .create_and_add_action(
+                WORKER_ID.to_string(),
+                StartExecute {
+                    execute_request: Some(execute_request),
+                    operation_id: OperationId::default().to_string(),
+                    queued_timestamp: Some(make_system_time(1000).into()),
+                    platform: action.platform.clone(),
+                    worker_id: WORKER_ID.to_string(),
+                    peer_hints: vec![PeerHint {
+                        digest: Some(d1_proto),
+                        peer_endpoints: vec![
+                            "worker-a:50081".to_string(),
+                            "worker-b:50081".to_string(),
+                        ],
+                    }],
+                },
+            )
+            .await?;
+
+        // Both endpoints should be registered for d1.
+        {
+            let map = locality_map.read();
+            let workers = map.lookup_workers(&d1);
+            assert_eq!(workers.len(), 2, "Expected 2 endpoints for d1");
+            assert!(
+                workers.iter().any(|w| &**w == "worker-a:50081"),
+                "Expected worker-a:50081 in endpoints"
+            );
+            assert!(
+                workers.iter().any(|w| &**w == "worker-b:50081"),
+                "Expected worker-b:50081 in endpoints"
+            );
+        }
+
+        running_action.cleanup().await?;
+        fs::remove_dir_all(&root_action_directory).await?;
+        Ok(())
+    }
 }
diff --git a/nativelink-worker/tests/utils/local_worker_test_utils.rs b/nativelink-worker/tests/utils/local_worker_test_utils.rs
index a655fe613..3f79a09b1 100644
--- a/nativelink-worker/tests/utils/local_worker_test_utils.rs
+++ b/nativelink-worker/tests/utils/local_worker_test_utils.rs
@@ -32,13 +32,10 @@ use nativelink_worker::local_worker::LocalWorker;
 use nativelink_worker::worker_api_client_wrapper::WorkerApiClientTrait;
 use tokio::sync::{broadcast, mpsc};
 use tonic::Status;
-use tonic::{
-    Response,
-    Streaming,
-    codec::Codec, // Needed for .decoder().
-    codec::CompressionEncoding,
-    codec::ProstCodec,
-};
+use tonic::{Response, Streaming, codec::CompressionEncoding};
+use tonic_prost::ProstCodec;
+// Needed for .decoder().
+use tonic::codec::Codec;
 
 use super::mock_running_actions_manager::MockRunningActionsManager;
@@ -186,6 +183,13 @@ impl WorkerApiClientTrait for MockWorkerApiClient {
     async fn execution_complete(&mut self, _request: ExecuteComplete) -> Result<(), Error> {
         Ok(())
     }
+
+    async fn blobs_available(
+        &mut self,
+        _request: nativelink_proto::com::github::trace_machina::nativelink::remote_execution::BlobsAvailableNotification,
+    ) -> Result<(), Error> {
+        Ok(())
+    }
 }
 
 pub(crate) fn setup_grpc_stream() -> (
@@ -213,6 +217,8 @@ pub(crate) async fn setup_local_worker_with_config(
             Box::pin(async move { Ok(mock_worker_api_client) })
         }),
         Box::new(move |_| Box::pin(async move { /* No sleep */ })),
+        None, // No periodic BlobsAvailable in tests
+        None, // No CAS server guard in tests
     );
 
     let (shutdown_tx_test, _) = broadcast::channel::<ShutdownGuard>(BROADCAST_CAPACITY);
diff --git a/nativelink-worker/tests/utils/mock_running_actions_manager.rs b/nativelink-worker/tests/utils/mock_running_actions_manager.rs
index 4efe50132..254aa0850 100644
--- a/nativelink-worker/tests/utils/mock_running_actions_manager.rs
+++ b/nativelink-worker/tests/utils/mock_running_actions_manager.rs
@@ -183,6 +183,18 @@ impl RunningActionsManager for MockRunningActionsManager {
     fn metrics(&self) -> &Arc<Metrics> {
         &self.metrics
     }
+
+    async fn cached_directory_digests(&self) -> Vec<DigestInfo> {
+        Vec::new()
+    }
+
+    async fn all_subtree_digests(&self) -> Vec<DigestInfo> {
+        Vec::new()
+    }
+
+    async fn take_pending_subtree_changes(&self) -> (Vec<DigestInfo>, Vec<DigestInfo>) {
+        (Vec::new(), Vec::new())
+    }
 }
 
 #[derive(Debug)]
diff --git a/nativelink-worker/tests/worker_utils_test.rs b/nativelink-worker/tests/worker_utils_test.rs
index 62e16b574..a1cb01cc8 100644
--- a/nativelink-worker/tests/worker_utils_test.rs
+++ b/nativelink-worker/tests/worker_utils_test.rs
@@ -22,7 +22,7 @@ async fn make_connect_worker_request_with_extra_envs() -> Result<(), Error> {
     extra_envs.insert("PATH".into(), env::var("PATH").unwrap());
 
     let res =
-        make_connect_worker_request("1234".to_string(), &worker_properties, &extra_envs, 1).await?;
+        make_connect_worker_request("1234".to_string(), &worker_properties, &extra_envs, 1, String::new()).await?;
     assert_eq!(
         res.properties.first(),
         Some(&Property {
diff --git a/src/bin/nativelink.rs b/src/bin/nativelink.rs
index cfad2a0e4..9f8ee8b81 100644
--- a/src/bin/nativelink.rs
+++ b/src/bin/nativelink.rs
@@ -16,6 +16,7 @@ use core::net::SocketAddr;
 use core::time::Duration;
 use std::collections::{HashMap, HashSet};
 use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
 
 use async_lock::Mutex as AsyncMutex;
 use axum::Router;
@@ -45,6 +46,7 @@ use nativelink_service::fetch_server::FetchServer;
 use nativelink_service::health_server::HealthServer;
 use nativelink_service::push_server::PushServer;
 use nativelink_service::worker_api_server::WorkerApiServer;
+use nativelink_util::blob_locality_map;
 use nativelink_store::default_store_factory::store_factory;
 use nativelink_store::store_manager::StoreManager;
 use nativelink_util::common::fs::set_open_file_limit;
@@ -63,6 +65,7 @@ use nativelink_util::{background_spawn, fs, spawn};
 use nativelink_worker::local_worker::new_local_worker;
 use rustls_pki_types::pem::PemObject;
 use rustls_pki_types::{CertificateRevocationListDer, PrivateKeyDer};
+use socket2::SockRef;
 use tokio::net::TcpListener;
 use tokio::select;
 #[cfg(target_family = "unix")]
@@ -145,7 +148,13 @@ impl RoutesExt for Routes {
 }
 
 /// If this value changes update the documentation in the config definition.
-const DEFAULT_MAX_DECODING_MESSAGE_SIZE: usize = 4 * 1024 * 1024;
+const DEFAULT_MAX_DECODING_MESSAGE_SIZE: usize = 64 * 1024 * 1024;
+
+/// Server-side encoding (response) limit. Bazel's Java gRPC client defaults
+/// to 4 MiB max inbound message size, so we default to 4 MiB. Workers that
+/// need larger responses should use a separate listener with a higher
+/// `max_encoding_message_size` in the config.
+const DEFAULT_MAX_ENCODING_MESSAGE_SIZE: usize = 4 * 1024 * 1024;
 
 macro_rules! service_setup {
     ($v: tt, $http_config: tt) => {{
@@ -156,6 +165,12 @@ macro_rules! service_setup {
            $http_config.max_decoding_message_size
        };
        service = service.max_decoding_message_size(max_decoding_message_size);
+        let max_encoding_message_size = if $http_config.max_encoding_message_size == 0 {
+            DEFAULT_MAX_ENCODING_MESSAGE_SIZE
+        } else {
+            $http_config.max_encoding_message_size
+        };
+        service = service.max_encoding_message_size(max_encoding_message_size);
        let send_algo = &$http_config.compression.send_compression_algorithm;
        if let Some(encoding) = into_encoding(send_algo.unwrap_or(HttpCompressionAlgorithm::None)) {
            service = service.send_compressed(encoding);
@@ -181,6 +196,7 @@ async fn inner_main(
    const fn into_encoding(from: HttpCompressionAlgorithm) -> Option<CompressionEncoding> {
        match from {
            HttpCompressionAlgorithm::Gzip => Some(CompressionEncoding::Gzip),
+            HttpCompressionAlgorithm::Zstd => Some(CompressionEncoding::Zstd),
            HttpCompressionAlgorithm::None => None,
        }
    }
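+
+// Editor's note (illustrative sketch, not part of the original change): a
+// worker-facing listener can opt back into large responses per listener,
+// since `service_setup!` above reads `$http_config.max_encoding_message_size`
+// and treats 0 as "use the 4 MiB default". Assuming the JSON5 field sits
+// alongside the other per-listener HTTP options, that would look roughly
+// like:
+//
+//   { name: "worker_facing",
+//     listener: { http: { socket_address: "0.0.0.0:50061" } },
+//     max_encoding_message_size: 67108864, // 64 MiB; hypothetical placement,
+//     ... }                                // authoritative name is whatever
+//                                          // http_config deserializes from.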
@@ -229,11 +245,17 @@ async fn inner_main(
        })
        .transpose()?;
 
+    // Create a shared blob locality map for peer-to-peer blob sharing.
+    // This map is shared between the scheduler (for locality scoring and
+    // peer hint generation) and WorkerApiServer (for receiving
+    // BlobsAvailable updates from workers).
+    let locality_map = blob_locality_map::new_shared_blob_locality_map();
+
    let mut action_schedulers = HashMap::new();
    let mut worker_schedulers = HashMap::new();
    for SchedulerConfig { name, spec } in cfg.schedulers.iter().flatten() {
        let (maybe_action_scheduler, maybe_worker_scheduler) =
-            scheduler_factory(spec, &store_manager, maybe_origin_event_tx.as_ref())
+            scheduler_factory(spec, &store_manager, maybe_origin_event_tx.as_ref(), Some(locality_map.clone()))
                .err_tip(|| format!("Failed to create scheduler '{name}'"))?;
        if let Some(action_scheduler) = maybe_action_scheduler {
            action_schedulers.insert(name.clone(), action_scheduler.clone());
@@ -245,6 +267,41 @@ async fn inner_main(
 
    let server_cfgs: Vec<ServerConfig> = cfg.servers.into_iter().collect();
 
+    // Wrap CAS stores with WorkerProxyStore so the server can proxy reads
+    // to workers that have the blob (discovered via BlobsAvailable reports).
+    {
+        let mut cas_store_names: HashSet<String> = HashSet::new();
+        for server_cfg in &server_cfgs {
+            if let Some(ref services) = server_cfg.services {
+                if let Some(ref cas_cfgs) = services.cas {
+                    for c in cas_cfgs {
+                        cas_store_names.insert(c.config.cas_store.clone());
+                    }
+                }
+                if let Some(ref bs_cfgs) = services.bytestream {
+                    for c in bs_cfgs {
+                        cas_store_names.insert(c.config.cas_store.clone());
+                    }
+                }
+            }
+        }
+        for store_name in &cas_store_names {
+            if let Some(original_store) = store_manager.get_store(store_name) {
+                let proxy_store = nativelink_util::store_trait::Store::new(
+                    nativelink_store::worker_proxy_store::WorkerProxyStore::new(
+                        original_store,
+                        locality_map.clone(),
+                    ),
+                );
+                store_manager.add_store(store_name, proxy_store);
+                info!(
+                    store_name,
+                    "Wrapped CAS store with WorkerProxyStore for peer blob sharing"
+                );
+            }
+        }
+    }
+
    for server_cfg in server_cfgs {
        let services = server_cfg
            .services
@@ -327,7 +384,7 @@ async fn inner_main(
            services
                .worker_api
                .map_or(Ok(None), |cfg| {
-                    WorkerApiServer::new(&cfg, &worker_schedulers)
+                    WorkerApiServer::new(&cfg, &worker_schedulers, Some(locality_map.clone()))
                        .map(|v| Some(service_setup!(v, http_config)))
                })
                .err_tip(|| "Could not create WorkerApi service")?,
@@ -518,18 +575,27 @@ async fn inner_main(
                    || "Could not convert experimental_http2_max_pending_accept_reset_streams",
                )?);
            }
-            if let Some(value) = http_config.experimental_http2_initial_stream_window_size {
-                http.http2().initial_stream_window_size(value);
-            }
-            if let Some(value) = http_config.experimental_http2_initial_connection_window_size {
-                http.http2().initial_connection_window_size(value);
-            }
+            // Default to 16 MiB stream window and 32 MiB connection window
+            // to avoid capping per-stream throughput at ~64 MB/s with 1ms RTT
+            // (hyper's default of 64 KiB is too small for high-bandwidth links).
+            http.http2().initial_stream_window_size(
+                http_config
+                    .experimental_http2_initial_stream_window_size
+                    .unwrap_or(16 * 1024 * 1024),
+            );
+            http.http2().initial_connection_window_size(
+                http_config
+                    .experimental_http2_initial_connection_window_size
+                    .unwrap_or(32 * 1024 * 1024),
+            );
            if let Some(value) = http_config.experimental_http2_adaptive_window {
                http.http2().adaptive_window(value);
            }
-            if let Some(value) = http_config.experimental_http2_max_frame_size {
-                http.http2().max_frame_size(value);
-            }
+            http.http2().max_frame_size(
+                http_config
+                    .experimental_http2_max_frame_size
+                    .unwrap_or(64 * 1024),
+            );
            if let Some(value) = http_config.experimental_http2_max_concurrent_streams {
                http.http2().max_concurrent_streams(value);
            }
@@ -537,11 +603,14 @@
                http.http2()
                    .keep_alive_timeout(Duration::from_secs(u64::from(value)));
            }
-            if let Some(value) = http_config.experimental_http2_max_send_buf_size {
-                http.http2().max_send_buf_size(
-                    usize::try_from(value).err_tip(|| "Could not convert http2_max_send_buf_size")?,
-                );
-            }
+            http.http2().max_send_buf_size(
+                usize::try_from(
+                    http_config
+                        .experimental_http2_max_send_buf_size
+                        .unwrap_or(2 * 1024 * 1024),
+                )
+                .err_tip(|| "Could not convert http2_max_send_buf_size")?,
+            );
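+            // Editor's note (illustrative math, not part of the original
+            // change): with a bandwidth-delay-product model, a single HTTP/2
+            // stream's throughput is bounded by window_size / RTT, e.g.
+            //   64 KiB / 1 ms ~= 64 MiB/s  (hyper's old default, the cap
+            //                               mentioned above)
+            //   16 MiB / 1 ms ~= 16 GiB/s  (headroom under the new default)
+            // so the larger windows stop a single low-RTT stream from being
+            // flow-control limited on fast links.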
            if http_config.experimental_http2_enable_connect_protocol == Some(true) {
                http.http2().enable_connect_protocol();
            }
@@ -555,6 +624,25 @@ async fn inner_main(
                accept_result = tcp_listener.accept() => {
                    match accept_result {
                        Ok((tcp_stream, remote_addr)) => {
+                            // Disable Nagle's algorithm to reduce latency
+                            // on small writes (e.g., gRPC frames).
+                            if let Err(err) = tcp_stream.set_nodelay(true) {
+                                error!(
+                                    target: "nativelink::services",
+                                    ?err,
+                                    "Failed to set TCP_NODELAY"
+                                );
+                            }
+                            // Enable TCP keepalive to detect dead connections.
+                            // Uses system defaults (tcp_keepalive_time/intvl/probes).
+                            let sock_ref = SockRef::from(&tcp_stream);
+                            if let Err(err) = sock_ref.set_keepalive(true) {
+                                error!(
+                                    target: "nativelink::services",
+                                    ?err,
+                                    "Failed to set SO_KEEPALIVE"
+                                );
+                            }
                            info!(
                                target: "nativelink::services",
                                ?remote_addr,
@@ -710,6 +798,12 @@ fn get_config() -> Result<CasConfig, Box<dyn std::error::Error>> {
    CasConfig::try_from_json5_file(&args.config_file)
 }
 
+/// Dump all thread stacks to a timestamped file for post-mortem analysis.
+/// Reads /proc/self/task/*/comm, status, wchan, and stack (if permitted).
+fn dump_thread_stacks() {
+    nativelink_util::stall_detector::dump_thread_stacks("runtime-watchdog");
+}
+
 fn main() -> Result<(), Box<dyn std::error::Error>> {
    #[expect(clippy::disallowed_methods, reason = "starting main runtime")]
    let runtime = tokio::runtime::Builder::new_multi_thread()
@@ -761,7 +855,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
        tokio::signal::ctrl_c()
            .await
            .expect("Failed to listen to SIGINT");
-        eprintln!("User terminated process via SIGINT");
+        error!("User terminated process via SIGINT");
        std::process::exit(130);
    });
 
@@ -785,6 +879,57 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
        std::process::exit(143);
    });
 
+    // Spawn a heartbeat task inside the tokio runtime and an external
+    // watchdog OS thread that detects when the runtime stalls.
+    let heartbeat_counter = Arc::new(AtomicU64::new(0));
+    let heartbeat_counter_task = heartbeat_counter.clone();
+    #[expect(clippy::disallowed_methods, reason = "runtime watchdog heartbeat")]
+    runtime.spawn(async move {
+        let mut ticker = tokio::time::interval(Duration::from_millis(500));
+        loop {
+            ticker.tick().await;
+            heartbeat_counter_task.fetch_add(1, Ordering::Relaxed);
+        }
+    });
+    std::thread::Builder::new()
+        .name("runtime-watchdog".to_string())
+        .spawn(move || {
+            let stall_threshold = Duration::from_secs(2);
+            let check_interval = Duration::from_secs(1);
+            loop {
+                let before = heartbeat_counter.load(Ordering::Relaxed);
+                std::thread::sleep(check_interval);
+                let after = heartbeat_counter.load(Ordering::Relaxed);
+                if before == after {
+                    let stall_start = std::time::Instant::now();
+                    let mut stall_logged = false;
+                    // Confirmed stall — wait until it resolves to measure duration.
+                    loop {
+                        std::thread::sleep(Duration::from_millis(100));
+                        let now = heartbeat_counter.load(Ordering::Relaxed);
+                        if now != after {
+                            let stall_duration = stall_start.elapsed();
+                            error!(
+                                "RUNTIME STALL RESOLVED: tokio runtime was unresponsive for {:.1}s (heartbeat stuck at {after})",
+                                stall_duration.as_secs_f64() + check_interval.as_secs_f64(),
+                            );
+                            break;
+                        }
+                        if !stall_logged && stall_start.elapsed() > stall_threshold {
+                            stall_logged = true;
+                            let total = stall_threshold.as_secs_f64()
+                                + check_interval.as_secs_f64();
+                            error!(
+                                "RUNTIME STALL IN PROGRESS: tokio runtime unresponsive for >{total:.1}s (heartbeat stuck at {after})",
+                            );
+                            dump_thread_stacks();
+                        }
+                    }
+                }
+            }
+        })
+        .expect("Failed to spawn runtime watchdog thread");
+
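+    // Editor's note (illustrative, not part of the original change): with a
+    // 500 ms heartbeat and a 1 s sampling interval, the watchdog suspects a
+    // stall after at most ~1 s of silence, and logs "IN PROGRESS" plus a
+    // stack dump only once the stall outlives stall_threshold (2 s). A brief
+    // pause therefore never triggers a dump, and detection latency is bounded
+    // by roughly check_interval + stall_threshold.
+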
    #[expect(clippy::disallowed_methods, reason = "waiting on everything to finish")]
    runtime
        .block_on(async {
diff --git a/tests/blobs_available_integration_test.rs b/tests/blobs_available_integration_test.rs
new file mode 100644
index 000000000..2fc83d48f
--- /dev/null
+++ b/tests/blobs_available_integration_test.rs
@@ -0,0 +1,879 @@
+// Copyright 2025 The NativeLink Authors. All rights reserved.
+//
+// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License
+// (the "License"); you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// See LICENSE file for details
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Integration test: 1 nativelink server + 3 workers exercising BlobsAvailable.
+//!
+//! Verifies the callback-based BlobsAvailable reporting pipeline:
+//! 1. Workers connect and register with the scheduler
+//! 2. Each worker sends an initial full-snapshot BlobsAvailable
+//! 3. Blobs uploaded to a worker's CAS trigger the on_insert callback
+//! 4. The next periodic tick sends a delta with just the new blobs
+//! 5. The server processes notifications and populates the locality map
+//! 6. When a worker disconnects, the server cleans up the locality map
+
+use std::io::{BufRead, BufReader};
+use std::path::{Path, PathBuf};
+use std::process::{Child, Command, Stdio};
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use tracing::error;
+
+use nativelink_proto::build::bazel::remote::execution::v2::{
+    batch_update_blobs_request,
+    content_addressable_storage_client::ContentAddressableStorageClient, BatchReadBlobsRequest,
+    BatchUpdateBlobsRequest, Digest,
+};
+use sha2::{Digest as Sha2Digest, Sha256};
+use tempfile::TempDir;
+use tonic::metadata::MetadataValue;
+use tonic::transport::Channel;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/// Allocate a free TCP port by binding to port 0 and extracting the OS-assigned port.
+fn get_free_port() -> u16 {
+    let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+    listener.local_addr().unwrap().port()
+}
+
+struct Ports {
+    public: u16,
+    worker_api: u16,
+    cas: [u16; 3],
+}
+
+fn allocate_ports() -> Ports {
+    Ports {
+        public: get_free_port(),
+        worker_api: get_free_port(),
+        cas: [get_free_port(), get_free_port(), get_free_port()],
+    }
+}
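+
+// Editor's note (illustrative, not part of the original change): bind-then-
+// drop port allocation is inherently racy -- another process can claim a
+// port between `allocate_ports()` and the server binding it. The retrying
+// upload loop in the test below tolerates slow startup, which also papers
+// over a lost race; a flaky failure here usually means the race was lost,
+// not that the feature is broken.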
+
+/// Write a JSON5 config with 1 server (2 listeners) + 3 workers.
+fn write_config(temp_dir: &Path, ports: &Ports) -> PathBuf {
+    let d = temp_dir.to_string_lossy().replace('\\', "/");
+    let config = format!(
+        r#"{{
+  stores: [
+    {{ name: "AC_STORE", memory: {{ eviction_policy: {{ max_bytes: 100000000 }} }} }},
+    {{ name: "SERVER_CAS", memory: {{ eviction_policy: {{ max_bytes: 100000000 }} }} }},
+    {{
+      name: "W1_STORE",
+      fast_slow: {{
+        fast: {{ filesystem: {{
+          content_path: "{d}/w1/cas",
+          temp_path: "{d}/w1/tmp",
+          eviction_policy: {{ max_bytes: 100000000 }},
+        }} }},
+        slow: {{ grpc: {{
+          instance_name: "main",
+          endpoints: [{{ address: "grpc://127.0.0.1:{public}" }}],
+          store_type: "cas",
+        }} }},
+        slow_direction: "get",
+      }},
+    }},
+    {{
+      name: "W2_STORE",
+      fast_slow: {{
+        fast: {{ filesystem: {{
+          content_path: "{d}/w2/cas",
+          temp_path: "{d}/w2/tmp",
+          eviction_policy: {{ max_bytes: 100000000 }},
+        }} }},
+        slow: {{ grpc: {{
+          instance_name: "main",
+          endpoints: [{{ address: "grpc://127.0.0.1:{public}" }}],
+          store_type: "cas",
+        }} }},
+        slow_direction: "get",
+      }},
+    }},
+    {{
+      name: "W3_STORE",
+      fast_slow: {{
+        fast: {{ filesystem: {{
+          content_path: "{d}/w3/cas",
+          temp_path: "{d}/w3/tmp",
+          eviction_policy: {{ max_bytes: 100000000 }},
+        }} }},
+        slow: {{ grpc: {{
+          instance_name: "main",
+          endpoints: [{{ address: "grpc://127.0.0.1:{public}" }}],
+          store_type: "cas",
+        }} }},
+        slow_direction: "get",
+      }},
+    }},
+  ],
+  schedulers: [
+    {{
+      name: "MAIN",
+      simple: {{
+        supported_platform_properties: {{ cpu_count: "minimum" }},
+      }},
+    }},
+  ],
+  workers: [
+    {{ local: {{
+      name: "worker-1",
+      worker_api_endpoint: {{ uri: "grpc://127.0.0.1:{wapi}" }},
+      cas_fast_slow_store: "W1_STORE",
+      cas_server_port: {c1},
+      blobs_available_interval_ms: 200,
+      work_directory: "{d}/w1/work",
+      upload_action_result: {{ upload_ac_results_strategy: "never" }},
+      platform_properties: {{ cpu_count: {{ values: ["1"] }} }},
+    }} }},
+    {{ local: {{
+      name: "worker-2",
+      worker_api_endpoint: {{ uri: "grpc://127.0.0.1:{wapi}" }},
+      cas_fast_slow_store: "W2_STORE",
+      cas_server_port: {c2},
+      blobs_available_interval_ms: 200,
+      work_directory: "{d}/w2/work",
+      upload_action_result: {{ upload_ac_results_strategy: "never" }},
+      platform_properties: {{ cpu_count: {{ values: ["1"] }} }},
+    }} }},
+    {{ local: {{
+      name: "worker-3",
+      worker_api_endpoint: {{ uri: "grpc://127.0.0.1:{wapi}" }},
+      cas_fast_slow_store: "W3_STORE",
+      cas_server_port: {c3},
+      blobs_available_interval_ms: 200,
+      work_directory: "{d}/w3/work",
+      upload_action_result: {{ upload_ac_results_strategy: "never" }},
+      platform_properties: {{ cpu_count: {{ values: ["1"] }} }},
+    }} }},
+  ],
+  servers: [
+    {{
+      name: "public",
+      listener: {{ http: {{ socket_address: "127.0.0.1:{public}" }} }},
+      services: {{
+        cas: [{{ instance_name: "main", cas_store: "SERVER_CAS" }}],
+        bytestream: [{{ instance_name: "main", cas_store: "SERVER_CAS" }}],
+        capabilities: [{{ instance_name: "main", remote_execution: {{ scheduler: "MAIN" }} }}],
+      }},
+    }},
+    {{
+      name: "worker_api",
+      listener: {{ http: {{ socket_address: "127.0.0.1:{wapi}" }} }},
+      services: {{
+        worker_api: {{ scheduler: "MAIN" }},
+      }},
+    }},
+  ],
+}}"#,
+        d = d,
+        wapi = ports.worker_api,
+        c1 = ports.cas[0],
+        c2 = ports.cas[1],
+        c3 = ports.cas[2],
+        public = ports.public,
+    );
+    let config_path = temp_dir.join("config.json5");
+    std::fs::write(&config_path, config).unwrap();
+    config_path
+}
+
+/// Compute SHA-256 digest of data, returning (hex_hash, size).
+fn sha256_digest(data: &[u8]) -> (String, i64) {
+    let mut hasher = Sha256::new();
+    hasher.update(data);
+    let hash = format!("{:x}", hasher.finalize());
+    (hash, data.len() as i64)
+}
+
+/// Holds a spawned nativelink process and its collected log lines.
+struct NativeLinkProcess {
+    child: Child,
+    log_lines: Arc<Mutex<Vec<String>>>,
+    /// Set to false when stderr reader thread finishes (child exited).
+    child_alive: Arc<AtomicBool>,
+}
+
+impl NativeLinkProcess {
+    /// Spawn the nativelink binary with the given config file.
+    fn spawn(config_path: &Path) -> Self {
+        let binary = env!("CARGO_BIN_EXE_nativelink");
+
+        let mut child = Command::new(binary)
+            .arg(config_path.to_str().unwrap())
+            .env(
+                "RUST_LOG",
+                "nativelink=trace,nativelink_worker=trace,nativelink_service=trace",
+            )
+            // Disable ANSI color codes for easier log parsing.
+            .env("NO_COLOR", "1")
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .spawn()
+            .expect("Failed to spawn nativelink binary");
+
+        let log_lines: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
+        let child_alive = Arc::new(AtomicBool::new(true));
+
+        // Collect stderr lines in a background thread.
+        let stderr = child.stderr.take().expect("Failed to capture stderr");
+        let log_lines_stderr = log_lines.clone();
+        let child_alive_stderr = child_alive.clone();
+        std::thread::spawn(move || {
+            let reader = BufReader::new(stderr);
+            for line in reader.lines() {
+                match line {
+                    Ok(line) => {
+                        log_lines_stderr.lock().unwrap().push(line);
+                    }
+                    Err(_) => break,
+                }
+            }
+            child_alive_stderr.store(false, Ordering::Relaxed);
+        });
+
+        // Also collect stdout in case tracing writes there.
+        let stdout = child.stdout.take().expect("Failed to capture stdout");
+        let log_lines_stdout = log_lines.clone();
+        std::thread::spawn(move || {
+            let reader = BufReader::new(stdout);
+            for line in reader.lines() {
+                match line {
+                    Ok(line) => {
+                        log_lines_stdout.lock().unwrap().push(line);
+                    }
+                    Err(_) => break,
+                }
+            }
+        });
+
+        Self { child, log_lines, child_alive }
+    }
+
+    /// Wait until at least `count` log lines matching `pattern` appear.
+    /// Returns false if the deadline expires or the child process exits.
+    async fn wait_for_log_count(&self, pattern: &str, count: usize, timeout: Duration) -> bool {
+        let deadline = tokio::time::Instant::now() + timeout;
+        loop {
+            {
+                let lines = self.log_lines.lock().unwrap();
+                let found = lines.iter().filter(|l| l.contains(pattern)).count();
+                if found >= count {
+                    return true;
+                }
+            }
+            if tokio::time::Instant::now() > deadline {
+                return false;
+            }
+            // Fail fast if the child process has exited.
+            if !self.child_alive.load(Ordering::Relaxed) {
+                // Give a brief moment for final log lines to flush.
+                tokio::time::sleep(Duration::from_millis(200)).await;
+                let lines = self.log_lines.lock().unwrap();
+                let found = lines.iter().filter(|l| l.contains(pattern)).count();
+                if found < count {
+                    error!(
+                        "!!! Child process exited while waiting for pattern={:?} count={} (found {}). Last 30 lines:",
+                        pattern, count, found,
+                    );
+                    for line in lines.iter().rev().take(30).collect::<Vec<_>>().into_iter().rev() {
+                        error!("  {line}");
+                    }
+                }
+                return found >= count;
+            }
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        }
+    }
+
+    /// Count how many log lines match `pattern`.
+    fn count_logs(&self, pattern: &str) -> usize {
+        let lines = self.log_lines.lock().unwrap();
+        lines.iter().filter(|l| l.contains(pattern)).count()
+    }
+
+    /// Get all log lines matching `pattern`.
+    fn grep_logs(&self, pattern: &str) -> Vec<String> {
+        let lines = self.log_lines.lock().unwrap();
+        lines
+            .iter()
+            .filter(|l| l.contains(pattern))
+            .cloned()
+            .collect()
+    }
+}
+
+impl Drop for NativeLinkProcess {
+    fn drop(&mut self) {
+        // Send SIGKILL to stop the process.
+        let _ = self.child.kill();
+        let _ = self.child.wait();
+    }
+}
+
+/// Upload a blob to a worker's CAS endpoint via BatchUpdateBlobs.
+async fn upload_blob_to_worker_cas(
+    port: u16,
+    data: &[u8],
+) -> Result<(), Box<dyn std::error::Error>> {
+    let channel = Channel::from_shared(format!("http://127.0.0.1:{port}"))
+        .unwrap()
+        .connect_timeout(Duration::from_secs(5))
+        .timeout(Duration::from_secs(10))
+        .connect()
+        .await?;
+
+    let mut client = ContentAddressableStorageClient::new(channel);
+
+    let (hash, size) = sha256_digest(data);
+
+    let request = BatchUpdateBlobsRequest {
+        instance_name: String::new(),
+        requests: vec![batch_update_blobs_request::Request {
+            digest: Some(Digest {
+                hash,
+                size_bytes: size,
+            }),
+            data: data.to_vec().into(),
+            compressor: 0,
+        }],
+        digest_function: 0, // SHA256
+    };
+
+    client.batch_update_blobs(request).await?;
+    Ok(())
+}
+
+/// Read a blob from a CAS endpoint via BatchReadBlobs.
+/// Returns Ok(data) on success, or Err on gRPC/transport error.
+/// A gRPC OK with a non-OK status in the response means the blob was not found.
+async fn read_blob_from_cas(
+    port: u16,
+    instance_name: &str,
+    hash: &str,
+    size: i64,
+) -> Result<Option<Vec<u8>>, Box<dyn std::error::Error>> {
+    let channel = Channel::from_shared(format!("http://127.0.0.1:{port}"))
+        .unwrap()
+        .connect_timeout(Duration::from_secs(5))
+        .timeout(Duration::from_secs(10))
+        .connect()
+        .await?;
+
+    let mut client = ContentAddressableStorageClient::new(channel);
+
+    let request = BatchReadBlobsRequest {
+        instance_name: instance_name.to_string(),
+        digests: vec![Digest {
+            hash: hash.to_string(),
+            size_bytes: size,
+        }],
+        acceptable_compressors: vec![],
+        digest_function: 0,
+    };
+
+    let response = client.batch_read_blobs(request).await?;
+    let inner = response.into_inner();
+
+    if let Some(resp) = inner.responses.first() {
+        // status code 0 = OK
+        if resp.status.as_ref().is_some_and(|s| s.code == 0) {
+            return Ok(Some(resp.data.to_vec()));
+        }
+    }
+    Ok(None)
+}
+
+/// Represents a per-digest result from BatchReadBlobs.
+#[allow(dead_code)]
+struct CasReadResult {
+    /// gRPC status code (0 = OK, 14 = Unavailable, 5 = NotFound, etc.)
+    code: i32,
+    /// Status message (may contain redirect prefix for worker requests).
+    message: String,
+    /// Blob data (empty if not OK).
+    data: Vec<u8>,
+}
+
+/// Read a blob from a CAS endpoint with the `x-nativelink-worker` header set,
+/// simulating a worker-to-server request. Returns the raw per-digest result.
+async fn read_blob_from_cas_as_worker(
+    port: u16,
+    instance_name: &str,
+    hash: &str,
+    size: i64,
+) -> Result<CasReadResult, Box<dyn std::error::Error>> {
+    let channel = Channel::from_shared(format!("http://127.0.0.1:{port}"))
+        .unwrap()
+        .connect_timeout(Duration::from_secs(5))
+        .timeout(Duration::from_secs(10))
+        .connect()
+        .await?;
+
+    let mut client = ContentAddressableStorageClient::new(channel);
+
+    let mut request = tonic::Request::new(BatchReadBlobsRequest {
+        instance_name: instance_name.to_string(),
+        digests: vec![Digest {
+            hash: hash.to_string(),
+            size_bytes: size,
+        }],
+        acceptable_compressors: vec![],
+        digest_function: 0,
+    });
+    // Mark this as a worker request so the server returns a redirect
+    // instead of proxying the blob data.
+ request + .metadata_mut() + .insert("x-nativelink-worker", MetadataValue::from_static("true")); + + let response = client.batch_read_blobs(request).await?; + let inner = response.into_inner(); + + let resp = inner + .responses + .into_iter() + .next() + .expect("Expected at least one response"); + let status = resp.status.unwrap_or_default(); + Ok(CasReadResult { + code: status.code, + message: status.message, + data: resp.data.to_vec(), + }) +} + +// --------------------------------------------------------------------------- +// Test +// --------------------------------------------------------------------------- + +/// Verify the full BlobsAvailable pipeline with 3 workers. +/// +/// Steps: +/// 1. Start a nativelink server with 3 workers, each with a CAS port +/// 2. Wait for all workers to register and start BlobsAvailable reporting +/// 3. Verify that each worker sends an initial full-snapshot BlobsAvailable +/// 4. Upload unique blobs to each worker's CAS endpoint +/// 5. Wait for the next periodic tick to send a delta BlobsAvailable +/// 6. Verify the server logs show the blobs being registered in the locality map +/// 7. Shutdown and verify cleanup +#[tokio::test(flavor = "multi_thread")] +async fn test_blobs_available_three_workers() { + let temp_dir = TempDir::new().expect("Failed to create temp dir"); + let ports = allocate_ports(); + let config_path = write_config(temp_dir.path(), &ports); + + // --- Phase 1: Start the server --- + + let process = NativeLinkProcess::spawn(&config_path); + + // Wait for both server listeners to be ready. + let startup_timeout = Duration::from_secs(30); + assert!( + process + .wait_for_log_count("Ready, listening on", 2, startup_timeout) + .await, + "Server did not start both listeners within timeout. \ + Lines captured: {}. Last 20 lines:\n{}", + process.log_lines.lock().unwrap().len(), + { + let lines = process.log_lines.lock().unwrap(); + lines.iter().rev().take(20).rev().cloned().collect::>().join("\n") + }, + ); + + + // --- Phase 2: Wait for all 3 workers to connect --- + assert!( + process + .wait_for_log_count("Worker registered with scheduler", 3, Duration::from_secs(15)) + .await, + "Not all 3 workers registered. Found {} registrations. Logs:\n{}", + process.count_logs("Worker registered with scheduler"), + process.grep_logs("Worker registered").join("\n"), + ); + + // --- Phase 3: Verify BlobsAvailable reporting was registered --- + assert!( + process + .wait_for_log_count( + "Registered periodic BlobsAvailable reporting", + 3, + Duration::from_secs(5), + ) + .await, + "Not all 3 workers registered BlobsAvailable callbacks. Found {}.", + process.count_logs("Registered periodic BlobsAvailable reporting"), + ); + + // --- Phase 4: Wait for initial full-snapshot BlobsAvailable --- + // Each worker sends a full snapshot (is_first=true) on the first periodic tick. + // blobs_available_interval_ms=200, so this should happen within ~1 second. + assert!( + process + .wait_for_log_count("Sent periodic BlobsAvailable", 3, Duration::from_secs(5)) + .await, + "Not all 3 workers sent initial BlobsAvailable. Found {}.", + process.count_logs("Sent periodic BlobsAvailable"), + ); + + // Verify that the initial snapshots had is_first=true. + let initial_logs = process.grep_logs("Sent periodic BlobsAvailable"); + let is_first_count = initial_logs.iter().filter(|l| l.contains("is_first=true") || l.contains("is_first: true")).count(); + assert!( + is_first_count >= 3, + "Expected at least 3 is_first=true BlobsAvailable, found {is_first_count}. 
+
+    // --- Phase 5: Upload blobs to each worker's CAS ---
+    // Capture the send count BEFORE uploads so we can detect new delta sends.
+    let before_upload_send_count = process.count_logs("Sent periodic BlobsAvailable");
+    let blob_data: Vec<Vec<u8>> = vec![
+        b"Hello from worker-1! This is test blob data.".to_vec(),
+        b"Hello from worker-2! Different test blob data.".to_vec(),
+        b"Hello from worker-3! Yet another test blob.".to_vec(),
+    ];
+
+    for (i, data) in blob_data.iter().enumerate() {
+        let port = ports.cas[i];
+        // Retry a few times in case the worker CAS server isn't ready yet.
+        let mut uploaded = false;
+        for _ in 0..10 {
+            match upload_blob_to_worker_cas(port, data).await {
+                Ok(()) => {
+                    uploaded = true;
+                    break;
+                }
+                Err(_) => {
+                    tokio::time::sleep(Duration::from_millis(500)).await;
+                }
+            }
+        }
+        assert!(uploaded, "Failed to upload blob to worker-{}", i + 1);
+    }
+
+    // --- Phase 6: Wait for delta BlobsAvailable with the new blobs ---
+    // After uploading, the BlobChangeTracker's on_insert callback fires.
+    // The next periodic tick (within 200ms) will send a delta.
+    // We captured before_upload_send_count before uploads started.
+    assert!(
+        process
+            .wait_for_log_count(
+                "Sent periodic BlobsAvailable",
+                before_upload_send_count + 3,
+                Duration::from_secs(5),
+            )
+            .await,
+        "Workers did not send delta BlobsAvailable after blob upload. \
+         Had {before_upload_send_count} sends before upload, now have {}.",
+        process.count_logs("Sent periodic BlobsAvailable"),
+    );
+
+    // --- Phase 7: Verify server-side logging ---
+    // The WorkerApiServer should log "Registering blobs available from worker"
+    // for both the initial snapshot and the delta.
+    let server_register_count = process.count_logs("Registering blobs available from worker");
+    assert!(
+        server_register_count >= 3,
+        "Expected at least 3 'Registering blobs available from worker' logs, found {server_register_count}.",
+    );
+
+    // --- Phase 8: Verify delta-specific behavior ---
+    // After the initial full snapshot, subsequent sends should be deltas.
+    let all_sends = process.grep_logs("Sent periodic BlobsAvailable");
+    let delta_sends = all_sends
+        .iter()
+        .filter(|l| l.contains("is_first=false") || l.contains("is_first: false"))
+        .count();
+    assert!(
+        delta_sends >= 3,
+        "Expected at least 3 delta BlobsAvailable sends (is_first=false), found {delta_sends}.",
+    );
+
+    // --- Phase 9: Verify no-change ticks are skipped (trace level) ---
+    // Workers that have no changes since last tick should log
+    // "BlobsAvailable: no changes since last tick, skipping" at trace level.
+    // Give a little extra time for ticks with no changes.
+    tokio::time::sleep(Duration::from_millis(500)).await;
+    let skip_count = process.count_logs("no changes since last tick, skipping");
+    // We expect at least some skips once the delta has been sent and there
+    // are no further changes.
+    assert!(
+        skip_count > 0,
+        "Expected at least some 'no changes since last tick, skipping' trace logs \
+         (workers should skip sending when there are no new changes).",
+    );
+
+    // --- Phase 10: Verify the starting CAS server logs ---
+    let cas_server_logs = process.grep_logs("Starting worker CAS server for peer blob sharing");
+    assert_eq!(
+        cas_server_logs.len(),
+        3,
+        "Expected 3 worker CAS server start logs, found {}. Logs:\n{}",
+        cas_server_logs.len(),
+        cas_server_logs.join("\n"),
+    );
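+
+    // Conceptually (a hedged sketch, not the actual type), the server-side
+    // locality map that these BlobsAvailable messages populate is roughly:
+    //     locality_map: HashMap<Digest, HashSet<WorkerEndpoint>>
+    // and WorkerProxyStore consults it to decide which peer holds a blob.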
+
+    // --- Phase 11: Worker-2 reads blob from Worker-1 via peer sharing ---
+    // Upload a unique blob to Worker-1's CAS only. After BlobsAvailable
+    // propagates to the server's locality map, Worker-2 can fetch the blob
+    // through the chain: Worker-2 CAS → slow store (GrpcStore → server) →
+    // server WorkerProxyStore → locality map → Worker-1 CAS.
+    let cross_worker_blob = b"cross-worker test blob for peer sharing";
+    let (cw_hash, cw_size) = sha256_digest(cross_worker_blob);
+
+    // Capture count BEFORE the upload so the delta is not missed.
+    let before_register = process.count_logs("Registering blobs available from worker");
+
+    // Upload to Worker-1's CAS.
+    upload_blob_to_worker_cas(ports.cas[0], cross_worker_blob)
+        .await
+        .expect("Failed to upload cross-worker blob to worker-1");
+
+    // Read the blob back from Worker-1's CAS — should succeed directly.
+    let data = read_blob_from_cas(ports.cas[0], "", &cw_hash, cw_size)
+        .await
+        .expect("gRPC read from worker-1 failed");
+    assert_eq!(
+        data.as_deref(),
+        Some(cross_worker_blob.as_slice()),
+        "Blob read from worker-1's CAS should match uploaded data",
+    );
+    assert!(
+        process
+            .wait_for_log_count(
+                "Registering blobs available from worker",
+                before_register + 1,
+                Duration::from_secs(5),
+            )
+            .await,
+        "Server did not register BlobsAvailable after cross-worker blob upload.",
+    );
+
+    // Now read from Worker-2's CAS — Worker-2 doesn't have the blob locally,
+    // so its effective_cas_store chain kicks in:
+    // fast (FilesystemStore) miss → slow (WorkerProxyStore(GrpcStore → server))
+    // → server redirects → WorkerProxyStore follows redirect → Worker-1 → success
+    let data = read_blob_from_cas(ports.cas[1], "", &cw_hash, cw_size)
+        .await
+        .expect("gRPC read from worker-2 failed");
+
+    assert_eq!(
+        data.as_deref(),
+        Some(cross_worker_blob.as_slice()),
+        "Worker-2 should fetch the blob from Worker-1 via peer sharing",
+    );
+
+    // --- Phase 12: Server proxies CAS read to a worker ---
+    // The server's CAS (SERVER_CAS) is an empty MemoryStore wrapped with
+    // WorkerProxyStore. When a blob is not found locally, WorkerProxyStore
+    // consults the server-side locality map (populated by BlobsAvailable)
+    // and proxies the read to the worker that has it.
+
+    // Upload a unique blob to Worker-3's CAS.
+    let proxy_blob = b"proxy test blob - only on worker-3";
+    let (px_hash, px_size) = sha256_digest(proxy_blob);
+
+    // Capture count BEFORE the upload so the delta is not missed.
+    let before_register = process.count_logs("Registering blobs available from worker");
+
+    upload_blob_to_worker_cas(ports.cas[2], proxy_blob)
+        .await
+        .expect("Failed to upload proxy blob to worker-3");
+    assert!(
+        process
+            .wait_for_log_count(
+                "Registering blobs available from worker",
+                before_register + 1,
+                Duration::from_secs(5),
+            )
+            .await,
+        "Server did not register new BlobsAvailable after proxy blob upload.",
+    );
+
+    // Now read the blob via the server's public CAS endpoint.
+    // The server's MemoryStore doesn't have it, so WorkerProxyStore should
+    // proxy the read to Worker-3's CAS.
+    let data = read_blob_from_cas(ports.public, "main", &px_hash, px_size)
+        .await
+        .expect("gRPC read from server failed");
+
+    assert_eq!(
+        data.as_deref(),
+        Some(proxy_blob.as_slice()),
+        "Server should proxy the CAS read to worker-3 and return the blob",
+    );
+
+    // Verify the WorkerProxyStore logged the proxy operation.
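+    // (Two distinct log lines exist in this pipeline: the server logs
+    // "WorkerProxyStore: successfully proxied blob from worker" when proxying
+    // for a plain client, while a worker that follows a redirect itself logs
+    // "successfully read blob from redirected peer", which the execute test
+    // further below counts.)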
+    assert!(
+        process
+            .wait_for_log_count(
+                "WorkerProxyStore: successfully proxied blob from worker",
+                1,
+                Duration::from_secs(3),
+            )
+            .await,
+        "Expected WorkerProxyStore to log successful proxy read. Logs:\n{}",
+        process.grep_logs("WorkerProxyStore").join("\n"),
+    );
+
+    // --- Phase 13: Verify proxy vs redirect behavior ---
+    // Non-worker requests to the server's CAS should get proxied data.
+    // Worker requests (with x-nativelink-worker header) should get a redirect.
+
+    // Upload a fresh blob to Worker-1 for this test.
+    let redirect_blob = b"redirect vs proxy test blob - only on worker-1";
+    let (rd_hash, rd_size) = sha256_digest(redirect_blob);
+
+    // Capture count BEFORE the upload so the delta is not missed.
+    let before_register = process.count_logs("Registering blobs available from worker");
+
+    upload_blob_to_worker_cas(ports.cas[0], redirect_blob)
+        .await
+        .expect("Failed to upload redirect test blob to worker-1");
+    assert!(
+        process
+            .wait_for_log_count(
+                "Registering blobs available from worker",
+                before_register + 1,
+                Duration::from_secs(5),
+            )
+            .await,
+        "Server did not register BlobsAvailable for redirect test blob.",
+    );
+
+    // 13a: Non-worker request → server proxies data back.
+    let data = read_blob_from_cas(ports.public, "main", &rd_hash, rd_size)
+        .await
+        .expect("Non-worker read from server failed");
+    assert_eq!(
+        data.as_deref(),
+        Some(redirect_blob.as_slice()),
+        "Non-worker request should get proxied blob data from the server",
+    );
+
+    // 13b: Worker request → server returns redirect with peer endpoints.
+    let result = read_blob_from_cas_as_worker(ports.public, "main", &rd_hash, rd_size)
+        .await
+        .expect("Worker read from server failed at transport level");
+    // The server should return FailedPrecondition (code 9) with an NL_REDIRECT:
+    // prefix containing the worker endpoint(s) that have the blob.
+    // FailedPrecondition is used instead of Unavailable so the GrpcStore
+    // retrier does not waste time retrying what is actually a redirect.
+    assert_eq!(
+        result.code, 9, // Code::FailedPrecondition
+        "Worker request should get FailedPrecondition redirect, got code={} message={:?}",
+        result.code, result.message,
+    );
+    assert!(
+        result.message.contains("NL_REDIRECT:"),
+        "Worker redirect message should contain NL_REDIRECT: prefix, got: {:?}",
+        result.message,
+    );
+    // The redirect should contain Worker-1's CAS endpoint.
+    // Workers advertise as grpc://<host>:<port>, so check for the port.
+    let expected_port_suffix = format!(":{}", ports.cas[0]);
+    assert!(
+        result.message.contains(&expected_port_suffix),
+        "Redirect should contain worker-1's CAS port ({}), got: {:?}",
+        expected_port_suffix, result.message,
+    );
+
+    // --- Phase 14: Multi-worker redirect lists all endpoints ---
+    // Upload a blob to Worker-1, then read it from Worker-2 (which populates
+    // Worker-2's CAS via the peer fetch). After Worker-2's BlobsAvailable
+    // propagates, a worker request to the server should get a redirect
+    // listing BOTH Worker-1 and Worker-2 as endpoints.
+    let multi_blob = b"multi-redirect test blob for phase 14";
+    let (multi_hash, multi_size) = sha256_digest(multi_blob);
+
+    let before_register = process.count_logs("Registering blobs available from worker");
+
+    // Upload to Worker-1.
+    upload_blob_to_worker_cas(ports.cas[0], multi_blob)
+        .await
+        .expect("Failed to upload multi-redirect blob to worker-1");
+
+    // Wait for the server to register the blob from Worker-1.
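+    // (Skipping this wait could race the locality-map update: the worker
+    // request below might otherwise see no endpoints for the digest yet.)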
+ assert!( + process + .wait_for_log_count( + "Registering blobs available from worker", + before_register + 1, + Duration::from_secs(5), + ) + .await, + "Server did not register BlobsAvailable for multi-redirect blob.", + ); + + let before_register = process.count_logs("Registering blobs available from worker"); + + // Read from Worker-2's CAS — this triggers peer fetch from Worker-1, + // populating Worker-2's local CAS. + let data = read_blob_from_cas(ports.cas[1], "", &multi_hash, multi_size) + .await + .expect("Worker-2 peer fetch failed for multi-redirect blob"); + assert_eq!( + data.as_deref(), + Some(multi_blob.as_slice()), + "Worker-2 should fetch multi-redirect blob from Worker-1", + ); + + // Wait for Worker-2's BlobsAvailable to propagate the newly cached blob. + assert!( + process + .wait_for_log_count( + "Registering blobs available from worker", + before_register + 1, + Duration::from_secs(5), + ) + .await, + "Server did not register Worker-2's BlobsAvailable after peer fetch.", + ); + + // Now a worker request should get a redirect listing BOTH workers. + let result = read_blob_from_cas_as_worker(ports.public, "main", &multi_hash, multi_size) + .await + .expect("Worker read for multi-redirect failed"); + assert_eq!( + result.code, 9, + "Multi-redirect should use FailedPrecondition, got code={} message={:?}", + result.code, result.message, + ); + assert!( + result.message.contains("NL_REDIRECT:"), + "Multi-redirect should contain NL_REDIRECT: prefix, got: {:?}", + result.message, + ); + // Both Worker-1 and Worker-2 CAS ports should appear in the redirect. + let w1_port = format!(":{}", ports.cas[0]); + let w2_port = format!(":{}", ports.cas[1]); + assert!( + result.message.contains(&w1_port) && result.message.contains(&w2_port), + "Redirect should list both worker-1 ({}) and worker-2 ({}), got: {:?}", + w1_port, w2_port, result.message, + ); + + // Process is killed on drop. +} diff --git a/tests/execute_peer_sharing_test.rs b/tests/execute_peer_sharing_test.rs new file mode 100644 index 000000000..d7a01b688 --- /dev/null +++ b/tests/execute_peer_sharing_test.rs @@ -0,0 +1,734 @@ +// Copyright 2025 The NativeLink Authors. All rights reserved. +// +// Licensed under the Functional Source License, Version 1.1, Apache 2.0 Future License +// (the "License"); you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// See LICENSE file for details +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Integration test: Execute dependent actions where the second action's +//! inputs are fetched from the first action's worker via peer-to-peer blob +//! sharing (WorkerProxyStore redirects). +//! +//! Topology: +//! - 1 nativelink server (CAS + Execution + WorkerApi) +//! - 2 workers with peer CAS servers and distinct `worker_id` properties +//! +//! Flow: +//! 1. Action A targets worker-1, produces output blob +//! 2. BlobsAvailable propagates output digests to the server's locality map +//! 3. Action B targets worker-2, depends on A's output — fetched via peer +//! sharing (WorkerProxyStore proxy → Worker-1 CAS) +//! 4. Action C targets worker-1, depends on B's output — fetched from +//! 
worker-2, verifying bi-directional peer sharing
+
+use std::io::{BufRead, BufReader};
+use std::path::{Path, PathBuf};
+use std::process::{Child, Command as ProcessCommand, Stdio};
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use tracing::error;
+
+use nativelink_proto::build::bazel::remote::execution::v2::{
+    batch_update_blobs_request, content_addressable_storage_client::ContentAddressableStorageClient,
+    digest_function, execution_client::ExecutionClient, platform, Action, BatchUpdateBlobsRequest,
+    Command, Digest, Directory, ExecuteRequest, ExecuteResponse, FileNode, Platform,
+};
+use nativelink_proto::google::longrunning::operation;
+use prost::Message;
+use sha2::{Digest as Sha2Digest, Sha256};
+use tempfile::TempDir;
+use tonic::transport::Channel;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+fn get_free_port() -> u16 {
+    let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
+    listener.local_addr().unwrap().port()
+}
+
+struct Ports {
+    public: u16,
+    worker_api: u16,
+    cas: [u16; 2],
+}
+
+fn allocate_ports() -> Ports {
+    Ports {
+        public: get_free_port(),
+        worker_api: get_free_port(),
+        cas: [get_free_port(), get_free_port()],
+    }
+}
+
+/// Compute the SHA-256 digest of data, returning a proto Digest.
+fn sha256_digest_proto(data: &[u8]) -> Digest {
+    let mut hasher = Sha256::new();
+    hasher.update(data);
+    Digest {
+        hash: format!("{:x}", hasher.finalize()),
+        size_bytes: data.len() as i64,
+    }
+}
+
+/// Serialize a prost Message and compute its digest.
+fn digest_of_message<M: Message>(msg: &M) -> (Vec<u8>, Digest) {
+    let data = msg.encode_to_vec();
+    let digest = sha256_digest_proto(&data);
+    (data, digest)
+}
+
+/// Write a JSON5 config with an execution service and 2 workers with distinct
+/// `worker_id` platform properties for deterministic action routing.
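+/// The scheduler below matches `worker_id` with the "exact" policy and each
+/// worker advertises a unique value ("w1"/"w2"), so an action's `worker_id`
+/// platform property pins it to one worker. `blobs_available_interval_ms: 200`
+/// keeps locality-map propagation fast enough for the short waits in the test.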
+fn write_config(temp_dir: &Path, ports: &Ports) -> PathBuf { + let d = temp_dir.to_string_lossy().replace('\\', "/"); + let config = format!( + r#"{{ + stores: [ + {{ name: "AC_STORE", memory: {{ eviction_policy: {{ max_bytes: 100000000 }} }} }}, + {{ name: "SERVER_CAS", memory: {{ eviction_policy: {{ max_bytes: 100000000 }} }} }}, + {{ + name: "W1_STORE", + fast_slow: {{ + fast: {{ filesystem: {{ + content_path: "{d}/w1/cas", + temp_path: "{d}/w1/tmp", + eviction_policy: {{ max_bytes: 100000000 }}, + }} }}, + slow: {{ grpc: {{ + instance_name: "main", + endpoints: [{{ address: "grpc://127.0.0.1:{public}" }}], + store_type: "cas", + }} }}, + slow_direction: "get", + }}, + }}, + {{ + name: "W2_STORE", + fast_slow: {{ + fast: {{ filesystem: {{ + content_path: "{d}/w2/cas", + temp_path: "{d}/w2/tmp", + eviction_policy: {{ max_bytes: 100000000 }}, + }} }}, + slow: {{ grpc: {{ + instance_name: "main", + endpoints: [{{ address: "grpc://127.0.0.1:{public}" }}], + store_type: "cas", + }} }}, + slow_direction: "get", + }}, + }}, + ], + schedulers: [ + {{ + name: "MAIN", + simple: {{ + supported_platform_properties: {{ + cpu_count: "minimum", + worker_id: "exact", + }}, + }}, + }}, + ], + workers: [ + {{ local: {{ + name: "worker-1", + worker_api_endpoint: {{ uri: "grpc://127.0.0.1:{wapi}" }}, + cas_fast_slow_store: "W1_STORE", + cas_server_port: {c1}, + blobs_available_interval_ms: 200, + work_directory: "{d}/w1/work", + upload_action_result: {{ + ac_store: "AC_STORE", + upload_ac_results_strategy: "success_only", + }}, + platform_properties: {{ + cpu_count: {{ values: ["1"] }}, + worker_id: {{ values: ["w1"] }}, + }}, + }} }}, + {{ local: {{ + name: "worker-2", + worker_api_endpoint: {{ uri: "grpc://127.0.0.1:{wapi}" }}, + cas_fast_slow_store: "W2_STORE", + cas_server_port: {c2}, + blobs_available_interval_ms: 200, + work_directory: "{d}/w2/work", + upload_action_result: {{ + ac_store: "AC_STORE", + upload_ac_results_strategy: "success_only", + }}, + platform_properties: {{ + cpu_count: {{ values: ["1"] }}, + worker_id: {{ values: ["w2"] }}, + }}, + }} }}, + ], + servers: [ + {{ + name: "public", + listener: {{ http: {{ socket_address: "127.0.0.1:{public}" }} }}, + services: {{ + cas: [{{ instance_name: "main", cas_store: "SERVER_CAS" }}], + ac: [{{ instance_name: "main", ac_store: "AC_STORE" }}], + bytestream: [{{ instance_name: "main", cas_store: "SERVER_CAS" }}], + capabilities: [{{ instance_name: "main", remote_execution: {{ scheduler: "MAIN" }} }}], + execution: [{{ instance_name: "main", cas_store: "SERVER_CAS", scheduler: "MAIN" }}], + }}, + }}, + {{ + name: "worker_api", + listener: {{ http: {{ socket_address: "127.0.0.1:{wapi}" }} }}, + services: {{ + worker_api: {{ scheduler: "MAIN" }}, + }}, + }}, + ], +}}"#, + d = d, + wapi = ports.worker_api, + c1 = ports.cas[0], + c2 = ports.cas[1], + public = ports.public, + ); + let config_path = temp_dir.join("config.json5"); + std::fs::write(&config_path, config).unwrap(); + config_path +} + +struct NativeLinkProcess { + child: Child, + log_lines: Arc>>, + child_alive: Arc, +} + +impl NativeLinkProcess { + fn spawn(config_path: &Path) -> Self { + let binary = env!("CARGO_BIN_EXE_nativelink"); + + let mut child = ProcessCommand::new(binary) + .arg(config_path.to_str().unwrap()) + .env( + "RUST_LOG", + "nativelink=trace,nativelink_worker=trace,nativelink_service=trace,nativelink_store=trace", + ) + .env("NO_COLOR", "1") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("Failed to spawn nativelink binary"); + + let log_lines: 
Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
+        let child_alive = Arc::new(AtomicBool::new(true));
+
+        let stderr = child.stderr.take().unwrap();
+        let log_lines_stderr = log_lines.clone();
+        let child_alive_stderr = child_alive.clone();
+        std::thread::spawn(move || {
+            for line in BufReader::new(stderr).lines() {
+                match line {
+                    Ok(line) => log_lines_stderr.lock().unwrap().push(line),
+                    Err(_) => break,
+                }
+            }
+            child_alive_stderr.store(false, Ordering::Relaxed);
+        });
+
+        let stdout = child.stdout.take().unwrap();
+        let log_lines_stdout = log_lines.clone();
+        std::thread::spawn(move || {
+            for line in BufReader::new(stdout).lines() {
+                match line {
+                    Ok(line) => log_lines_stdout.lock().unwrap().push(line),
+                    Err(_) => break,
+                }
+            }
+        });
+
+        Self {
+            child,
+            log_lines,
+            child_alive,
+        }
+    }
+
+    async fn wait_for_log_count(&self, pattern: &str, count: usize, timeout: Duration) -> bool {
+        let deadline = tokio::time::Instant::now() + timeout;
+        loop {
+            {
+                let lines = self.log_lines.lock().unwrap();
+                if lines.iter().filter(|l| l.contains(pattern)).count() >= count {
+                    return true;
+                }
+            }
+            if tokio::time::Instant::now() > deadline {
+                return false;
+            }
+            if !self.child_alive.load(Ordering::Relaxed) {
+                tokio::time::sleep(Duration::from_millis(200)).await;
+                let lines = self.log_lines.lock().unwrap();
+                let found = lines.iter().filter(|l| l.contains(pattern)).count();
+                if found < count {
+                    error!(
+                        "!!! Child exited waiting for pattern={pattern:?} count={count} (found {found}). Last 40 lines:",
+                    );
+                    for line in lines.iter().rev().take(40).rev() {
+                        error!("  {line}");
+                    }
+                }
+                return found >= count;
+            }
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        }
+    }
+
+    fn count_logs(&self, pattern: &str) -> usize {
+        self.log_lines
+            .lock()
+            .unwrap()
+            .iter()
+            .filter(|l| l.contains(pattern))
+            .count()
+    }
+
+    fn grep_logs(&self, pattern: &str) -> Vec<String> {
+        self.log_lines
+            .lock()
+            .unwrap()
+            .iter()
+            .filter(|l| l.contains(pattern))
+            .cloned()
+            .collect()
+    }
+
+    /// Print all logs for debugging.
+    fn dump_logs(&self, label: &str) {
+        let lines = self.log_lines.lock().unwrap();
+        error!("=== {label} ({} lines) ===", lines.len());
+        for line in lines.iter() {
+            error!("  {line}");
+        }
+        error!("=== end {label} ===");
+    }
+}
+
+impl Drop for NativeLinkProcess {
+    fn drop(&mut self) {
+        let _ = self.child.kill();
+        let _ = self.child.wait();
+    }
+}
+
+/// Upload multiple blobs to the server's CAS via BatchUpdateBlobs.
+async fn upload_blobs_to_cas(
+    channel: &Channel,
+    blobs: &[(Vec<u8>, Digest)],
+) -> Result<(), Box<dyn std::error::Error>> {
+    let mut client = ContentAddressableStorageClient::new(channel.clone());
+    let requests: Vec<_> = blobs
+        .iter()
+        .map(|(data, digest)| batch_update_blobs_request::Request {
+            digest: Some(digest.clone()),
+            data: data.clone().into(),
+            compressor: 0,
+        })
+        .collect();
+    client
+        .batch_update_blobs(BatchUpdateBlobsRequest {
+            instance_name: "main".to_string(),
+            requests,
+            digest_function: digest_function::Value::Sha256.into(),
+        })
+        .await?;
+    Ok(())
+}
+
+/// Execute an action and wait for it to complete, returning the ExecuteResponse.
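+/// The Execute RPC streams google.longrunning.Operation updates; the terminal
+/// message has `done = true` and packs an ExecuteResponse into its `response`
+/// Any payload, which is decoded below with prost.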
+async fn execute_and_wait(
+    channel: &Channel,
+    action_digest: Digest,
+) -> Result<ExecuteResponse, Box<dyn std::error::Error>> {
+    let mut client = ExecutionClient::new(channel.clone());
+    let request = ExecuteRequest {
+        instance_name: "main".to_string(),
+        action_digest: Some(action_digest),
+        skip_cache_lookup: true,
+        digest_function: digest_function::Value::Sha256.into(),
+        execution_policy: None,
+        results_cache_policy: None,
+    };
+
+    let response = client.execute(request).await?;
+    let mut stream = response.into_inner();
+
+    let mut last_response: Option<ExecuteResponse> = None;
+    while let Some(op) = stream.message().await? {
+        if op.done {
+            if let Some(operation::Result::Response(any)) = op.result {
+                let exec_response = ExecuteResponse::decode(any.value.as_ref())?;
+                last_response = Some(exec_response);
+            }
+            break;
+        }
+    }
+
+    last_response.ok_or_else(|| "Execute stream ended without done=true".into())
+}
+
+/// Build a Platform proto targeting a specific worker.
+fn make_platform(worker_id: &str) -> Platform {
+    Platform {
+        properties: vec![
+            platform::Property {
+                name: "cpu_count".to_string(),
+                value: "1".to_string(),
+            },
+            platform::Property {
+                name: "worker_id".to_string(),
+                value: worker_id.to_string(),
+            },
+        ],
+    }
+}
+
+/// Build and upload an action targeted at a specific worker.
+async fn create_action(
+    channel: &Channel,
+    arguments: Vec<String>,
+    output_files: Vec<String>,
+    input_root: &Directory,
+    target_worker: &str,
+) -> Result<Digest, Box<dyn std::error::Error>> {
+    let command = Command {
+        arguments,
+        output_files,
+        ..Default::default()
+    };
+    let (cmd_data, cmd_digest) = digest_of_message(&command);
+
+    let (root_data, root_digest) = digest_of_message(input_root);
+
+    let action = Action {
+        command_digest: Some(cmd_digest.clone()),
+        input_root_digest: Some(root_digest.clone()),
+        do_not_cache: true,
+        platform: Some(make_platform(target_worker)),
+        ..Default::default()
+    };
+    let (action_data, action_digest) = digest_of_message(&action);
+
+    upload_blobs_to_cas(
+        channel,
+        &[
+            (cmd_data, cmd_digest),
+            (root_data, root_digest),
+            (action_data, action_digest.clone()),
+        ],
+    )
+    .await?;
+
+    Ok(action_digest)
+}
+
+// ---------------------------------------------------------------------------
+// Test
+// ---------------------------------------------------------------------------
+
+/// Execute a chain of 3 dependent actions on alternating workers, exercising
+/// peer-to-peer blob sharing in both directions.
+///
+/// Action A → worker-1: `echo -n "HELLO_FROM_ACTION_A" > output.txt`
+/// Action B → worker-2: `cat input.txt > output.txt && echo -n "_PLUS_B" >> output.txt`
+///   (input = A's output, fetched from worker-1 via peer sharing)
+/// Action C → worker-1: `echo -n "_PLUS_C" > output.txt && cat input.txt >> output.txt`
+///   (input = B's output, fetched from worker-2 via peer sharing)
+#[tokio::test(flavor = "multi_thread")]
+async fn test_execute_dependent_actions_with_peer_sharing() {
+    let temp_dir = TempDir::new().expect("Failed to create temp dir");
+    let ports = allocate_ports();
+    let config_path = write_config(temp_dir.path(), &ports);
+
+    let process = NativeLinkProcess::spawn(&config_path);
+
+    // Wait for server listeners.
+    assert!(
+        process
+            .wait_for_log_count("Ready, listening on", 2, Duration::from_secs(30))
+            .await,
+        "Server did not start. Last 20 lines:\n{}",
+        {
+            // grep_logs returns an owned snapshot, so no lock is held here.
+            let lines = process.grep_logs("");
+            lines.iter().rev().take(20).rev().cloned().collect::<Vec<_>>().join("\n")
+        },
+    );
+
+    // Wait for both workers to register.
+ assert!( + process + .wait_for_log_count("Worker registered with scheduler", 2, Duration::from_secs(15)) + .await, + "Not all workers registered. Found {}.", + process.count_logs("Worker registered with scheduler"), + ); + + // Wait for initial BlobsAvailable snapshots. + assert!( + process + .wait_for_log_count("Sent periodic BlobsAvailable", 2, Duration::from_secs(5)) + .await, + "Workers did not send initial BlobsAvailable.", + ); + + let channel = Channel::from_shared(format!("http://127.0.0.1:{}", ports.public)) + .unwrap() + .connect_timeout(Duration::from_secs(5)) + .timeout(Duration::from_secs(60)) + .connect() + .await + .expect("Failed to connect to server"); + + // ===================================================================== + // ACTION A → worker-1: Produce a known output blob + // ===================================================================== + let action_a_digest = create_action( + &channel, + vec![ + "/bin/sh".to_string(), + "-c".to_string(), + "echo -n 'HELLO_FROM_ACTION_A' > output.txt".to_string(), + ], + vec!["output.txt".to_string()], + &Directory::default(), + "w1", + ) + .await + .expect("Failed to create Action A"); + + let before_register = process.count_logs("Registering blobs available from worker"); + + let response_a = execute_and_wait(&channel, action_a_digest) + .await + .expect("Action A execution failed"); + + let result_a = response_a + .result + .as_ref() + .expect("Action A missing ActionResult"); + assert_eq!( + result_a.exit_code, 0, + "Action A exit_code={}", + result_a.exit_code, + ); + assert_eq!(result_a.output_files.len(), 1, "Action A output count"); + + let output_a_digest = result_a.output_files[0] + .digest + .as_ref() + .expect("Action A output missing digest"); + let expected_a = b"HELLO_FROM_ACTION_A"; + let expected_a_digest = sha256_digest_proto(expected_a); + assert_eq!( + output_a_digest.hash, expected_a_digest.hash, + "Action A output digest mismatch", + ); + + // Wait for BlobsAvailable to propagate A's outputs to the locality map. + assert!( + process + .wait_for_log_count( + "Registering blobs available from worker", + before_register + 1, + Duration::from_secs(5), + ) + .await, + "BlobsAvailable not registered after Action A.", + ); + + // ===================================================================== + // ACTION B → worker-2: Depends on A's output (peer sharing: w1 → w2) + // ===================================================================== + // Worker-2 does not have A's output locally. 
The fetch chain: + // Worker-2 FastStore (miss) → GrpcStore → server CAS → + // WorkerProxyStore → locality map (w1 has it) → proxy from w1's CAS + let input_root_b = Directory { + files: vec![FileNode { + name: "input.txt".to_string(), + digest: Some(output_a_digest.clone()), + is_executable: false, + node_properties: None, + }], + ..Default::default() + }; + + let action_b_digest = create_action( + &channel, + vec![ + "/bin/sh".to_string(), + "-c".to_string(), + "cat input.txt > output.txt && echo -n '_PLUS_B' >> output.txt".to_string(), + ], + vec!["output.txt".to_string()], + &input_root_b, + "w2", + ) + .await + .expect("Failed to create Action B"); + + let proxy_before_b = process.count_logs("WorkerProxyStore: successfully read blob from redirected peer"); + + let before_register = process.count_logs("Registering blobs available from worker"); + + let response_b = execute_and_wait(&channel, action_b_digest) + .await + .expect("Action B execution failed"); + + let result_b = response_b + .result + .as_ref() + .expect("Action B missing ActionResult"); + assert_eq!( + result_b.exit_code, 0, + "Action B exit_code={}\nAll logs:\n{}", + result_b.exit_code, + process.grep_logs("").join("\n"), + ); + assert_eq!(result_b.output_files.len(), 1, "Action B output count"); + + let output_b_digest = result_b.output_files[0] + .digest + .as_ref() + .expect("Action B output missing digest"); + let expected_b = b"HELLO_FROM_ACTION_A_PLUS_B"; + let expected_b_digest = sha256_digest_proto(expected_b); + assert_eq!( + output_b_digest.hash, expected_b_digest.hash, + "Action B output digest mismatch. Expected {:?}, got hash {}", + String::from_utf8_lossy(expected_b), + output_b_digest.hash, + ); + + // Verify peer sharing: worker-2 received a redirect from the server's + // WorkerProxyStore and fetched A's output directly from worker-1's CAS. + let proxy_after_b = process.count_logs("WorkerProxyStore: successfully read blob from redirected peer"); + if proxy_after_b <= proxy_before_b { + process.dump_logs("Action B peer sharing failure"); + } + assert!( + proxy_after_b > proxy_before_b, + "Expected peer redirect from worker-1 for Action A's output. \ + Redirect count before={proxy_before_b} after={proxy_after_b}.", + ); + + // Wait for BlobsAvailable after Action B. + assert!( + process + .wait_for_log_count( + "Registering blobs available from worker", + before_register + 1, + Duration::from_secs(5), + ) + .await, + "BlobsAvailable not registered after Action B.", + ); + + // ===================================================================== + // ACTION C → worker-1: Depends on B's output (peer sharing: w2 → w1) + // ===================================================================== + // B's output is only on worker-2. Worker-1 must peer-fetch it. + // This verifies bi-directional peer sharing. 
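+    // The reverse chain mirrors Action B's (assumed symmetric): Worker-1
+    // fast store (miss) → GrpcStore → server CAS → WorkerProxyStore →
+    // locality map (w2 has it) → redirect → Worker-1 reads from Worker-2's CAS.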
+ let input_root_c = Directory { + files: vec![FileNode { + name: "input.txt".to_string(), + digest: Some(output_b_digest.clone()), + is_executable: false, + node_properties: None, + }], + ..Default::default() + }; + + let action_c_digest = create_action( + &channel, + vec![ + "/bin/sh".to_string(), + "-c".to_string(), + "echo -n '_PLUS_C' > output.txt && cat input.txt >> output.txt".to_string(), + ], + vec!["output.txt".to_string()], + &input_root_c, + "w1", + ) + .await + .expect("Failed to create Action C"); + + let proxy_before_c = process.count_logs("WorkerProxyStore: successfully read blob from redirected peer"); + + let response_c = execute_and_wait(&channel, action_c_digest) + .await + .expect("Action C execution failed"); + + let result_c = response_c + .result + .as_ref() + .expect("Action C missing ActionResult"); + assert_eq!( + result_c.exit_code, 0, + "Action C exit_code={}", + result_c.exit_code, + ); + assert_eq!(result_c.output_files.len(), 1, "Action C output count"); + + let output_c_digest = result_c.output_files[0] + .digest + .as_ref() + .expect("Action C output missing digest"); + let expected_c = b"_PLUS_CHELLO_FROM_ACTION_A_PLUS_B"; + let expected_c_digest = sha256_digest_proto(expected_c); + assert_eq!( + output_c_digest.hash, expected_c_digest.hash, + "Action C output digest mismatch. Expected {:?}, got hash {}", + String::from_utf8_lossy(expected_c), + output_c_digest.hash, + ); + + // Verify peer redirect for Action C (w2 → w1 direction). + let proxy_after_c = process.count_logs("WorkerProxyStore: successfully read blob from redirected peer"); + assert!( + proxy_after_c > proxy_before_c, + "Expected peer redirect from worker-2 for Action B's output. \ + Redirect count before={proxy_before_c} after={proxy_after_c}. \ + WorkerProxyStore logs:\n{}", + process.grep_logs("WorkerProxyStore").join("\n"), + ); + + // ===================================================================== + // Summary assertions + // ===================================================================== + + // At least 2 proxy operations (one per cross-worker fetch). + let total_proxies = process.count_logs("WorkerProxyStore: successfully read blob from redirected peer"); + assert!( + total_proxies >= 2, + "Expected at least 2 peer redirect reads (A→w2, B→w1), got {total_proxies}", + ); + + // BlobsAvailable should have been registered multiple times. + let total_registrations = process.count_logs("Registering blobs available from worker"); + assert!( + total_registrations >= 4, + "Expected at least 4 BlobsAvailable registrations, got {total_registrations}", + ); + + // Process is killed on drop. +} diff --git a/toolchain-examples/nativelink-config.json5 b/toolchain-examples/nativelink-config.json5 index 7e40a65e4..8e66c47e0 100644 --- a/toolchain-examples/nativelink-config.json5 +++ b/toolchain-examples/nativelink-config.json5 @@ -47,6 +47,8 @@ OSFamily: "priority", "container-image": "priority", }, + // Enable locality-aware scheduling. + cas_store: "WORKER_FAST_SLOW_STORE", }, }, ], @@ -57,6 +59,8 @@ uri: "grpc://127.0.0.1:50061", }, cas_fast_slow_store: "WORKER_FAST_SLOW_STORE", + // Expose a CAS server for peer-to-peer blob sharing. + cas_server_port: 50081, upload_action_result: { ac_store: "AC_MAIN_STORE", },