imazen
diff --git a/‎CHANGELOG.md‎
Lines changed: 30 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 1 deletion b/‎Cargo.lock‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmarks/auto_afv_default_set_d1_2026-05-17.txt‎
Lines changed: 165 additions & 0 deletions b/‎benchmarks/auto_afv_default_set_d1_2026-05-17.txt‎
Lines changed: 165 additions & 0 deletions
diff --git a/‎benchmarks/auto_afv_screenshots_sweep_2026-05-17.meta‎
Lines changed: 63 additions & 0 deletions b/‎benchmarks/auto_afv_screenshots_sweep_2026-05-17.meta‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎benchmarks/auto_afv_screenshots_sweep_2026-05-17.txt‎
Lines changed: 45 additions & 0 deletions b/‎benchmarks/auto_afv_screenshots_sweep_2026-05-17.txt‎
Lines changed: 45 additions & 0 deletions
@@ -2,6 +2,36 @@
 
 ## [Unreleased]
 
+### Added (May 17, 2026)
+
+- **Auto-AFV-on-screenshots dispatch in the GPU strategy search**.
+  `LossyEncoder` now exposes `with_auto_evaluate_afv_on_screenshots(bool)`
+  (default `true`) that auto-enables AFV0-3 cost-grid evaluation inside
+  `prepare_strategy_search_plan_inner` when the per-block `mask1x1`
+  median exceeds `SCREENSHOT_MEDIAN_MASK_THRESHOLD` (95.0) AND
+  `effort >= 7`. Same discriminator the `SkippedStratSearchAsScreenshot`
+  path uses; reuses `aq_field_means` already produced for the AQ field
+  so the dispatch is essentially free (median over a few-thousand-entry
+  vector). Explicit `with_evaluate_afv(true)` still always wins.
+  Photos are byte-identical (median < 95 on every CLIC sample tested,
+  46-77 range — gate never fires). Screenshots see a small but real
+  bytes win on the subset where AFV picks survive the patches case-1
+  recompute: the 10-image `gb82-sc` sweep at d=1.0 shrinks output by 0.091%
+  in total; per-image winners are gmessages.png (-0.788%), graph.png
+  (-0.403%), gui.png (-0.116%). On screenshots that trigger
+  `find_and_build_patches`, the CPU `compute_ac_strategy` recompute on
+  patches-subtracted XYB still overwrites GPU AFV picks (libjxl-parity
+  contract); preserving GPU AFV picks across patches recompute is
+  follow-on work. `corpus_regression` bitstream stays byte-identical on
+  photo rows (no dispatch fires) and on screenshot rows (they flow
+  through `refine_and_encode_smart` → `SkippedStratSearchAsScreenshot`
+  which never calls `prepare_strategy_search_plan`). Bench at
+  `benchmarks/auto_afv_screenshots_sweep_2026-05-17.{txt,meta}`. Tests:
+  `tests/afv_cost_grid_wiring.rs` (`test_auto_afv_default_on_but_synthetic_does_not_fire`,
+  `test_auto_afv_opt_out_disables_dispatch`). Reference: `dropped_optimizations_for_parity_2026-05-15.md`
+  item #1 and `vardct_gpu_dropped_optimizations_resurrection_2026-05-17.md`
+  top-3 conditional resurrection.
+
 ### Fixed (May 15, 2026)
 
 - **`decode_via_jxl_rs` was mislabeling sRGB-encoded f32 as linear** in
 
@@ -0,0 +1,165 @@
+warning: methods `capacity`, `len`, `is_empty`, `lookup_only`, and `unique_keys` are never used
+   --> /home/lilith/work/zen/jxl-encoder/jxl-encoder/src/modular/inline_dedup_table.rs:216:12
+    |
+186 | impl InlineDedupTable {
+    | --------------------- methods in this implementation
+...
+216 |     pub fn capacity(&self) -> usize {
+    |            ^^^^^^^^
+...
+222 |     pub fn len(&self) -> usize {
+    |            ^^^
+...
+228 |     pub fn is_empty(&self) -> bool {
+    |            ^^^^^^^^
+...
+356 |     pub fn lookup_only(&self, key: &[u8; KEY_BYTES]) -> Option<u32> {
+    |            ^^^^^^^^^^^
+...
+383 |     pub fn unique_keys(&self) -> &[[u8; KEY_BYTES]] {
+    |            ^^^^^^^^^^^
+    |
+    = note: `#[warn(dead_code)]` (part of `#[warn(unused)]`) on by default
+
+warning: fields `gather_dedup_phase3`, `parallel_max_depth`, `parallel_recursion_floor`, and `parallel_root_threshold` are never read
+   --> /home/lilith/work/zen/jxl-encoder/jxl-encoder/src/modular/tree_learn.rs:195:9
+    |
+135 | pub struct TreeLearningParams {
+    |            ------------------ fields in this struct
+...
+195 |     pub gather_dedup_phase3: bool,
+    |         ^^^^^^^^^^^^^^^^^^^
+...
+201 |     pub parallel_max_depth: u32,
+    |         ^^^^^^^^^^^^^^^^^^
+...
+205 |     pub parallel_recursion_floor: usize,
+    |         ^^^^^^^^^^^^^^^^^^^^^^^^
+...
+209 |     pub parallel_root_threshold: usize,
+    |         ^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: function `gather_samples_strided_with_dedup` is never used
+   --> /home/lilith/work/zen/jxl-encoder/jxl-encoder/src/modular/tree_learn.rs:843:15
+    |
+843 | pub(crate) fn gather_samples_strided_with_dedup(
+    |               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: enum `IwWeightKind` is never used
+  --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:39:10
+   |
+39 | pub enum IwWeightKind {
+   |          ^^^^^^^^^^^^
+   |
+   = note: `#[warn(dead_code)]` (part of `#[warn(unused)]`) on by default
+
+warning: struct `IwWeightConfig` is never constructed
+  --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:73:12
+   |
+73 | pub struct IwWeightConfig {
+   |            ^^^^^^^^^^^^^^
+
+warning: function `compute_iw_weights` is never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:123:8
+    |
+123 | pub fn compute_iw_weights(
+    |        ^^^^^^^^^^^^^^^^^^
+
+warning: function `compute_local_variance` is never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:194:4
+    |
+194 | fn compute_local_variance(
+    |    ^^^^^^^^^^^^^^^^^^^^^^
+
+warning: function `compute_directional_max_variance` is never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:251:4
+    |
+251 | fn compute_directional_max_variance(
+    |    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+warning: function `local_variance_into` is never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:321:4
+    |
+321 | fn local_variance_into(plane: &[f32], width: usize, height: usize, half: usize, out: &mut [f32]) {
+    |    ^^^^^^^^^^^^^^^^^^^
+
+warning: enum `GradNorm` is never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:345:6
+    |
+345 | enum GradNorm {
+    |      ^^^^^^^^
+
+warning: function `compute_gradient` is never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:350:4
+    |
+350 | fn compute_gradient(
+    |    ^^^^^^^^^^^^^^^^
+
+warning: struct `WeightedPool` is never constructed
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:379:12
+    |
+379 | pub struct WeightedPool;
+    |            ^^^^^^^^^^^^
+
+warning: associated functions `mean`, `l2`, and `l4` are never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:384:12
+    |
+381 | impl WeightedPool {
+    | ----------------- associated functions in this implementation
+...
+384 |     pub fn mean(values: &[f32], weights: &[f32]) -> f64 {
+    |            ^^^^
+...
+398 |     pub fn l2(values: &[f32], weights: &[f32]) -> f64 {
+    |            ^^
+...
+412 |     pub fn l4(values: &[f32], weights: &[f32]) -> f64 {
+    |            ^^
+
+warning: struct `IwSsimFeatures` is never constructed
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:434:12
+    |
+434 | pub struct IwSsimFeatures {
+    |            ^^^^^^^^^^^^^^
+
+warning: associated items `FEATURES_PER_CALL`, `as_array`, and `pool_from_maps` are never used
+   --> /home/lilith/work/zen/zensim/zensim/src/iw_pool.rs:451:15
+    |
+449 | impl IwSsimFeatures {
+    | ------------------- associated items in this implementation
+450 |     /// Number of features per call — matches `FEATURES_PER_CHANNEL_*_MASKED` in `metric.rs`.
+451 |     pub const FEATURES_PER_CALL: usize = 6;
+    |               ^^^^^^^^^^^^^^^^^
+...
+454 |     pub fn as_array(&self) -> [f64; 6] {
+    |            ^^^^^^^^
+...
+474 |     pub fn pool_from_maps(
+    |            ^^^^^^^^^^^^^^
+
+warning: `jxl-encoder` (lib) generated 3 warnings
+warning: `zensim` (lib) generated 12 warnings
+    Finished `release` profile [optimized + debuginfo] target(s) in 0.14s
+     Running `target/release/examples/auto_afv_bytes_ab --distance 1.0`
+[auto_afv_bytes_ab] distance=1 images=6 (auto-AFV OFF baseline vs ON)
+image                                                           MP    off_bytes     on_bytes   Δ_bytes    Δ_pct afv_o afv_n    ms_on
+[auto_afv] mask1x1 block-mean median=100.013, threshold=95.000, effort=7, fired=true
+[auto_afv] mask1x1 block-mean median=100.013, threshold=95.000, effort=7, fired=true
+terminal.png                                                  1.75        63175        63175        +0  +0.000%     0    40   306.0
+[auto_afv] mask1x1 block-mean median=100.013, threshold=95.000, effort=7, fired=true
+[auto_afv] mask1x1 block-mean median=100.013, threshold=95.000, effort=7, fired=true
+imac_g3.png                                                   5.62       299455       299455        +0  +0.000%     0     0  1318.2
+[auto_afv] mask1x1 block-mean median=48.822, threshold=95.000, effort=7, fired=false
+[auto_afv] mask1x1 block-mean median=48.822, threshold=95.000, effort=7, fired=false
+windows95.png                                                 0.31        51672        51672        +0  +0.000%     0     0    67.1
+[auto_afv] mask1x1 block-mean median=46.294, threshold=95.000, effort=7, fired=false
+[auto_afv] mask1x1 block-mean median=46.294, threshold=95.000, effort=7, fired=false
+02809272b4ca9b08af45771501b741296187c7e26907efb44abbbfcb6cd804f7.png  1.05       297492       297492        +0  +0.000%     0     0   398.4
+[auto_afv] mask1x1 block-mean median=59.988, threshold=95.000, effort=7, fired=false
+[auto_afv] mask1x1 block-mean median=59.988, threshold=95.000, effort=7, fired=false
+07b9f93f170a0381836bdf301280a5b80b2c4be6e66f793a3c335dc200fb4e5b.png  1.05       193716       193716        +0  +0.000%     0     0   148.6
+[auto_afv] mask1x1 block-mean median=77.282, threshold=95.000, effort=7, fired=false
+[auto_afv] mask1x1 block-mean median=77.282, threshold=95.000, effort=7, fired=false
+22ea12c903e41583b7c469cb86040157.png                          1.05        98304        98304        +0  +0.000%     0     0   147.0
+
+TOTAL (n=6 images, 10.82 MP): 1003814 → 1003814 bytes (+0 bytes, +0.000%)
@@ -0,0 +1,63 @@
+Auto-AFV-on-screenshots dispatch — bytes A/B sweep
+====================================================
+
+Date: 2026-05-17
+Commit: (will be set at commit time)
+Host: water-cooled AMD Ryzen 9 7950X + RTX 5070 (CUDA backend)
+Workspace: ~/work/zen/jxl-encoder-gpu--afv-cost-grid (jj on main)
+
+Sweep
+-----
+Corpus: codec-corpus/gb82-sc (10 PNG screenshots)
+Distances: 1.0, 2.0, 3.0
+Encoder entry: GpuEncoder::encode_lossy_to_bitstream_via_precomputed
+Effort: 7 (default)
+Knob under test: LossyEncoder::with_auto_evaluate_afv_on_screenshots(bool)
+  - PATH A (off): explicit override `with_auto_evaluate_afv_on_screenshots(false)` to
+    reproduce pre-2026-05-17 default-off behavior
+  - PATH B (on): new default; auto-AFV gates on `mask1x1 median > 95 AND effort >= 7`
+
+Driver:    cargo run --release -p jxl-encoder-gpu --features 'cuda encoder' \
+              --example auto_afv_bytes_ab -- --distance <D> [--image PATH ...]
+
+Headline
+--------
+d=1.0  (10 screenshots, 30.65 MP total): -1343 bytes / -0.091%
+d=2.0  (10 screenshots, 30.65 MP total):  -465 bytes / -0.041%
+d=3.0  (10 screenshots, 30.65 MP total):  +0 bytes  /  +0.000%
+
+Per-image winners at d=1.0:
+  gmessages.png  -1190 bytes  (-0.788%)   184 AFV picks survive case-1 path
+  graph.png      -108  bytes  (-0.403%)    13 AFV picks survive
+  gui.png         -45  bytes  (-0.116%)     9 AFV picks survive
+
+Per-image byte-identical (AFV picks wiped by patches case-1 recompute, or gate did not fire):
+  terminal.png   40 picks → 0 bytes change
+  windows.png    264 picks → 0 bytes change
+  windows95.png  median 48.8 → gate correctly skips
+
+Photos (default candidate set, 3 CLIC images at d=1.0): all byte-identical.
+  median per image: 46.3, 60.0, 77.3 (all < 95 threshold → gate stays off)
+
+Interaction with patches case-1 path
+-------------------------------------
+On screenshots that trigger `find_and_build_patches`, the CPU patches-subtracted
+recompute (`compute_ac_strategy` at encoder.rs:2120) overwrites our GPU AFV picks
+with a CPU-only DCT8-dominant assignment. This is the libjxl-parity contract and
+applies to terminal.png and windows.png in our sweep. The auto-AFV win is only realized
+on screenshots where patches DON'T fire OR where the CPU recompute still picks AFV.
+
+A future chunk could merge the GPU's AFV picks into the patches-subtracted CPU
+assignment (or port the AFV cost-grid into the CPU `compute_ac_strategy`); that's
+out of scope for this 1-2 day chunk.
+
+Photo verification (corpus_regression analog)
+----------------------------------------------
+3 CLIC photos at d=1.0 with new default (`auto_evaluate_afv_on_screenshots = true`)
+produce byte-identical bitstream to pre-2026-05-17 (off-AFV) baseline. Auto dispatch
+threshold (median > 95) is well above all measured photo medians (46-77), so the
+gate never fires on photo content. `corpus_regression` test (gated `corpus` feature)
+should continue to pass byte-for-byte on its photo rows. Screenshot rows in
+`corpus_regression` flow through `refine_and_encode_smart` →
+`SkippedStratSearchAsScreenshot` (no strat-search), so they are also unaffected
+by this dispatch.
@@ -0,0 +1,45 @@
+=== distance=1.0 ===
+[auto_afv_bytes_ab] distance=1 images=10 (auto-AFV OFF baseline vs ON)
+image                                                           MP    off_bytes     on_bytes   Δ_bytes    Δ_pct afv_o afv_n    ms_on
+codec_wiki.png                                                4.26       116673       116673        +0  +0.000%     0     0   782.9
+gmessages.png                                                 4.45       151002       149812     -1190  -0.788%     0   184   746.9
+graph.png                                                     0.38        26832        26724      -108  -0.403%     0    13    62.5
+gui.png                                                       1.53        38663        38618       -45  -0.116%     0     9   228.9
+imac_dark.png                                                 5.62       305481       305481        +0  +0.000%     0     0  1150.0
+imac_g3.png                                                   5.62       299455       299455        +0  +0.000%     0     0  1152.5
+imessage.png                                                  3.16       149771       149771        +0  +0.000%     0     0   542.2
+terminal.png                                                  1.75        63175        63175        +0  +0.000%     0    40   249.3
+windows95.png                                                 0.31        51672        51672        +0  +0.000%     0     0    61.1
+windows.png                                                   3.56       269079       269079        +0  +0.000%     0   264   619.1
+
+TOTAL (n=10 images, 30.65 MP): 1471803 → 1470460 bytes (-1343 bytes, -0.091%)
+=== distance=2.0 ===
+[auto_afv_bytes_ab] distance=2 images=10 (auto-AFV OFF baseline vs ON)
+image                                                           MP    off_bytes     on_bytes   Δ_bytes    Δ_pct afv_o afv_n    ms_on
+codec_wiki.png                                                4.26        93748        93748        +0  +0.000%     0     0   877.3
+gmessages.png                                                 4.45       113909       113450      -459  -0.403%     0    46   755.1
+graph.png                                                     0.38        20640        20640        +0  +0.000%     0     0    67.9
+gui.png                                                       1.53        28739        28733        -6  -0.021%     0     1   231.0
+imac_dark.png                                                 5.62       235459       235459        +0  +0.000%     0     0  1262.3
+imac_g3.png                                                   5.62       249040       249040        +0  +0.000%     0     0  1154.2
+imessage.png                                                  3.16       114258       114258        +0  +0.000%     0     0   560.5
+terminal.png                                                  1.75        51197        51197        +0  +0.000%     0     0   281.2
+windows95.png                                                 0.31        39650        39650        +0  +0.000%     0     0    62.7
+windows.png                                                   3.56       193757       193757        +0  +0.000%     0    53   674.5
+
+TOTAL (n=10 images, 30.65 MP): 1140397 → 1139932 bytes (-465 bytes, -0.041%)
+=== distance=3.0 ===
+[auto_afv_bytes_ab] distance=3 images=10 (auto-AFV OFF baseline vs ON)
+image                                                           MP    off_bytes     on_bytes   Δ_bytes    Δ_pct afv_o afv_n    ms_on
+codec_wiki.png                                                4.26        81876        81876        +0  +0.000%     0     0   837.2
+gmessages.png                                                 4.45        92662        92662        +0  +0.000%     0     0   773.4
+graph.png                                                     0.38        16911        16911        +0  +0.000%     0     0    64.5
+gui.png                                                       1.53        23922        23922        +0  +0.000%     0     0   228.2
+imac_dark.png                                                 5.62       201030       201030        +0  +0.000%     0     0  1203.5
+imac_g3.png                                                   5.62       223092       223092        +0  +0.000%     0     0  1196.1
+imessage.png                                                  3.16        98410        98410        +0  +0.000%     0     0   530.9
+terminal.png                                                  1.75        43012        43012        +0  +0.000%     0     0   249.9
+windows95.png                                                 0.31        33827        33827        +0  +0.000%     0     0    60.6
+windows.png                                                   3.56       159138       159138        +0  +0.000%     0     0   632.6
+
+TOTAL (n=10 images, 30.65 MP): 973880 → 973880 bytes (+0 bytes, +0.000%)