Add MUVERA processors for multi-vector search #3164
```java
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */

package org.opensearch.knn.processor.muvera;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * MUVERA (Multi-Vector Retrieval via Fixed Dimensional Encodings)
 *
 * Converts variable-length multi-vector embeddings (e.g. ColBERT/ColPali token embeddings)
 * into fixed-size single vectors that approximate MaxSim scoring via dot product.
 *
 * The output FDE dimension = rReps * 2^kSim * dimProj.
 *
 * Document processing: normalizes cluster centers by count, fills empty clusters via
 * the Hamming-nearest neighbor. Query processing: raw sum (no normalization), no
 * empty-cluster filling.
 *
 * The random seed must be identical between index time and query time to produce
 * compatible encodings.
 */
public class MuveraEncoder {
    private final int dim;
    private final int kSim;
    private final int dimProj;
    private final int rReps;
    private final int numPartitions;
    private final double[][] simhashVectors;
    private final double[][][] dimReductionProjections;

    /**
     * Maximum allowed FDE dimension. Matches the k-NN engine max dimension limit (16,000)
     * to ensure the FDE vector can be indexed in any supported engine.
     */
    static final int MAX_FDE_DIMENSION = 16_000;
```
Contributor: Is there a chance people will want to configure the max dimension? Could this be a configuration setting in the processor?

Author: @mingshl The 16,000 cap mirrors the k-NN engine's max vector dimension; going above it would fail at indexing regardless. Users already control the actual FDE size through the MUVERA parameters (dim, k_sim, dim_proj, r_reps). Adding a configurable max would let users set a tighter bound, but that feels like an edge case since the engine enforces the real ceiling. Happy to add it as an optional max_fde_dimension processor config if you think it's worth it, though.
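If that optional cap were added, it might look like the sketch below. This is purely hypothetical: the max_fde_dimension parameter and the constructor overload are not part of this PR.

```java
// Hypothetical overload; maxFdeDimension is NOT part of this PR.
public MuveraEncoder(int dim, int kSim, int dimProj, int rReps, long seed, Integer maxFdeDimension) {
    // A user-supplied cap can only tighten the engine-enforced ceiling, never raise it.
    int effectiveMax = (maxFdeDimension != null)
        ? Math.min(maxFdeDimension, MAX_FDE_DIMENSION)
        : MAX_FDE_DIMENSION;
    long fdeDimension = (long) rReps * (1L << kSim) * dimProj;
    if (fdeDimension > effectiveMax) {
        throw new IllegalArgumentException(
            "FDE dimension [" + fdeDimension + "] exceeds the configured maximum [" + effectiveMax + "]"
        );
    }
    // ... remaining construction as in the PR
}
```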
```java
    public MuveraEncoder(int dim, int kSim, int dimProj, int rReps, long seed) {
        if (dim <= 0) {
            throw new IllegalArgumentException("dim must be positive, got: " + dim);
        }
        if (kSim < 0) {
            throw new IllegalArgumentException("k_sim must be non-negative, got: " + kSim);
        }
        if (dimProj <= 0) {
            throw new IllegalArgumentException("dim_proj must be positive, got: " + dimProj);
        }
        if (rReps <= 0) {
            throw new IllegalArgumentException("r_reps must be positive, got: " + rReps);
        }

        // Validate that the resulting FDE dimension does not exceed the k-NN engine limit.
        // FDE dimension = rReps * 2^kSim * dimProj
        long fdeDimension = (long) rReps * (1L << kSim) * dimProj;
        if (fdeDimension > MAX_FDE_DIMENSION) {
            throw new IllegalArgumentException(
                "MUVERA parameters produce an FDE dimension of ["
                    + fdeDimension
                    + "] which exceeds the maximum allowed dimension of ["
                    + MAX_FDE_DIMENSION
                    + "]. Reduce r_reps, k_sim, or dim_proj. (r_reps="
                    + rReps
                    + " * 2^k_sim="
                    + (1L << kSim)
                    + " * dim_proj="
                    + dimProj
                    + ")"
            );
        }

        this.dim = dim;
        this.kSim = kSim;
        this.dimProj = dimProj;
        this.rReps = rReps;
        this.numPartitions = 1 << kSim;

        Random rng = new Random(seed);

        // SimHash hyperplanes: rReps sets of kSim hyperplanes, each dim-dimensional
        simhashVectors = new double[rReps][kSim * dim];
        for (int r = 0; r < rReps; r++) {
            for (int i = 0; i < kSim * dim; i++) {
                simhashVectors[r][i] = rng.nextGaussian();
            }
        }

        // Random projection matrices with entries in {-1, +1}
        dimReductionProjections = new double[rReps][dim][dimProj];
        for (int r = 0; r < rReps; r++) {
            for (int i = 0; i < dim; i++) {
                for (int j = 0; j < dimProj; j++) {
                    dimReductionProjections[r][i][j] = rng.nextBoolean() ? 1.0 : -1.0;
                }
            }
        }
    }

    /**
     * Returns the output FDE dimension: rReps * 2^kSim * dimProj.
     */
    public int getEmbeddingSize() {
        return rReps * numPartitions * dimProj;
    }

    private int getClusterId(double[] vector, int repIndex) {
        int clusterId = 0;
        for (int k = 0; k < kSim; k++) {
            double dot = 0;
            int offset = k * dim;
            for (int d = 0; d < this.dim; d++) {
                dot += vector[d] * simhashVectors[repIndex][offset + d];
            }
            if (dot > 0) {
                clusterId |= (1 << k);
            }
        }
        return clusterId;
    }

    private int hammingDistance(int a, int b) {
        return Integer.bitCount(a ^ b);
    }

    /**
     * Core FDE encoding.
     *
     * @param vectors   multi-vector input, shape [numVectors][dim]
     * @param fillEmpty if true, fill empty clusters from the Hamming-nearest non-empty cluster (document mode)
     * @param normalize if true, normalize cluster centers by count (document mode)
     * @return FDE vector of length getEmbeddingSize()
     */
    public float[] process(double[][] vectors, boolean fillEmpty, boolean normalize) {
        int nVectors = vectors.length;
        float[] output = new float[getEmbeddingSize()];
        int outOffset = 0;
        double scale = 1.0 / Math.sqrt(dimProj);

        for (int r = 0; r < rReps; r++) {
            double[][] centers = new double[numPartitions][dim];
```
Member: This seems too heavy. Can we optimize this by reusing the arrays somehow? Allocating on every loop iteration can get expensive.

Author: Pre-allocated the centers, counts, and clusterVecIndices arrays outside the loop and reset them per repetition. No more per-loop allocation.
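For reference, a minimal sketch of the pre-allocation pattern the author describes; the variable names follow this PR, but the exact reset logic in the committed version may differ.

```java
// Sketch only: buffers hoisted out of the repetition loop and reset in place.
double[][] centers = new double[numPartitions][dim];
int[] counts = new int[numPartitions];
List<List<Integer>> clusterVecIndices = new ArrayList<>(numPartitions);
for (int i = 0; i < numPartitions; i++) {
    clusterVecIndices.add(new ArrayList<>());
}

for (int r = 0; r < rReps; r++) {
    // Reset buffers instead of reallocating them each repetition.
    for (double[] center : centers) {
        java.util.Arrays.fill(center, 0.0);
    }
    java.util.Arrays.fill(counts, 0);
    for (List<Integer> indices : clusterVecIndices) {
        indices.clear();
    }
    // ... cluster assignment, normalization, and projection as below
}
```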
```java
            int[] counts = new int[numPartitions];
            List<List<Integer>> clusterVecIndices = new ArrayList<>(numPartitions);
            for (int i = 0; i < numPartitions; i++) {
                clusterVecIndices.add(new ArrayList<>());
            }

            // Assign vectors to clusters and accumulate
            for (int v = 0; v < nVectors; v++) {
                int cid = getClusterId(vectors[v], r);
                counts[cid]++;
                clusterVecIndices.get(cid).add(v);
                for (int d = 0; d < dim; d++) {
                    centers[cid][d] += vectors[v][d];
                }
            }

            // Normalize by count (document mode)
            if (normalize) {
                for (int c = 0; c < numPartitions; c++) {
                    if (counts[c] > 0) {
                        for (int d = 0; d < dim; d++) {
                            centers[c][d] /= counts[c];
                        }
                    }
                }
            }

            // Fill empty clusters from the Hamming-nearest non-empty cluster (document mode)
            if (fillEmpty) {
                for (int c = 0; c < numPartitions; c++) {
                    if (counts[c] == 0) {
                        int nearest = -1;
                        int minDist = Integer.MAX_VALUE;
                        for (int other = 0; other < numPartitions; other++) {
                            if (counts[other] > 0) {
                                int dist = hammingDistance(c, other);
                                if (dist < minDist) {
                                    minDist = dist;
                                    nearest = other;
                                }
                            }
                        }
                        if (nearest >= 0) {
                            int vecIdx = clusterVecIndices.get(nearest).get(0);
                            System.arraycopy(vectors[vecIdx], 0, centers[c], 0, dim);
                        }
                    }
                }
            }

            // Random projection: centers[c] (dim) x projection (dim x dimProj) -> projected (dimProj)
            for (int c = 0; c < numPartitions; c++) {
                for (int j = 0; j < dimProj; j++) {
                    double val = 0;
                    for (int d = 0; d < dim; d++) {
                        val += centers[c][d] * dimReductionProjections[r][d][j];
                    }
                    output[outOffset++] = (float) (scale * val);
                }
            }
        }
        return output;
    }

    /**
     * Encode document multi-vectors into a single FDE vector.
     * Normalizes cluster centers and fills empty clusters.
     */
    public float[] processDocument(double[][] vectors) {
        return process(vectors, true, true);
    }

    /**
     * Encode query multi-vectors into a single FDE vector.
     * No normalization, no empty-cluster filling.
     */
    public float[] processQuery(double[][] vectors) {
        return process(vectors, false, false);
    }
}
```
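For context, a minimal usage sketch of the encoder. The parameter values are illustrative only (dim=128, k_sim=4, dim_proj=16, r_reps=20, giving FDE dimension 20 * 2^4 * 16 = 5,120, under the 16,000 cap), and the token embeddings are random stand-ins for real model output.

```java
package org.opensearch.knn.processor.muvera; // assumes same package as MuveraEncoder

import java.util.Random;

public class MuveraEncoderExample {
    public static void main(String[] args) {
        // Illustrative parameters: FDE dimension = 20 * 2^4 * 16 = 5,120.
        MuveraEncoder encoder = new MuveraEncoder(128, 4, 16, 20, 42L);

        // Random stand-ins for ColBERT-style token embeddings, shape [numTokens][dim].
        Random random = new Random(0);
        double[][] docTokens = randomVectors(random, 300, 128);
        double[][] queryTokens = randomVectors(random, 32, 128);

        // The encoder seed (42L here) must match between index time and query time.
        float[] docFde = encoder.processDocument(docTokens);
        float[] queryFde = encoder.processQuery(queryTokens);

        // Dot product of the two FDEs approximates the MaxSim score.
        double score = 0;
        for (int i = 0; i < docFde.length; i++) {
            score += (double) queryFde[i] * docFde[i];
        }
        System.out.println("FDE dimension: " + encoder.getEmbeddingSize() + ", approx MaxSim: " + score);
    }

    private static double[][] randomVectors(Random random, int n, int dim) {
        double[][] vectors = new double[n][dim];
        for (int i = 0; i < n; i++) {
            for (int d = 0; d < dim; d++) {
                vectors[i][d] = random.nextGaussian();
            }
        }
        return vectors;
    }
}
```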
Reviewer: Any particular reason to use double instead of float? The return type signatures are all floats. The similarity-match operations could benefit from SIMD/Panama calculations, and memory consumption would also drop by half.

Author: Good point on the memory savings. The internal arrays use double for numerical precision during accumulation: with ~1000 vectors per document, the cluster center sums can get large, and float accumulation could introduce meaningful rounding errors. The output is cast to float at the end since that's what the knn_vector field stores.

That said, for the simhash and projection matrices (which are generated once at init), float would be fine. I'll convert those to float in a follow-up to reduce the encoder's memory footprint. The cluster center accumulation should stay double to avoid precision loss during summation.

Open to converting everything to float if you think the precision tradeoff is acceptable; happy to benchmark the quality impact.

Reviewer: I think this makes sense. Let's reduce the footprint as much as we can.

Author: Converted the simhash and projection matrices to float. Cluster center accumulation stays double to avoid precision loss during summation. See the latest commit.
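A sketch of the mixed-precision layout the author describes. Field names follow this PR, but the exact follow-up commit may differ in detail.

```java
// Sketch of the mixed-precision layout described above (not the committed diff).
// Matrices generated once at init move to float, halving their memory footprint;
// the {-1, +1} projection entries are exactly representable in float.
private final float[][] simhashVectors;
private final float[][][] dimReductionProjections;

// Inside process(): cluster centers still accumulate in double to avoid
// rounding drift when summing ~1000 token vectors per document.
double[][] centers = new double[numPartitions][dim];
// ...
// Cast to float only at the output boundary, matching the knn_vector field type:
// output[outOffset++] = (float) (scale * val);
```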