Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
862f6aa
Add MUVERA processors for multi-vector search
prasadnu Mar 13, 2026
eb2b695
Merge branch 'main' into feature/muvera-processors
navneet1v Mar 14, 2026
19384aa
Add MUVERA processor integration tests
prasadnu Mar 14, 2026
29f46f3
Merge branch 'main' into feature/muvera-processors
navneet1v Mar 17, 2026
ddaaaf1
Fix IT compilation: rename deleteIndex helper
prasadnu Mar 17, 2026
bab7366
Address review nits: clean up imports
prasadnu Mar 17, 2026
ba68cbd
Validate MUVERA hyperparameters bounds
prasadnu Mar 20, 2026
f0e3b4b
Fix spotless formatting in MuveraEncoderTests
prasadnu Mar 20, 2026
3e75b93
Merge branch 'main' into feature/muvera-processors
navneet1v Mar 27, 2026
8483eee
Add ignoreMissing support to MuveraIngestProcessor with unit tests
prasadnu Apr 2, 2026
8d1a2a4
Cap prefetchK to 10,000 in MuveraSearchRequestProcessor
prasadnu Apr 2, 2026
beb6241
Refactor processRequest into validateUserRequest/extractRequestParams…
prasadnu Apr 2, 2026
13917a3
Move readLongProperty to MuveraProcessorUtils, add ordering and ignor…
prasadnu Apr 2, 2026
14ec585
Add ignore_failure search pipeline IT and ignore_missing=false ingest IT
prasadnu Apr 2, 2026
6ea3388
Convert simhash/projection to float, reuse arrays in process loop, ad…
prasadnu Apr 17, 2026
8a3073b
Refactor search processor to template query approach with context var…
prasadnu Apr 19, 2026
1379664
Fix template query extraction: use TemplateQueryBuilder.getContent() …
prasadnu Apr 19, 2026
fdfb796
Add debug logging to trace template query processing
prasadnu Apr 19, 2026
f9cc137
Fix missing closing brace in MuveraIngestProcessor
prasadnu Apr 19, 2026
979eb1b
Add MuveraProcessorUtils with shared readLongProperty
prasadnu Apr 19, 2026
dd2b92f
Remove debug logging from search request processor
prasadnu Apr 19, 2026
d45f6c9
Update search request processor tests for template query format
prasadnu Apr 19, 2026
99b5b68
Update integration tests for template query format
prasadnu Apr 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 3.6](https://github.com/opensearch-project/k-NN/compare/main...HEAD)
### Features
* Add MUVERA ingest and search request processors for multi-vector FDE encoding [#3163](https://github.com/opensearch-project/k-NN/issues/3163)
* Support Lucene BBQ Flat for 1 bit [#3154](https://github.com/opensearch-project/k-NN/pull/3154)
* Add 32x support for SQ encoder on Faiss [#3193](https://github.com/opensearch-project/k-NN/pull/3193)
* Faiss SQ 1bit MOS changes [#3182](https://github.com/opensearch-project/k-NN/pull/3182)
Expand Down
19 changes: 18 additions & 1 deletion src/main/java/org/opensearch/knn/plugin/KNNPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@
import org.opensearch.knn.quantization.models.quantizationState.QuantizationStateCache;
import org.opensearch.knn.search.extension.MMRSearchExtBuilder;

import org.opensearch.ingest.Processor;
import org.opensearch.knn.processor.muvera.MuveraIngestProcessor;
import org.opensearch.knn.processor.muvera.MuveraSearchRequestProcessor;
import org.opensearch.knn.search.processor.mmr.MMRKnnQueryTransformer;
import org.opensearch.knn.search.processor.mmr.MMROverSampleProcessor;
import org.opensearch.knn.search.processor.mmr.MMRQueryTransformer;
Expand All @@ -106,6 +109,7 @@
import org.opensearch.plugins.ActionPlugin;
import org.opensearch.plugins.EnginePlugin;
import org.opensearch.plugins.ExtensiblePlugin;
import org.opensearch.plugins.IngestPlugin;
import org.opensearch.plugins.MapperPlugin;
import org.opensearch.plugins.Plugin;
import org.opensearch.plugins.ReloadablePlugin;
Expand Down Expand Up @@ -190,7 +194,8 @@ public class KNNPlugin extends Plugin
ExtensiblePlugin,
SystemIndexPlugin,
ReloadablePlugin,
SearchPipelinePlugin {
SearchPipelinePlugin,
IngestPlugin {

public static final String LEGACY_KNN_BASE_URI = "/_opendistro/_knn";
public static final String KNN_BASE_URI = "/_plugins/_knn";
Expand Down Expand Up @@ -518,4 +523,16 @@ public Map<String, SystemGeneratedProcessor.SystemGeneratedFactory<SearchRespons
) {
return Map.of(MMRRerankProcessor.MMRRerankProcessorFactory.TYPE, new MMRRerankProcessor.MMRRerankProcessorFactory());
}

/**
 * Registers the MUVERA ingest processor so ingest pipelines can encode
 * multi-vector fields into fixed-dimensional encodings at index time.
 *
 * @param parameters ingest infrastructure handles supplied by the node
 * @return map from processor type name to its factory
 */
@Override
public Map<String, Processor.Factory> getProcessors(Processor.Parameters parameters) {
    final Processor.Factory muveraIngestFactory = new MuveraIngestProcessor.Factory();
    return Map.of(MuveraIngestProcessor.TYPE, muveraIngestFactory);
}

/**
 * Registers the MUVERA search request processor so search pipelines can
 * rewrite multi-vector queries into FDE-based k-NN queries.
 *
 * @param parameters search pipeline infrastructure handles supplied by the node
 * @return map from processor type name to its factory
 */
@Override
public Map<String, org.opensearch.search.pipeline.Processor.Factory<SearchRequestProcessor>> getRequestProcessors(
    Parameters parameters
) {
    final MuveraSearchRequestProcessor.Factory muveraSearchFactory = new MuveraSearchRequestProcessor.Factory();
    return Map.of(MuveraSearchRequestProcessor.TYPE, muveraSearchFactory);
}
}
221 changes: 221 additions & 0 deletions src/main/java/org/opensearch/knn/processor/muvera/MuveraEncoder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.processor.muvera;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

/**
 * MUVERA (Multi-Vector Retrieval via Fixed Dimensional Encodings).
 *
 * <p>Converts variable-length multi-vector embeddings (e.g. ColBERT/ColPali token embeddings)
 * into fixed-size single vectors whose dot product approximates MaxSim scoring.
 *
 * <p>The output FDE dimension is {@code rReps * 2^kSim * dimProj}.
 *
 * <p>Document processing normalizes cluster centers by count and fills empty clusters from the
 * Hamming-nearest non-empty cluster. Query processing uses raw sums with no normalization and
 * no empty-cluster filling.
 *
 * <p>The random seed must be identical between index time and query time so that both sides
 * generate the same SimHash hyperplanes and projection matrices.
 *
 * <p>Instances are immutable after construction and safe to share across threads.
 */
public class MuveraEncoder {
    private final int dim;
    private final int kSim;
    private final int dimProj;
    private final int rReps;
    private final int numPartitions;
    // Generated once at construction and read-only afterwards. Stored as float (not double)
    // to halve the memory footprint; dot products still accumulate in double for precision.
    private final float[][] simhashVectors;
    private final float[][][] dimReductionProjections;

    /**
     * Maximum allowed FDE dimension. Matches the k-NN engine max dimension limit (16,000)
     * to ensure the FDE vector can be indexed in any supported engine.
     */
    static final int MAX_FDE_DIMENSION = 16_000;

    /**
     * Upper bound on k_sim. Values above 30 would overflow the int shifts used for the
     * partition count and cluster ids; without this bound, k_sim >= 64 also wraps the long
     * shift in the FDE dimension check and could slip past validation entirely.
     */
    private static final int MAX_K_SIM = 30;

    /**
     * Creates an encoder with fixed random SimHash hyperplanes and {-1, +1} projection matrices.
     *
     * @param dim dimension of each input vector; must be positive
     * @param kSim number of SimHash bits per repetition (yields 2^kSim partitions); must be in [0, 30]
     * @param dimProj reduced dimension per partition; must be positive
     * @param rReps number of independent repetitions; must be positive
     * @param seed RNG seed; must match between index time and query time
     * @throws IllegalArgumentException if any parameter is out of range or the resulting
     *         FDE dimension exceeds {@link #MAX_FDE_DIMENSION}
     */
    public MuveraEncoder(int dim, int kSim, int dimProj, int rReps, long seed) {
        if (dim <= 0) {
            throw new IllegalArgumentException("dim must be positive, got: " + dim);
        }
        if (kSim < 0 || kSim > MAX_K_SIM) {
            throw new IllegalArgumentException("k_sim must be in [0, " + MAX_K_SIM + "], got: " + kSim);
        }
        if (dimProj <= 0) {
            throw new IllegalArgumentException("dim_proj must be positive, got: " + dimProj);
        }
        if (rReps <= 0) {
            throw new IllegalArgumentException("r_reps must be positive, got: " + rReps);
        }

        // Validate that the resulting FDE dimension does not exceed the k-NN engine limit.
        // FDE dimension = rReps * 2^kSim * dimProj (computed in long to avoid int overflow).
        long fdeDimension = (long) rReps * (1L << kSim) * dimProj;
        if (fdeDimension > MAX_FDE_DIMENSION) {
            throw new IllegalArgumentException(
                "MUVERA parameters produce an FDE dimension of ["
                    + fdeDimension
                    + "] which exceeds the maximum allowed dimension of ["
                    + MAX_FDE_DIMENSION
                    + "]. Reduce r_reps, k_sim, or dim_proj. (r_reps="
                    + rReps
                    + " * 2^k_sim="
                    + (1L << kSim)
                    + " * dim_proj="
                    + dimProj
                    + ")"
            );
        }

        this.dim = dim;
        this.kSim = kSim;
        this.dimProj = dimProj;
        this.rReps = rReps;
        this.numPartitions = 1 << kSim;

        Random rng = new Random(seed);

        // SimHash hyperplanes: rReps sets of kSim hyperplanes, each dim-dimensional,
        // flattened per repetition as [hyperplane k][component d] -> index k * dim + d.
        simhashVectors = new float[rReps][kSim * dim];
        for (int r = 0; r < rReps; r++) {
            for (int i = 0; i < kSim * dim; i++) {
                simhashVectors[r][i] = (float) rng.nextGaussian();
            }
        }

        // Rademacher random projection matrices with entries in {-1, +1}.
        dimReductionProjections = new float[rReps][dim][dimProj];
        for (int r = 0; r < rReps; r++) {
            for (int i = 0; i < dim; i++) {
                for (int j = 0; j < dimProj; j++) {
                    dimReductionProjections[r][i][j] = rng.nextBoolean() ? 1.0f : -1.0f;
                }
            }
        }
    }

    /**
     * Returns the output FDE dimension: rReps * 2^kSim * dimProj.
     */
    public int getEmbeddingSize() {
        return rReps * numPartitions * dimProj;
    }

    /**
     * Computes the SimHash partition id of a vector for one repetition: bit k of the id is set
     * when the vector has a positive dot product with hyperplane k of that repetition.
     */
    private int getClusterId(double[] vector, int repIndex) {
        final float[] hyperplanes = simhashVectors[repIndex];
        int clusterId = 0;
        for (int k = 0; k < kSim; k++) {
            final int offset = k * dim;
            double dot = 0;
            for (int d = 0; d < dim; d++) {
                dot += vector[d] * hyperplanes[offset + d];
            }
            if (dot > 0) {
                clusterId |= (1 << k);
            }
        }
        return clusterId;
    }

    /** Number of differing bits between two partition ids. */
    private static int hammingDistance(int a, int b) {
        return Integer.bitCount(a ^ b);
    }

    /**
     * Core FDE encoding.
     *
     * @param vectors multi-vector input, shape [numVectors][dim]; every row must have length {@code dim}
     * @param fillEmpty if true, fill empty clusters from the Hamming-nearest non-empty cluster (document mode)
     * @param normalize if true, normalize cluster centers by count (document mode)
     * @return FDE vector of length {@link #getEmbeddingSize()}
     * @throws IllegalArgumentException if any input row does not have length {@code dim}
     */
    public float[] process(double[][] vectors, boolean fillEmpty, boolean normalize) {
        final int nVectors = vectors.length;
        for (int v = 0; v < nVectors; v++) {
            if (vectors[v].length != dim) {
                throw new IllegalArgumentException(
                    "vector at index [" + v + "] has dimension [" + vectors[v].length + "], expected [" + dim + "]"
                );
            }
        }

        final float[] output = new float[getEmbeddingSize()];
        final double scale = 1.0 / Math.sqrt(dimProj);

        // Scratch buffers reused across repetitions to avoid per-iteration allocation.
        // Cluster sums accumulate in double to limit rounding error over large documents.
        final double[][] centers = new double[numPartitions][dim];
        final int[] counts = new int[numPartitions];
        // Only the first vector assigned to a cluster is ever read (for empty-cluster filling),
        // so a single index per cluster suffices; -1 marks "no vector assigned".
        final int[] firstVecInCluster = new int[numPartitions];

        int outOffset = 0;
        for (int r = 0; r < rReps; r++) {
            for (int c = 0; c < numPartitions; c++) {
                Arrays.fill(centers[c], 0.0);
            }
            Arrays.fill(counts, 0);
            Arrays.fill(firstVecInCluster, -1);

            // Assign vectors to clusters and accumulate component sums.
            for (int v = 0; v < nVectors; v++) {
                final int cid = getClusterId(vectors[v], r);
                if (counts[cid] == 0) {
                    firstVecInCluster[cid] = v;
                }
                counts[cid]++;
                final double[] vec = vectors[v];
                final double[] center = centers[cid];
                for (int d = 0; d < dim; d++) {
                    center[d] += vec[d];
                }
            }

            // Document mode: convert sums to means.
            if (normalize) {
                for (int c = 0; c < numPartitions; c++) {
                    if (counts[c] > 0) {
                        for (int d = 0; d < dim; d++) {
                            centers[c][d] /= counts[c];
                        }
                    }
                }
            }

            // Document mode: copy the first vector of the Hamming-nearest non-empty
            // cluster into each empty cluster.
            if (fillEmpty) {
                for (int c = 0; c < numPartitions; c++) {
                    if (counts[c] != 0) {
                        continue;
                    }
                    int nearest = -1;
                    int minDist = Integer.MAX_VALUE;
                    for (int other = 0; other < numPartitions; other++) {
                        if (counts[other] > 0) {
                            final int dist = hammingDistance(c, other);
                            if (dist < minDist) {
                                minDist = dist;
                                nearest = other;
                            }
                        }
                    }
                    if (nearest >= 0) {
                        System.arraycopy(vectors[firstVecInCluster[nearest]], 0, centers[c], 0, dim);
                    }
                }
            }

            // Random projection: centers[c] (dim) x projection (dim x dimProj) -> projected (dimProj),
            // scaled by 1/sqrt(dimProj), appended in partition order.
            final float[][] projection = dimReductionProjections[r];
            for (int c = 0; c < numPartitions; c++) {
                final double[] center = centers[c];
                for (int j = 0; j < dimProj; j++) {
                    double val = 0;
                    for (int d = 0; d < dim; d++) {
                        val += center[d] * projection[d][j];
                    }
                    output[outOffset++] = (float) (scale * val);
                }
            }
        }
        return output;
    }

    /**
     * Encode document multi-vectors into a single FDE vector.
     * Normalizes cluster centers and fills empty clusters.
     */
    public float[] processDocument(double[][] vectors) {
        return process(vectors, true, true);
    }

    /**
     * Encode query multi-vectors into a single FDE vector.
     * No normalization, no empty cluster filling.
     */
    public float[] processQuery(double[][] vectors) {
        return process(vectors, false, false);
    }
}
Loading