Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions redisvl/query/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,20 @@
class Vector(BaseModel):
"""
Simple object containing the necessary arguments to perform a multi vector query.

Args:
vector: The vector values as a list of floats or bytes
field_name: The name of the vector field to search
dtype: The data type of the vector (default: "float32")
weight: The weight for this vector in the combined score (default: 1.0)
max_distance: The maximum distance for vector range search (default: 2.0, range: [0.0, 2.0])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for providing the range!

"""

vector: Union[List[float], bytes]
field_name: str
dtype: str = "float32"
weight: float = 1.0
max_distance: float = 2.0
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of a custom validation method, you could validate this with Pydantic's built-in Field (you might have to add `from pydantic import Field`)

Suggested change
max_distance: float = 2.0
max_distance: float = Field(default=2.0, ge=0.0, le=2.0)


@field_validator("dtype")
@classmethod
Expand All @@ -36,6 +44,15 @@ def validate_dtype(cls, dtype: str) -> str:
)
return dtype

@field_validator("max_distance")
@classmethod
def validate_max_distance(cls, max_distance: float) -> float:
if not isinstance(max_distance, (float, int)):
raise ValueError("max_distance must be a value between 0.0 and 2.0")
Comment on lines +50 to +51
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type check isinstance(max_distance, (float, int)) on line 43 is redundant because Pydantic's field validation already ensures the field is of type float. Additionally, this check will always pass since the field is declared as max_distance: float = 2.0. The more important validation is the range check on line 45, which should remain.

Suggested change
if not isinstance(max_distance, (float, int)):
raise ValueError("max_distance must be a value between 0.0 and 2.0")

Copilot uses AI. Check for mistakes.
if max_distance < 0.0 or max_distance > 2.0:
raise ValueError("max_distance must be a value between 0.0 and 2.0")
return max_distance

@model_validator(mode="after")
def validate_vector(self) -> Self:
"""If the vector passed in is an array of float convert it to a byte string."""
Expand Down Expand Up @@ -361,14 +378,14 @@ def _build_query_string(self) -> str:

# base KNN query
range_queries = []
for i, (vector, field) in enumerate(
[(v.vector, v.field_name) for v in self._vectors]
for i, (vector, field, max_dist) in enumerate(
[(v.vector, v.field_name, v.max_distance) for v in self._vectors]
):
range_queries.append(
f"@{field}:[VECTOR_RANGE 2.0 $vector_{i}]=>{{$YIELD_DISTANCE_AS: distance_{i}}}"
f"@{field}:[VECTOR_RANGE {max_dist} $vector_{i}]=>{{$YIELD_DISTANCE_AS: distance_{i}}}"
)

range_query = " | ".join(range_queries)
range_query = " AND ".join(range_queries)

filter_expression = self._filter_expression
if isinstance(self._filter_expression, FilterExpression):
Expand Down
57 changes: 57 additions & 0 deletions tests/integration/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,63 @@ def test_multivector_query_datatypes(index):
) # allow for small floating point error


# parametrized format is ((max_distance_1, max_distance_2), expected_num_results)
@pytest.mark.parametrize(
    "distances_and_results",
    [
        [(0.2, 0.2), 0],
        [(0.9, 0.2), 0],
        [(0.35, 0.5), 1],
        [(0.2, 0.9), 2],
        [(0.3, 1.0), 3],
        [(1.3, 1.9), 6],
    ],
)
def test_multivector_query_max_distances(index, distances_and_results):
    """Check that per-vector ``max_distance`` values bound multi-vector results.

    Each parametrized case has the shape
    ``((max_distance_1, max_distance_2), expected_num_results)``.
    """
    skip_if_redis_version_below(index.client, "7.2.0")

    max_distances, expected_count = distances_and_results

    # one Vector per embedding field, each carrying its own distance limit
    query_vectors = [
        Vector(vector=values, field_name=field_name, max_distance=limit)
        for values, field_name, limit in zip(
            [[0.1, 0.2, 0.5], [1.2, 0.3, -0.4, 0.7, 0.2]],
            ["user_embedding", "image_embedding"],
            max_distances,
        )
    ]

    results = index.query(
        MultiVectorQuery(
            vectors=query_vectors,
            return_fields=[
                "distance_0",
                "distance_1",
                "score_0",
                "score_1",
                "user_embedding",
                "image_embedding",
            ],
            num_results=10,
        )
    )

    # the total number of hits must match the expectation for this case
    assert len(results) == expected_count

    # every hit has to respect the limit of the vector it was matched against
    for row in results:
        assert float(row["distance_0"]) <= max_distances[0]
        assert float(row["distance_1"]) <= max_distances[1]

    # make sure both limits are applied independently rather than the
    # smaller of the two being used for both fields
    if results:
        widest_first = max([float(row["distance_0"]) for row in results])
        widest_second = max([float(row["distance_1"]) for row in results])

        # NOTE: this only holds for the specific values used in these cases
        assert (widest_first > max_distances[1]) or (
            widest_second > max_distances[0]
        )


def test_multivector_query_mixed_index(index):
# test that we can do multi vector queries on indices with both a 'flat' and 'hnsw' index
skip_if_redis_version_below(index.client, "7.2.0")
Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_redis_cluster_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def test_search_index_cluster_info(redis_cluster_url):
finally:
index.delete(drop=True)


@pytest.mark.requires_cluster
@pytest.mark.asyncio
async def test_async_search_index_cluster_info(redis_cluster_url):
Expand All @@ -110,6 +111,7 @@ async def test_async_search_index_cluster_info(redis_cluster_url):
await index.delete(drop=True)
await client.aclose()


@pytest.mark.requires_cluster
@pytest.mark.asyncio
async def test_async_search_index_client(redis_cluster_url):
Expand Down
1 change: 1 addition & 0 deletions tests/integration/test_search_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ def test_search_index_delete(index):
assert not index.exists()
assert index.name not in convert_bytes(index.client.execute_command("FT._LIST"))


@pytest.mark.parametrize("num_docs", [0, 1, 5, 10, 2042])
def test_search_index_clear(index, num_docs):
index.create(overwrite=True, drop=True)
Expand Down
41 changes: 36 additions & 5 deletions tests/unit/test_aggregation_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ def test_multi_vector_query():
assert multivector_query._vectors[0].field_name == "field_1"
assert multivector_query._vectors[0].weight == 1.0
assert multivector_query._vectors[0].dtype == "float32"
assert multivector_query._vectors[0].max_distance == 2.0
assert multivector_query._filter_expression == None
assert multivector_query._num_results == 10
assert multivector_query._loadfields == []
Expand All @@ -325,10 +326,21 @@ def test_multi_vector_query():
vector_field_names = ["field_1", "field_2", "field_3", "field_4"]
weights = [0.2, 0.5, 0.6, 0.1]
dtypes = ["float32", "float32", "float32", "float32"]
distances = [2.0, 1.5, 0.4, 0.01]

args = []
for vec, field, weight, dtype in zip(vectors, vector_field_names, weights, dtypes):
args.append(Vector(vector=vec, field_name=field, weight=weight, dtype=dtype))
for vec, field, weight, dtype, distance in zip(
vectors, vector_field_names, weights, dtypes, distances
):
args.append(
Vector(
vector=vec,
field_name=field,
weight=weight,
dtype=dtype,
max_distance=distance,
)
)

multivector_query = MultiVectorQuery(vectors=args)

Expand Down Expand Up @@ -358,16 +370,28 @@ def test_multi_vector_query_string():
field_2 = "image embedding"
weight_1 = 0.2
weight_2 = 0.7
max_distance_1 = 0.7
max_distance_2 = 1.8
multi_vector_query = MultiVectorQuery(
vectors=[
Vector(vector=sample_vector_2, field_name=field_1, weight=weight_1),
Vector(vector=sample_vector_3, field_name=field_2, weight=weight_2),
Vector(
vector=sample_vector_2,
field_name=field_1,
weight=weight_1,
max_distance=max_distance_1,
),
Vector(
vector=sample_vector_3,
field_name=field_2,
weight=weight_2,
max_distance=max_distance_2,
),
]
)

assert (
str(multi_vector_query)
== f"@{field_1}:[VECTOR_RANGE 2.0 $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} | @{field_2}:[VECTOR_RANGE 2.0 $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * {weight_1} + @score_1 * {weight_2} AS combined_score SORTBY 2 @combined_score DESC MAX 10"
== f"@{field_1}:[VECTOR_RANGE {max_distance_1} $vector_0]=>{{$YIELD_DISTANCE_AS: distance_0}} AND @{field_2}:[VECTOR_RANGE {max_distance_2} $vector_1]=>{{$YIELD_DISTANCE_AS: distance_1}} SCORER TFIDF DIALECT 2 APPLY (2 - @distance_0)/2 AS score_0 APPLY (2 - @distance_1)/2 AS score_1 APPLY @score_0 * {weight_1} + @score_1 * {weight_2} AS combined_score SORTBY 2 @combined_score DESC MAX 10"
)


Expand Down Expand Up @@ -411,6 +435,13 @@ def test_vector_object_validation():
vec = Vector(vector=sample_vector, field_name="text embedding", dtype=dtype)
assert isinstance(vec, Vector)

# max_distance is bounded to [0, 2.0]
for distance in [-0.1, 2.001, 35, -float("inf"), +float("inf")]:
with pytest.raises(ValueError):
vec = Vector(
vector=sample_vector, field_name="text embedding", max_distance=distance
)


def test_vector_object_handles_byte_conversion():
# test that passing an array of floats gets converted to bytes
Expand Down