Skip to content

Commit c086d09

Browse files
authored
.Net: [MEVD] SQL Server approximate vector search (#13573)
Closes #13270
1 parent 9f2dafc commit c086d09

File tree

6 files changed

+615
-46
lines changed

6 files changed

+615
-46
lines changed

dotnet/src/VectorData/SqlServer/SqlServerCollection.cs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,18 +116,24 @@ public override Task EnsureCollectionExistsAsync(CancellationToken cancellationT
116116
/// <summary>
/// Creates the backing table for this collection and then runs any follow-up commands
/// (such as vector index creation) returned by <see cref="SqlServerCommandBuilder.CreateTable"/>.
/// </summary>
/// <param name="ifNotExists">
/// Forwarded unchanged to <see cref="SqlServerCommandBuilder.CreateTable"/>; presumably makes the
/// creation conditional on the table not existing yet (the builder body is not fully visible here).
/// </param>
/// <param name="cancellationToken">Token used to cancel execution of the individual commands.</param>
private async Task CreateCollectionAsync(bool ifNotExists, CancellationToken cancellationToken)
{
    using SqlConnection connection = new(this._connectionString);
    List<SqlCommand> commands = SqlServerCommandBuilder.CreateTable(
        connection,
        this._schema,
        this.Name,
        ifNotExists,
        this._model);

    try
    {
        // Commands are executed one at a time, in the order the builder produced them.
        foreach (SqlCommand command in commands)
        {
            await connection.ExecuteWithErrorHandlingAsync(
                this._collectionMetadata,
                "CreateCollection",
                () => command.ExecuteNonQueryAsync(cancellationToken),
                cancellationToken).ConfigureAwait(false);
        }
    }
    finally
    {
        // Dispose every command here rather than per iteration: when an earlier command throws,
        // the commands that were never reached would otherwise be left undisposed.
        foreach (SqlCommand command in commands)
        {
            command.Dispose();
        }
    }
}
132138

133139
/// <inheritdoc/>

dotnet/src/VectorData/SqlServer/SqlServerCommandBuilder.cs

Lines changed: 173 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@ namespace Microsoft.SemanticKernel.Connectors.SqlServer;
1818

1919
internal static class SqlServerCommandBuilder
2020
{
21-
internal static SqlCommand CreateTable(
21+
internal static List<SqlCommand> CreateTable(
2222
SqlConnection connection,
2323
string? schema,
2424
string tableName,
2525
bool ifNotExists,
2626
CollectionModel model)
2727
{
28+
List<SqlCommand> commands = [];
29+
2830
StringBuilder sb = new(200);
2931
if (ifNotExists)
3032
{
@@ -90,17 +92,6 @@ internal static SqlCommand CreateTable(
9092
}
9193
}
9294

93-
foreach (var vectorProperty in model.VectorProperties)
94-
{
95-
switch (vectorProperty.IndexKind)
96-
{
97-
case IndexKind.Flat or null or "": // TODO: Move to early validation
98-
break;
99-
default:
100-
throw new NotSupportedException($"Index kind {vectorProperty.IndexKind} is not supported.");
101-
}
102-
}
103-
10495
// Create full-text catalog and index for properties marked as IsFullTextIndexed
10596
var fullTextProperties = new List<DataPropertyModel>();
10697
foreach (var dataProperty in model.DataProperties)
@@ -145,7 +136,45 @@ internal static SqlCommand CreateTable(
145136

146137
sb.Append("END;");
147138

148-
return connection.CreateCommand(sb);
139+
commands.Add(connection.CreateCommand(sb));
140+
141+
// CREATE VECTOR INDEX must be in a separate batch from CREATE TABLE.
142+
// It is also a preview feature in SQL Server 2025, requiring PREVIEW_FEATURES to be enabled.
143+
bool hasVectorIndex = false;
144+
foreach (var vectorProperty in model.VectorProperties)
145+
{
146+
switch (vectorProperty.IndexKind)
147+
{
148+
case IndexKind.Flat or null or "":
149+
continue;
150+
151+
case IndexKind.DiskAnn:
152+
if (!hasVectorIndex)
153+
{
154+
SqlCommand enablePreview = connection.CreateCommand();
155+
enablePreview.CommandText = "ALTER DATABASE SCOPED CONFIGURATION SET PREVIEW_FEATURES = ON;";
156+
commands.Add(enablePreview);
157+
hasVectorIndex = true;
158+
}
159+
160+
string distanceFunction = vectorProperty.DistanceFunction ?? DistanceFunction.CosineDistance;
161+
(string distanceMetric, _) = MapDistanceFunction(distanceFunction);
162+
163+
StringBuilder vectorIndexSb = new(200);
164+
vectorIndexSb.Append("CREATE VECTOR INDEX ");
165+
vectorIndexSb.AppendIndexName(tableName, vectorProperty.StorageName);
166+
vectorIndexSb.Append(" ON ").AppendTableName(schema, tableName);
167+
vectorIndexSb.Append('(').AppendIdentifier(vectorProperty.StorageName).Append(')');
168+
vectorIndexSb.Append(" WITH (METRIC = '").Append(distanceMetric).AppendLine("', TYPE = 'DISKANN');");
169+
commands.Add(connection.CreateCommand(vectorIndexSb));
170+
break;
171+
172+
default:
173+
throw new NotSupportedException($"Index kind '{vectorProperty.IndexKind}' is not supported by the SQL Server connector.");
174+
}
175+
}
176+
177+
return commands;
149178
}
150179

151180
internal static SqlCommand DropTableIfExists(SqlConnection connection, string? schema, string tableName)
@@ -414,6 +443,21 @@ internal static SqlCommand SelectVector<TRecord>(
414443
string distanceFunction = vectorProperty.DistanceFunction ?? DistanceFunction.CosineDistance;
415444
(string distanceMetric, string sorting) = MapDistanceFunction(distanceFunction);
416445

446+
return UseVectorSearch(vectorProperty)
447+
? SelectVectorWithVectorSearch(connection, schema, tableName, vectorProperty, model, top, options, vector, distanceMetric, sorting)
448+
: SelectVectorWithVectorDistance(connection, schema, tableName, vectorProperty, model, top, options, vector, distanceMetric, sorting);
449+
}
450+
451+
private static SqlCommand SelectVectorWithVectorDistance<TRecord>(
452+
SqlConnection connection, string? schema, string tableName,
453+
VectorPropertyModel vectorProperty,
454+
CollectionModel model,
455+
int top,
456+
VectorSearchOptions<TRecord> options,
457+
SqlVector<float> vector,
458+
string distanceMetric,
459+
string sorting)
460+
{
417461
SqlCommand command = connection.CreateCommand();
418462
command.Parameters.AddWithValue("@vector", vector);
419463

@@ -467,6 +511,60 @@ internal static SqlCommand SelectVector<TRecord>(
467511
return command;
468512
}
469513

514+
/// <summary>
/// Builds the SELECT command that performs approximate nearest neighbor search through the
/// VECTOR_SEARCH() function, for vector properties that have a vector index (e.g. DiskANN).
/// </summary>
/// <exception cref="NotSupportedException">
/// Thrown when <paramref name="options"/> specifies a filter.
/// </exception>
private static SqlCommand SelectVectorWithVectorSearch<TRecord>(
    SqlConnection connection, string? schema, string tableName,
    VectorPropertyModel vectorProperty,
    CollectionModel model,
    int top,
    VectorSearchOptions<TRecord> options,
    SqlVector<float> vector,
    string distanceMetric,
    string sorting)
{
    // VECTOR_SEARCH() only post-filters today: the TOP_N candidates are picked first and predicates
    // are applied afterwards. Since pre-filtering is unavailable, filters are rejected outright.
    if (options.Filter is not null)
    {
        throw new NotSupportedException(
            "Filtering is not supported with approximate vector search (VECTOR_SEARCH). " +
            "Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE.");
    }

    SqlCommand searchCommand = connection.CreateCommand();
    searchCommand.Parameters.AddWithValue("@vector", vector);

    StringBuilder query = new(300);

    // Projection: the requested table columns (alias 't') plus the 'distance' column that
    // VECTOR_SEARCH adds to its result (alias 's'), exposed to callers as 'score'.
    query.Append("SELECT ");
    query.AppendIdentifiers(model.Properties, prefix: "t.", includeVectors: options.IncludeVectors);
    query.AppendLine(",");
    query.AppendLine("s.[distance] AS [score]");

    // Source: the VECTOR_SEARCH table function. TOP_N covers the skipped rows as well, so that
    // the OFFSET/FETCH clause below still has 'top' rows left to return.
    query.Append("FROM VECTOR_SEARCH(TABLE = ");
    query.AppendTableName(schema, tableName);
    query.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName);
    query.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\'');
    query.Append(", TOP_N = ").Append(top + options.Skip).AppendLine(") AS s");

    // Optional upper bound on the distance, applied to the candidates VECTOR_SEARCH returned.
    if (options.ScoreThreshold is { } scoreThreshold)
    {
        searchCommand.Parameters.AddWithValue("@scoreThreshold", scoreThreshold);
        query.AppendLine("WHERE s.[distance] <= @scoreThreshold");
    }

    // Ordering and paging.
    query.Append("ORDER BY [score] ").AppendLine(sorting);
    query.Append("OFFSET ").Append(options.Skip)
        .Append(" ROWS FETCH NEXT ").Append(top)
        .Append(" ROWS ONLY;");

    searchCommand.CommandText = query.ToString();
    return searchCommand;
}
567+
470568
internal static SqlCommand SelectHybrid<TRecord>(
471569
SqlConnection connection, string? schema, string tableName,
472570
VectorPropertyModel vectorProperty,
@@ -477,6 +575,17 @@ internal static SqlCommand SelectHybrid<TRecord>(
477575
SqlVector<float> vector,
478576
string keywords)
479577
{
578+
bool useVectorSearch = UseVectorSearch(vectorProperty);
579+
580+
// VECTOR_SEARCH() currently only supports post-filtering (TOP_N candidates are returned first,
581+
// then predicates are applied). Pre-filtering is not supported.
582+
if (useVectorSearch && options.Filter is not null)
583+
{
584+
throw new NotSupportedException(
585+
"Filtering is not supported with approximate vector search (VECTOR_SEARCH). " +
586+
"Remove the filter or use IndexKind.Flat for exact search with VECTOR_DISTANCE.");
587+
}
588+
480589
string distanceFunction = vectorProperty.DistanceFunction ?? DistanceFunction.CosineDistance;
481590
(string distanceMetric, _) = MapDistanceFunction(distanceFunction);
482591

@@ -531,36 +640,54 @@ internal static SqlCommand SelectHybrid<TRecord>(
531640
sb.AppendLine("),");
532641

533642
// CTE 2: Semantic/vector search
534-
sb.AppendLine("semantic_search AS (");
535-
sb.AppendLine(" SELECT TOP(@candidateCount)");
536-
sb.Append(" ").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
537-
sb.AppendLine(" RANK() OVER (ORDER BY cosine_distance) AS [rank]");
538-
sb.AppendLine(" FROM (");
539-
sb.AppendLine(" SELECT TOP(@candidateCount)");
540-
sb.Append(" w.").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
541-
sb.Append(" VECTOR_DISTANCE('").Append(distanceMetric).Append("', ")
542-
.AppendIdentifier(vectorProperty.StorageName)
543-
.Append(", CAST(@vector AS VECTOR(").Append(vector.Length).AppendLine("))) AS cosine_distance");
544-
sb.Append(" FROM ").AppendTableName(schema, tableName).AppendLine(" w");
545-
546-
// Apply filter to semantic search if specified
547-
if (options.Filter is not null)
643+
if (useVectorSearch)
548644
{
549-
// We need to re-translate the filter for the semantic search CTE
550-
// The parameters are already added from keyword search, so we start fresh for this CTE
551-
int filterParamStart = command.Parameters.Count;
552-
SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: filterParamStart, tableAlias: "w");
553-
translator.Translate(appendWhere: true);
554-
foreach (object parameter in translator.ParameterValues)
645+
// Use VECTOR_SEARCH() for approximate nearest neighbor search with a vector index
646+
sb.AppendLine("semantic_search AS (");
647+
sb.AppendLine(" SELECT TOP(@candidateCount)");
648+
sb.Append(" t.").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
649+
sb.AppendLine(" RANK() OVER (ORDER BY s.[distance]) AS [rank]");
650+
sb.AppendLine(" FROM VECTOR_SEARCH(TABLE = ");
651+
sb.Append(" ").AppendTableName(schema, tableName);
652+
sb.Append(" AS t, COLUMN = ").AppendIdentifier(vectorProperty.StorageName);
653+
sb.Append(", SIMILAR_TO = @vector, METRIC = '").Append(distanceMetric).Append('\'');
654+
sb.Append(", TOP_N = @candidateCount").AppendLine(") AS s");
655+
sb.AppendLine("),");
656+
}
657+
else
658+
{
659+
// Use VECTOR_DISTANCE() for exact brute-force search (flat index / no index)
660+
sb.AppendLine("semantic_search AS (");
661+
sb.AppendLine(" SELECT TOP(@candidateCount)");
662+
sb.Append(" ").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
663+
sb.AppendLine(" RANK() OVER (ORDER BY cosine_distance) AS [rank]");
664+
sb.AppendLine(" FROM (");
665+
sb.AppendLine(" SELECT TOP(@candidateCount)");
666+
sb.Append(" w.").AppendIdentifier(model.KeyProperty.StorageName).AppendLine(",");
667+
sb.Append(" VECTOR_DISTANCE('").Append(distanceMetric).Append("', ")
668+
.AppendIdentifier(vectorProperty.StorageName)
669+
.Append(", CAST(@vector AS VECTOR(").Append(vector.Length).AppendLine("))) AS cosine_distance");
670+
sb.Append(" FROM ").AppendTableName(schema, tableName).AppendLine(" w");
671+
672+
// Apply filter to semantic search if specified
673+
if (options.Filter is not null)
555674
{
556-
command.AddParameter(property: null, $"@_{filterParamStart++}", parameter);
675+
// We need to re-translate the filter for the semantic search CTE
676+
// The parameters are already added from keyword search, so we start fresh for this CTE
677+
int filterParamStart = command.Parameters.Count;
678+
SqlServerFilterTranslator translator = new(model, options.Filter, sb, startParamIndex: filterParamStart, tableAlias: "w");
679+
translator.Translate(appendWhere: true);
680+
foreach (object parameter in translator.ParameterValues)
681+
{
682+
command.AddParameter(property: null, $"@_{filterParamStart++}", parameter);
683+
}
684+
sb.AppendLine();
557685
}
558-
sb.AppendLine();
559-
}
560686

561-
sb.AppendLine(" ORDER BY cosine_distance");
562-
sb.AppendLine(" ) AS similar_documents");
563-
sb.AppendLine("),");
687+
sb.AppendLine(" ORDER BY cosine_distance");
688+
sb.AppendLine(" ) AS similar_documents");
689+
sb.AppendLine("),");
690+
}
564691

565692
// CTE 3: Combined results with RRF scoring
566693
sb.AppendLine("hybrid_result AS (");
@@ -901,4 +1028,11 @@ private static string Map(PropertyModel property)
9011028
DistanceFunction.NegativeDotProductSimilarity => ("DOT", "ASC"),
9021029
_ => throw new NotSupportedException($"Distance function {name} is not supported.")
9031030
};
1031+
1032+
/// <summary>
1033+
/// Returns whether VECTOR_SEARCH() (approximate/indexed search) should be used for the given vector property,
1034+
/// as opposed to VECTOR_DISTANCE() (exact/brute-force search).
1035+
/// </summary>
1036+
private static bool UseVectorSearch(VectorPropertyModel vectorProperty)
1037+
=> vectorProperty.IndexKind is not (null or "" or IndexKind.Flat);
9041038
}

dotnet/src/VectorData/SqlServer/SqlServerModelBuilder.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@
55
using System.Diagnostics.CodeAnalysis;
66
using Microsoft.Data.SqlTypes;
77
using Microsoft.Extensions.AI;
8+
using Microsoft.Extensions.VectorData;
89
using Microsoft.Extensions.VectorData.ProviderServices;
910

1011
namespace Microsoft.SemanticKernel.Connectors.SqlServer;
1112

1213
internal class SqlServerModelBuilder() : CollectionModelBuilder(s_modelBuildingOptions)
1314
{
1415
internal const string SupportedVectorTypes = "SqlVector<float>, ReadOnlyMemory<float>, Embedding<float>, float[]";
16+
internal const string SupportedIndexKinds = $"{IndexKind.Flat}, {IndexKind.DiskAnn}";
1517

1618
private static readonly CollectionModelBuildingOptions s_modelBuildingOptions = new()
1719
{
@@ -33,6 +35,23 @@ protected override void ValidateKeyProperty(KeyPropertyModel keyProperty)
3335
}
3436
}
3537

38+
/// <summary>
/// Runs the base property validation, then rejects vector properties whose index kind is not one
/// of the kinds accepted here (<see cref="IndexKind.Flat"/>, <see cref="IndexKind.DiskAnn"/>,
/// <see langword="null"/> or an empty string).
/// </summary>
/// <exception cref="NotSupportedException">
/// Thrown when a vector property declares any other index kind.
/// </exception>
protected override void ValidateProperty(PropertyModel propertyModel, VectorStoreCollectionDefinition? definition)
{
    base.ValidateProperty(propertyModel, definition);

    // Only vector properties carry an index kind; nothing further to check for other properties.
    if (propertyModel is not VectorPropertyModel vectorProperty)
    {
        return;
    }

    bool isSupportedIndexKind = vectorProperty.IndexKind is null or "" or IndexKind.Flat or IndexKind.DiskAnn;
    if (!isSupportedIndexKind)
    {
        throw new NotSupportedException(
            $"Index kind '{vectorProperty.IndexKind}' is not supported by the SQL Server connector. Supported index kinds: {SupportedIndexKinds}");
    }
}
54+
3655
protected override bool IsDataPropertyTypeValid(Type type, [NotNullWhen(false)] out string? supportedTypes)
3756
{
3857
supportedTypes = "string, short, int, long, double, float, decimal, bool, DateTime, DateTimeOffset, DateOnly, TimeOnly, Guid, byte[], string[], List<string>";

0 commit comments

Comments
 (0)