Skip to content

Commit 931bd38

Browse files
committed
feat(repository): add TagID and IsRecommended fields to vector embeddings
- Introduced `TagID` and `IsRecommended` fields across various repository implementations (Elasticsearch, Milvus, Postgres, Qdrant, SQLite) to enhance document categorization and recommendation capabilities.
- Updated the handling of `IsEnabled` so that it reflects the actual state from the source data instead of always defaulting to `true`.
- Adjusted the knowledge service to populate the new `IsRecommended` field when building FAQ index entries, improving the indexing process for knowledge chunks.

These changes enhance the flexibility and functionality of document retrieval and indexing across the application.
1 parent 7b8a78a commit 931bd38

6 files changed

Lines changed: 25 additions & 6 deletions

File tree

internal/application/repository/retriever/elasticsearch/structs.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ type VectorEmbedding struct {
1515
ChunkID string `json:"chunk_id" gorm:"column:chunk_id"` // Unique ID of the text chunk
1616
KnowledgeID string `json:"knowledge_id" gorm:"column:knowledge_id"` // ID of the knowledge item
1717
KnowledgeBaseID string `json:"knowledge_base_id" gorm:"column:knowledge_base_id"` // ID of the knowledge base
18+
TagID string `json:"tag_id" gorm:"column:tag_id"` // Tag ID for categorization
1819
Embedding []float32 `json:"embedding" gorm:"column:embedding;not null"` // Vector embedding of the content
1920
IsEnabled bool `json:"is_enabled"` // Whether the chunk is enabled
21+
IsRecommended bool `json:"is_recommended"` // Whether the chunk is recommended
2022
}
2123

2224
// VectorEmbeddingWithScore extends VectorEmbedding with similarity score
@@ -34,7 +36,9 @@ func ToDBVectorEmbedding(embedding *types.IndexInfo, additionalParams map[string
3436
ChunkID: embedding.ChunkID,
3537
KnowledgeID: embedding.KnowledgeID,
3638
KnowledgeBaseID: embedding.KnowledgeBaseID,
37-
IsEnabled: true, // Default to enabled
39+
TagID: embedding.TagID,
40+
IsEnabled: embedding.IsEnabled,
41+
IsRecommended: embedding.IsRecommended,
3842
}
3943
// Add embedding data if available in additionalParams
4044
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") {
@@ -65,8 +69,10 @@ func FromDBVectorEmbeddingWithScore(id string,
6569
ChunkID: embedding.ChunkID,
6670
KnowledgeID: embedding.KnowledgeID,
6771
KnowledgeBaseID: embedding.KnowledgeBaseID,
72+
TagID: embedding.TagID,
6873
Content: embedding.Content,
6974
Score: embedding.Score,
7075
MatchType: matchType,
76+
IsEnabled: embedding.IsEnabled,
7177
}
7278
}

internal/application/repository/retriever/milvus/repository.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,11 @@ func (m *milvusRepository) getBaseFilterForQuery(params types.RetrieveParams) (s
592592
Value: params.ExcludeChunkIDs,
593593
})
594594
}
595+
filters = append(filters, &universalFilterCondition{
596+
Field: fieldIsEnabled,
597+
Operator: operatorEqual,
598+
Value: true,
599+
})
595600
if len(filters) == 0 {
596601
return "", nil, nil
597602
}
@@ -919,7 +924,7 @@ func toMilvusVectorEmbedding(embedding *types.IndexInfo, additionalParams map[st
919924
KnowledgeID: embedding.KnowledgeID,
920925
KnowledgeBaseID: embedding.KnowledgeBaseID,
921926
TagID: embedding.TagID,
922-
IsEnabled: true, // Default to enabled
927+
IsEnabled: embedding.IsEnabled,
923928
}
924929
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), fieldEmbedding) {
925930
if embeddingMap, ok := additionalParams[fieldEmbedding].(map[string][]float32); ok {

internal/application/repository/retriever/postgres/structs.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func toDBVectorEmbedding(indexInfo *types.IndexInfo, additionalParams map[string
6666
KnowledgeBaseID: indexInfo.KnowledgeBaseID,
6767
TagID: indexInfo.TagID,
6868
Content: common.CleanInvalidUTF8(indexInfo.Content),
69-
IsEnabled: true, // Default to enabled
69+
IsEnabled: indexInfo.IsEnabled,
7070
}
7171
// Add embedding data if available in additionalParams
7272
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), "embedding") {

internal/application/repository/retriever/qdrant/repository.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,14 +797,19 @@ func (q *qdrantRepository) CopyIndices(ctx context.Context,
797797
targetSourceID = uuid.New().String()
798798
}
799799

800+
isEnabled := true
801+
if v, ok := payload[fieldIsEnabled]; ok {
802+
isEnabled = v.GetBoolValue()
803+
}
800804
newPayload := qdrant.NewValueMap(map[string]any{
801805
fieldContent: payload[fieldContent].GetStringValue(),
802806
fieldSourceID: targetSourceID,
803807
fieldSourceType: payload[fieldSourceType].GetIntegerValue(),
804808
fieldChunkID: targetChunkID,
805809
fieldKnowledgeID: targetKnowledgeID,
806810
fieldKnowledgeBaseID: targetKnowledgeBaseID,
807-
fieldIsEnabled: true,
811+
fieldTagID: payload[fieldTagID].GetStringValue(),
812+
fieldIsEnabled: isEnabled,
808813
})
809814

810815
var vectors *qdrant.Vectors
@@ -923,7 +928,7 @@ func toQdrantVectorEmbedding(embedding *types.IndexInfo, additionalParams map[st
923928
KnowledgeID: embedding.KnowledgeID,
924929
KnowledgeBaseID: embedding.KnowledgeBaseID,
925930
TagID: embedding.TagID,
926-
IsEnabled: true, // Default to enabled
931+
IsEnabled: embedding.IsEnabled,
927932
}
928933
if additionalParams != nil && slices.Contains(slices.Collect(maps.Keys(additionalParams)), fieldEmbedding) {
929934
if embeddingMap, ok := additionalParams[fieldEmbedding].(map[string][]float32); ok {

internal/application/repository/retriever/sqlite/repository.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ func (r *sqliteRepository) vectorRetrieve(ctx context.Context, params types.Retr
437437
// --- Internal helpers ---
438438

439439
func toSQLiteEmbedding(info *types.IndexInfo) *sqliteEmbedding {
440-
enabled := true
440+
enabled := info.IsEnabled
441441
return &sqliteEmbedding{
442442
SourceID: info.SourceID,
443443
SourceType: int(info.SourceType),

internal/application/service/knowledge.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6224,6 +6224,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
62246224
KnowledgeType: types.KnowledgeTypeFAQ,
62256225
TagID: chunk.TagID,
62266226
IsEnabled: chunk.IsEnabled,
6227+
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
62276228
},
62286229
}, nil
62296230
}
@@ -6252,6 +6253,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
62526253
KnowledgeType: types.KnowledgeTypeFAQ,
62536254
TagID: chunk.TagID,
62546255
IsEnabled: chunk.IsEnabled,
6256+
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
62556257
})
62566258

62576259
// 每个相似问创建一个索引项
@@ -6277,6 +6279,7 @@ func (s *knowledgeService) buildFAQIndexInfoList(
62776279
KnowledgeType: types.KnowledgeTypeFAQ,
62786280
TagID: chunk.TagID,
62796281
IsEnabled: chunk.IsEnabled,
6282+
IsRecommended: chunk.Flags.HasFlag(types.ChunkFlagRecommended),
62806283
})
62816284
}
62826285

0 commit comments

Comments
 (0)