Skip to content

Commit d9976f2

Browse files
committed
feat(reorder): add enable_reorder as search parameter
Signed-off-by: LHT129 <tianlan.lht@antgroup.com>
1 parent dc85bb1 commit d9976f2

12 files changed

Lines changed: 161 additions & 21 deletions

src/algorithm/hgraph.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2106,7 +2106,7 @@ HGraph::SearchWithRequest(const SearchRequest& request) const {
21062106

21072107
this->pool_->ReturnOne(vt);
21082108

2109-
if (use_reorder_) {
2109+
if (use_reorder_ and params.enable_reorder) {
21102110
this->reorder(raw_query, this->high_precise_codes_, search_result, k);
21112111
}
21122112

src/algorithm/hgraph_parameter.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ class HGraphSearchParameters : public IndexSearchParameter {
7575

7676
public:
7777
int64_t ef_search{30};
78-
float topk_factor{0.0F};
79-
bool use_reorder{false};
78+
8079
bool use_extra_info_filter{false};
8180

8281
private:

src/algorithm/index_search_parameter.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ class IndexSearchParameter {
3737
timeout_ms = json[SEARCH_MAX_TIME_COST_MS].GetInt();
3838
enable_time_record = true;
3939
}
40+
41+
if (json.Contains(SEARCH_PARAM_FACTOR)) {
42+
topk_factor = json[SEARCH_PARAM_FACTOR].GetFloat();
43+
}
44+
45+
if (json.Contains(SEARCH_PARAM_ENABLE_REORDER)) {
46+
enable_reorder = json[SEARCH_PARAM_ENABLE_REORDER].GetBool();
47+
}
4048
}
4149

4250
public:
@@ -45,5 +53,9 @@ class IndexSearchParameter {
4553
// for timeout
4654
double timeout_ms{std::numeric_limits<double>::max()};
4755
bool enable_time_record{false};
56+
57+
// for reorder
58+
bool enable_reorder{true};
59+
float topk_factor{0.0F};
4860
};
4961
} // namespace vsag

src/algorithm/ivf.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ IVF::create_search_param(const std::string& parameters, const FilterPtr& filter)
692692
param.factor = search_param.topk_factor;
693693
param.first_order_scan_ratio = search_param.first_order_scan_ratio;
694694
param.parallel_search_thread_count = search_param.parallel_search_thread_count;
695+
param.enable_reorder = search_param.enable_reorder;
695696
if (search_param.enable_time_record) {
696697
param.time_cost = std::make_shared<Timer>();
697698
param.time_cost->SetThreshold(search_param.timeout_ms);
@@ -933,7 +934,7 @@ IVF::SearchWithRequest(const SearchRequest& request) const {
933934
auto param = this->create_search_param(request.params_str_, request.filter_);
934935
param.search_mode = KNN_SEARCH;
935936
param.topk = request.topk_;
936-
if (use_reorder_) {
937+
if (use_reorder_ and param.enable_reorder) {
937938
param.topk = static_cast<int64_t>(param.factor * static_cast<float>(request.topk_));
938939
}
939940
auto query = request.query_;
@@ -949,7 +950,7 @@ IVF::SearchWithRequest(const SearchRequest& request) const {
949950
}
950951
Statistics stats;
951952
auto search_result = this->search<KNN_SEARCH>(query, param, stats);
952-
if (use_reorder_) {
953+
if (use_reorder_ and param.enable_reorder) {
953954
return reorder(request.topk_, search_result, query->GetFloat32Vectors(), param, stats);
954955
}
955956
auto count = static_cast<const int64_t>(search_result->Size());

src/algorithm/ivf_parameter.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,6 @@ IVFSearchParameters::FromJson(const std::string& json_string) {
108108
IVF_SEARCH_PARAM_SCAN_BUCKETS_COUNT));
109109
obj.scan_buckets_count = params[INDEX_TYPE_IVF][IVF_SEARCH_PARAM_SCAN_BUCKETS_COUNT].GetInt();
110110

111-
// set obj.topk_factor
112-
if (params[INDEX_TYPE_IVF].Contains(SEARCH_PARAM_FACTOR)) {
113-
obj.topk_factor = params[INDEX_TYPE_IVF][SEARCH_PARAM_FACTOR].GetFloat();
114-
}
115-
116111
// set obj.first_order_scan_ratio
117112
if (params[INDEX_TYPE_IVF].Contains(GNO_IMI_SEARCH_PARAM_FIRST_ORDER_SCAN_RATIO)) {
118113
obj.first_order_scan_ratio =

src/algorithm/ivf_parameter.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ class IVFSearchParameters : public IndexSearchParameter {
5656

5757
public:
5858
int64_t scan_buckets_count{30};
59-
float topk_factor{2.0F};
6059
float first_order_scan_ratio{1.0F};
6160

6261
private:

src/impl/inner_search_param.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,22 @@ class InnerSearchParam {
3636

3737
public:
3838
int64_t topk{0};
39+
float factor{2.0F};
3940
float radius{0.0F};
40-
InnerIdType ep{0};
41-
uint64_t ef{10};
42-
FilterPtr is_inner_id_allowed{nullptr};
43-
float skip_ratio{0.8F};
41+
bool enable_reorder{true};
4442
InnerSearchMode search_mode{KNN_SEARCH};
4543
int range_search_limit_size{-1};
44+
4645
int64_t parallel_search_thread_count{1};
46+
FilterPtr is_inner_id_allowed{nullptr};
47+
48+
// for HGraph
49+
InnerIdType ep{0};
50+
uint64_t ef{10};
51+
float skip_ratio{0.8F};
4752

48-
// for ivf
53+
// for IVF
4954
int scan_bucket_size{1};
50-
float factor{2.0F};
5155
float first_order_scan_ratio{1.0F};
5256
Allocator* search_alloc{nullptr};
5357
std::vector<ExecutorPtr> executors;

src/inner_string_params.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ const char* const SPARSE_CODES = "sparse";
171171

172172
const char* const IVF_SEARCH_PARAM_SCAN_BUCKETS_COUNT = "scan_buckets_count";
173173
const char* const SEARCH_PARAM_FACTOR = "factor";
174+
const char* const SEARCH_PARAM_ENABLE_REORDER = "enable_reorder";
174175
const char* const SEARCH_PARALLELISM = "parallelism";
175176
const char* const SEARCH_MAX_TIME_COST_MS = "timeout_ms";
176177
const char* const SPARSE_N_CANDIDATE = "n_candidate";

tests/test_hgraph.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2125,6 +2125,61 @@ TEST_CASE("(Daily) HGraph Search Over Time", "[ft][hgraph][daily]") {
21252125
TestHGraphSearchOverTime(test_index, resource);
21262126
}
21272127

2128+
static void
2129+
TestHGraphSearchDisableReorder(const fixtures::HGraphTestIndexPtr& test_index,
2130+
const fixtures::HGraphResourcePtr& resource) {
2131+
using namespace fixtures;
2132+
auto origin_size = vsag::Options::Instance().block_size_limit();
2133+
auto size = GENERATE(1024 * 1024 * 2);
2134+
constexpr const char* search_param_tmp = R"({{
2135+
"hgraph": {{
2136+
"ef_search": 200,
2137+
"enable_reorder": {}
2138+
}}
2139+
}})";
2140+
2141+
for (auto metric_type : resource->metric_types) {
2142+
for (auto dim : resource->dims) {
2143+
auto base_quantization_str = "sq4_uniform,fp32";
2144+
float recall_with_reorder = 0.95;
2145+
float recall_without_reorder = 0.75;
2146+
INFO(
2147+
fmt::format("metric_type: {}, dim: {}, base_quantization_str: {}, "
2148+
"recall_with_reorder: {}, recall_without_reorder: {}",
2149+
metric_type,
2150+
dim,
2151+
base_quantization_str,
2152+
recall_with_reorder,
2153+
recall_without_reorder));
2154+
vsag::Options::Instance().set_block_size_limit(size);
2155+
HGraphTestIndex::HGraphBuildParam build_param(metric_type, dim, base_quantization_str);
2156+
auto param = HGraphTestIndex::GenerateHGraphBuildParametersString(build_param);
2157+
auto index = TestIndex::TestFactory(test_index->name, param, true);
2158+
auto dataset =
2159+
HGraphTestIndex::pool.GetDatasetAndCreate(dim, resource->base_count, metric_type);
2160+
TestIndex::TestBuildIndex(index, dataset, true);
2161+
auto recall_result_with_reorder = TestIndex::TestKnnSearch(
2162+
index, dataset, fmt::format(search_param_tmp, true), recall_with_reorder, true);
2163+
auto recall_result_without_reorder = TestIndex::TestKnnSearch(
2164+
index, dataset, fmt::format(search_param_tmp, false), recall_without_reorder, true);
2165+
REQUIRE(recall_result_with_reorder > recall_result_without_reorder);
2166+
vsag::Options::Instance().set_block_size_limit(origin_size);
2167+
}
2168+
}
2169+
}
2170+
2171+
// PR-tagged entry point for the disable-reorder search check.
// GetResource(true) presumably selects the smaller PR resource set - confirm against the fixture.
TEST_CASE("(PR) HGraph Search Disable Reorder", "[ft][hgraph][pr]") {
    const auto hgraph_fixture = std::make_shared<fixtures::HGraphTestIndex>();
    const auto pr_resource = hgraph_fixture->GetResource(true);
    TestHGraphSearchDisableReorder(hgraph_fixture, pr_resource);
}
2176+
2177+
// Daily-tagged entry point for the disable-reorder search check.
// GetResource(false) presumably selects the full daily resource set - confirm against the fixture.
TEST_CASE("(Daily) HGraph Search Disable Reorder", "[ft][hgraph][daily]") {
    const auto hgraph_fixture = std::make_shared<fixtures::HGraphTestIndex>();
    const auto daily_resource = hgraph_fixture->GetResource(false);
    TestHGraphSearchDisableReorder(hgraph_fixture, daily_resource);
}
2182+
21282183
static void
21292184
TestHGraphDiskIOType(const fixtures::HGraphTestIndexPtr& test_index,
21302185
const fixtures::HGraphResourcePtr& resource) {

tests/test_index.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,14 +435,14 @@ TestIndex::TestKnnSearchCompare(const IndexPtr& index_weak,
435435
}
436436
}
437437

438-
void
438+
float
439439
TestIndex::TestKnnSearch(const IndexPtr& index,
440440
const TestDatasetPtr& dataset,
441441
const std::string& search_param,
442442
float expected_recall,
443443
bool expected_success) {
444444
if (not index->CheckFeature(vsag::SUPPORT_KNN_SEARCH)) {
445-
return;
445+
return 0.0F;
446446
}
447447
auto queries = dataset->query_;
448448
auto query_count = queries->GetNumElements();
@@ -458,7 +458,7 @@ TestIndex::TestKnnSearch(const IndexPtr& index,
458458
if (res.has_value()) {
459459
REQUIRE(res.value()->GetDim() == 0);
460460
}
461-
return;
461+
return 0.0F;
462462
} else {
463463
REQUIRE(res.has_value() == true);
464464
}
@@ -474,6 +474,7 @@ TestIndex::TestKnnSearch(const IndexPtr& index,
474474
expected_recall * query_count));
475475
}
476476
REQUIRE(cur_recall > expected_recall * query_count * RECALL_THRESHOLD);
477+
return cur_recall / static_cast<float>(query_count);
477478
}
478479

479480
void

0 commit comments

Comments
 (0)