Skip to content

Crash when tuning index to RaBitQ #1760

@shadowao

Description

@shadowao

Describe the bug
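Calling Tune on a built HGraph index crashes with SIGSEGV in vsag::HGraph::GetVectorByInnerId (called from vsag::HGraph::Tune). Crash backtrace: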

Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `./tests/functests [ft][hnsw][bq_tune]'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x0000154a1dd39a57 in vsag::HGraph::GetVectorByInnerId (this=0xfbd280, inner_id=0, data=0x108ecb0) at /data/vsag/dev/vsag/src/algorithm/hgraph.cpp:2021
2021        const auto* buffer = codes->GetCodesById(inner_id, release);
(gdb) bt
#0  0x0000154a1dd39a57 in vsag::HGraph::GetVectorByInnerId (this=0xfbd280, inner_id=0, data=0x108ecb0) at /data/vsag/dev/vsag/src/algorithm/hgraph.cpp:2021
#1  0x0000154a1dd2ddec in vsag::HGraph::Tune (this=0xfbd280, parameters=..., disable_future_tuning=true) at /data/vsag/dev/vsag/src/algorithm/hgraph.cpp:585
#2  0x0000154a1d4771a1 in vsag::IndexImpl<vsag::HGraph>::Tune (this=0xf9cc20, parameters=..., disable_future_tuning=true) at /data/vsag/dev/vsag/src/index/index_impl.h:94
#3  0x00000000005b229a in (anonymous namespace)::CATCH2_INTERNAL_TEST_110::test (this=0xfb1670) at /data/vsag/dev/vsag/tests/test_hnsw.cpp:985
#4  0x00000000005bf400 in Catch::TestInvokerFixture<(anonymous namespace)::CATCH2_INTERNAL_TEST_110>::invoke (this=0xfa0e20)
    at /data/vsag/dev/vsag/build/_deps/catch2-src/src/catch2/../catch2/internal/catch_test_registry.hpp:70
#5  0x0000154a1fee96a5 in Catch::TestCaseHandle::invoke (this=0xfbc010) at src/catch2/../catch2/catch_test_case_info.hpp:124
#6  0x0000154a1fee863d in Catch::RunContext::invokeActiveTestCase (this=0x7ffffba0d870) at src/catch2/internal/catch_run_context.cpp:580
#7  0x0000154a1fee839a in Catch::RunContext::runCurrentTest (this=0x7ffffba0d870) at src/catch2/internal/catch_run_context.cpp:543
#8  0x0000154a1fee6782 in Catch::RunContext::runTest (this=0x7ffffba0d870, testCase=...) at src/catch2/internal/catch_run_context.cpp:240
#9  0x0000154a1fe968da in Catch::(anonymous namespace)::TestGroup::execute (this=0x7ffffba0d860) at src/catch2/catch_session.cpp:117
#10 0x0000154a1fe97b97 in Catch::Session::runInternal (this=0x7ffffba0db80) at src/catch2/catch_session.cpp:337
#11 0x0000154a1fe97741 in Catch::Session::run (this=0x7ffffba0db80) at src/catch2/catch_session.cpp:269
#12 0x000000000067e98d in Catch::Session::run<char> (this=0x7ffffba0db80, argc=2, argv=0x7ffffba0de18) at /data/vsag/dev/vsag/build/_deps/catch2-src/src/catch2/../catch2/catch_session.hpp:41
#13 0x000000000067e368 in main (argc=2, argv=0x7ffffba0de18) at /data/vsag/dev/vsag/tests/test_main.cpp:62

To Reproduce
Codes to reproduce the behavior:

// Reproduction for the SIGSEGV reported in HGraph::Tune: build one HGraph
// index from random fp32 vectors, tune it with `bq_tune_params`, then run a
// KNN search using the first base vector as the query.
TEST_CASE_PERSISTENT_FIXTURE(fixtures::HNSWTestIndex,
    "Test DeserializeV2",
    "[ft][hnsw][bq_tune]") {
  auto origin_size = vsag::Options::Instance().block_size_limit();

  // The block size limit is a process-wide option. Restore it on every exit
  // path (a failing REQUIRE leaves the test early) so later tests are not
  // affected by this one.
  struct BlockSizeLimitRestorer {
    decltype(origin_size) saved;
    ~BlockSizeLimitRestorer() {
      vsag::Options::Instance().set_block_size_limit(saved);
    }
  } restore_block_size{origin_size};

  auto size = 1024 * 1024 * 2;
  vsag::Options::Instance().set_block_size_limit(size);

  // Must stay in sync with the "dim" value hard-coded in the JSON below.
  int dim = 128;
  int base_id = 30000;
  std::mt19937 rng;
  rng.seed(47);
  std::uniform_real_distribution<> distrib_real;

  const char* hgraph_params = "{\"dim\":128,\"dtype\":\"float32\",\"metric_type\":\"l2\","
      "\"use_old_serial_format\":true,\"index_param\":{\"ef_construction\":100,\"max_degree\":32,"
      "\"store_raw_vector\":true,\"base_quantization_type\":\"fp32\",\"build_thread_count\":0}}";
  const char* bq_tune_params = "{\"dim\":128,\"dtype\":\"float32\",\"metric_type\":\"l2\","
      "\"use_old_serial_format\":true,\"index_param\":{\"ef_construction\":100,\"max_degree\":32,"
      "\"base_quantization_type\":\"fp32\",\"build_thread_count\":0,\"use_reorder\":true,\"ignore_reorder\":true,"
      "\"precise_quantization_type\":\"fp32\",\"precise_io_type\":\"block_memory_io\"}}";
  const char* search_params = "{\"hgraph\":{\"ef_search\":200,\"skip_ratio\":0.7,\"use_extra_info_filter\":false}}";

  // 1. Create and build one HGraph index via vsag::Factory
  auto base_idx = vsag::Factory::CreateIndex("hgraph", hgraph_params);
  REQUIRE(base_idx.has_value());
  int base_num = 200;

  // The datasets below are created with Owner(false), so this test owns the
  // raw buffers. The guard frees them even when a REQUIRE aborts the test.
  struct BaseBuffers {
    int64_t* ids = nullptr;
    float* vectors = nullptr;
    ~BaseBuffers() {
      delete[] ids;
      delete[] vectors;
    }
  } buffers;
  buffers.ids = new int64_t[base_num];
  buffers.vectors = new float[dim * base_num];
  int64_t* base_ids = buffers.ids;
  float* base_vectors = buffers.vectors;

  for (int64_t i = 0; i < base_num; ++i) {
    base_ids[i] = base_id + i;
  }
  for (int64_t i = 0; i < dim * base_num; ++i) {
    base_vectors[i] = distrib_real(rng);
  }
  auto base_dataset = vsag::Dataset::Make();
  base_dataset->Dim(dim)->NumElements(base_num)->Ids(base_ids)->Float32Vectors(base_vectors)->Owner(false);
  auto build_ret = base_idx.value()->Build(base_dataset);
  REQUIRE(build_ret.has_value());

  // 2. Tune the index with bq_tune_params via vsag::Index::Tune
  //    (this is the call that crashes in the reported backtrace)
  auto tune_base = base_idx.value()->Tune(bq_tune_params, true);
  REQUIRE(tune_base.has_value());
  REQUIRE(tune_base.value());

  auto base_range = base_idx.value()->GetMinAndMaxId();
  REQUIRE(base_range.has_value());
  std::cout << "base_range: " << base_range.value().first << ", " << base_range.value().second << '\n';

  // 3. Search with the first base vector as the query
  int64_t query_id = base_ids[0];
  auto query_dataset = vsag::Dataset::Make();
  query_dataset->Dim(dim)->NumElements(1)->Ids(&query_id)->Float32Vectors(base_vectors)->Owner(false);
  std::string search_params_str(search_params);
  vsag::SearchParam search_param(false, search_params_str, nullptr, nullptr);
  auto search_ret = base_idx.value()->KnnSearch(query_dataset, 5, search_param);
  REQUIRE(search_ret.has_value());
  REQUIRE(search_ret.value()->GetDim() > 0);
}

Environment
Please run bash scripts/check_environment.sh and paste the output here:

  • OS: Linux
  • vsag version: v0.18.3
  • compiler version: GCC 9
  • interface: cpp

Expected behavior
A clear and concise description of what you expected to happen.

Screenshots
If applicable, add screenshots to help explain your problem.

Additional context
Add any other context about the problem here.

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions