From 2af16f1f253f5272412b02701282bdbdd43e7279 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 2 Apr 2026 14:01:43 -0700 Subject: [PATCH 01/32] add adaptive batch size heuristic for filtered search --- bindings/cpp/src/dynamic_vamana_index_impl.h | 21 ++++++++- bindings/cpp/tests/runtime_test.cpp | 49 ++++++++++++++++++++ bindings/cpp/tests/utils.h | 13 ++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 4b16cf4bc..c7bd75041 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -38,6 +38,20 @@ namespace svs { namespace runtime { +// Compute the next batch size based on observed filter hit rate. +// On the first round (found == 0), returns initial_batch_size unchanged. +// On subsequent rounds, estimates how many candidates are needed to find the +// remaining results given the observed hit rate. +inline size_t compute_filtered_batch_size( + size_t found, size_t needed, size_t total_checked, size_t initial_batch_size +) { + if (found == 0 || found >= needed) { + return initial_batch_size; + } + double hit_rate = static_cast(found) / total_checked; + return static_cast((needed - found) / hit_rate); +} + // Dynamic Vamana index implementation class DynamicVamanaIndexImpl { using allocator_type = svs::data::Blocked>; @@ -125,9 +139,12 @@ class DynamicVamanaIndexImpl { auto query = queries.get_datum(i); auto iterator = impl_->batch_iterator(query); size_t found = 0; + size_t total_checked = 0; + auto batch_size = sp.buffer_config_.get_search_window_size(); do { - iterator.next(k); + iterator.next(batch_size); for (auto& neighbor : iterator.results()) { + total_checked++; if (filter->is_member(neighbor.id())) { result.set(neighbor, i, found); found++; @@ -136,6 +153,8 @@ class DynamicVamanaIndexImpl { } } } + batch_size = + compute_filtered_batch_size(found, k, total_checked, batch_size); } 
while (found < k && !iterator.done()); // Pad results if not enough neighbors found diff --git a/bindings/cpp/tests/runtime_test.cpp b/bindings/cpp/tests/runtime_test.cpp index 201375d3c..2f296790c 100644 --- a/bindings/cpp/tests/runtime_test.cpp +++ b/bindings/cpp/tests/runtime_test.cpp @@ -501,6 +501,55 @@ CATCH_TEST_CASE("SearchWithIDFilter", "[runtime]") { svs::runtime::v0::DynamicVamanaIndex::destroy(index); } +CATCH_TEST_CASE("SearchWithRestrictiveFilter", "[runtime][filtered_search]") { + const auto& test_data = get_test_data(); + // Build index + svs::runtime::v0::DynamicVamanaIndex* index = nullptr; + svs::runtime::v0::VamanaIndex::BuildParams build_params{64}; + svs::runtime::v0::Status status = svs::runtime::v0::DynamicVamanaIndex::build( + &index, + test_d, + svs::runtime::v0::MetricType::L2, + svs::runtime::v0::StorageKind::FP32, + build_params + ); + CATCH_REQUIRE(status.ok()); + CATCH_REQUIRE(index != nullptr); + + // Add data + std::vector labels(test_n); + std::iota(labels.begin(), labels.end(), 0); + status = index->add(test_n, labels.data(), test_data.data()); + CATCH_REQUIRE(status.ok()); + + const int nq = 5; + const float* xq = test_data.data(); + const int k = 5; + + // 10% selectivity: accept every 10th ID + std::unordered_set valid_ids; + for (size_t i = 0; i < test_n; i += 10) { + valid_ids.insert(i); + } + test_utils::IDFilterSet filter(valid_ids); + + std::vector distances(nq * k); + std::vector result_labels(nq * k); + + status = + index->search(nq, xq, k, distances.data(), result_labels.data(), nullptr, &filter); + CATCH_REQUIRE(status.ok()); + + // All returned labels must be in the valid set + for (int i = 0; i < nq * k; ++i) { + if (svs::runtime::v0::is_specified(result_labels[i])) { + CATCH_REQUIRE(valid_ids.contains(result_labels[i])); + } + } + + svs::runtime::v0::DynamicVamanaIndex::destroy(index); +} + CATCH_TEST_CASE("RangeSearchFunctional", "[runtime]") { const auto& test_data = get_test_data(); // Build index diff --git 
a/bindings/cpp/tests/utils.h b/bindings/cpp/tests/utils.h index 8d1bc89f6..e2174b938 100644 --- a/bindings/cpp/tests/utils.h +++ b/bindings/cpp/tests/utils.h @@ -22,6 +22,7 @@ #include #include #include +#include #include namespace svs_test { @@ -73,6 +74,18 @@ class IDFilterRange : public svs::runtime::v0::IDFilter { bool is_member(size_t id) const override { return id >= min_id_ && id < max_id_; } }; +// ID filter that accepts only IDs in a given set +class IDFilterSet : public svs::runtime::v0::IDFilter { + private: + std::unordered_set valid_ids_; + + public: + IDFilterSet(std::unordered_set ids) + : valid_ids_(std::move(ids)) {} + + bool is_member(size_t id) const override { return valid_ids_.contains(id); } +}; + // Custom results allocator for testing class TestResultsAllocator : public svs::runtime::v0::ResultsAllocator { private: From 605a0bef2b9ec8de29b0b720a365d38be9775c66 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 2 Apr 2026 14:16:16 -0700 Subject: [PATCH 02/32] use IDFilterRange instead of IDFilterSet in test --- bindings/cpp/tests/runtime_test.cpp | 15 +++++++-------- bindings/cpp/tests/utils.h | 13 ------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/bindings/cpp/tests/runtime_test.cpp b/bindings/cpp/tests/runtime_test.cpp index 2f296790c..92b819894 100644 --- a/bindings/cpp/tests/runtime_test.cpp +++ b/bindings/cpp/tests/runtime_test.cpp @@ -526,12 +526,10 @@ CATCH_TEST_CASE("SearchWithRestrictiveFilter", "[runtime][filtered_search]") { const float* xq = test_data.data(); const int k = 5; - // 10% selectivity: accept every 10th ID - std::unordered_set valid_ids; - for (size_t i = 0; i < test_n; i += 10) { - valid_ids.insert(i); - } - test_utils::IDFilterSet filter(valid_ids); + // 10% selectivity: accept only IDs 0-9 out of 100 + size_t min_id = 0; + size_t max_id = test_n / 10; + test_utils::IDFilterRange filter(min_id, max_id); std::vector distances(nq * k); std::vector result_labels(nq * k); @@ -540,10 +538,11 
@@ CATCH_TEST_CASE("SearchWithRestrictiveFilter", "[runtime][filtered_search]") { index->search(nq, xq, k, distances.data(), result_labels.data(), nullptr, &filter); CATCH_REQUIRE(status.ok()); - // All returned labels must be in the valid set + // All returned labels must fall inside the filter range for (int i = 0; i < nq * k; ++i) { if (svs::runtime::v0::is_specified(result_labels[i])) { - CATCH_REQUIRE(valid_ids.contains(result_labels[i])); + CATCH_REQUIRE(result_labels[i] >= min_id); + CATCH_REQUIRE(result_labels[i] < max_id); } } diff --git a/bindings/cpp/tests/utils.h b/bindings/cpp/tests/utils.h index e2174b938..8d1bc89f6 100644 --- a/bindings/cpp/tests/utils.h +++ b/bindings/cpp/tests/utils.h @@ -22,7 +22,6 @@ #include #include #include -#include #include namespace svs_test { @@ -74,18 +73,6 @@ class IDFilterRange : public svs::runtime::v0::IDFilter { bool is_member(size_t id) const override { return id >= min_id_ && id < max_id_; } }; -// ID filter that accepts only IDs in a given set -class IDFilterSet : public svs::runtime::v0::IDFilter { - private: - std::unordered_set valid_ids_; - - public: - IDFilterSet(std::unordered_set ids) - : valid_ids_(std::move(ids)) {} - - bool is_member(size_t id) const override { return valid_ids_.contains(id); } -}; - // Custom results allocator for testing class TestResultsAllocator : public svs::runtime::v0::ResultsAllocator { private: From 309d0add26f4709a8e811ffd704854ae11588a6f Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 3 Apr 2026 17:12:36 -0700 Subject: [PATCH 03/32] address PR review: refactor and optimize adaptive batch size - Rename compute_filtered_batch_size to predict_further_processing and move to svs_runtime_utils.h for reuse - Use float arithmetic instead of double for hit rate calculation - Compute batch size at loop start to avoid unnecessary computation - Use iterator.size() instead of per-element increment for total_checked - Initial batch size = max(k, search_window_size) - Apply adaptive 
batch size to vamana_index_impl.h filtered search --- bindings/cpp/src/dynamic_vamana_index_impl.h | 22 ++++---------------- bindings/cpp/src/svs_runtime_utils.h | 14 +++++++++++++ bindings/cpp/src/vamana_index_impl.h | 7 ++++++- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index c7bd75041..fe4c0b49b 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -38,20 +38,6 @@ namespace svs { namespace runtime { -// Compute the next batch size based on observed filter hit rate. -// On the first round (found == 0), returns initial_batch_size unchanged. -// On subsequent rounds, estimates how many candidates are needed to find the -// remaining results given the observed hit rate. -inline size_t compute_filtered_batch_size( - size_t found, size_t needed, size_t total_checked, size_t initial_batch_size -) { - if (found == 0 || found >= needed) { - return initial_batch_size; - } - double hit_rate = static_cast(found) / total_checked; - return static_cast((needed - found) / hit_rate); -} - // Dynamic Vamana index implementation class DynamicVamanaIndexImpl { using allocator_type = svs::data::Blocked>; @@ -140,11 +126,13 @@ class DynamicVamanaIndexImpl { auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = sp.buffer_config_.get_search_window_size(); + auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); do { + batch_size = + predict_further_processing(total_checked, found, k, batch_size); iterator.next(batch_size); + total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { - total_checked++; if (filter->is_member(neighbor.id())) { result.set(neighbor, i, found); found++; @@ -153,8 +141,6 @@ class DynamicVamanaIndexImpl { } } } - batch_size = - compute_filtered_batch_size(found, k, total_checked, batch_size); } 
while (found < k && !iterator.done()); // Pad results if not enough neighbors found diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index e0d7c68af..b5fd12756 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -431,6 +431,20 @@ auto dispatch_storage_kind(StorageKind kind, F&& f, Args&&... args) { } } // namespace storage +// Predict how many more items need to be processed to reach the goal, +// based on the observed hit rate so far. +// If no hits yet, returns `hint` unchanged. +// The caller should cap the result to a max batch size if needed. +inline size_t predict_further_processing( + size_t processed, size_t hits, size_t goal, size_t hint +) { + if (hits == 0 || hits >= goal) { + return hint; + } + float batch_size = static_cast(goal - hits) * processed / hits; + return std::max(static_cast(batch_size), size_t{1}); +} + inline svs::threads::ThreadPoolHandle default_threadpool() { return svs::threads::ThreadPoolHandle(svs::threads::OMPThreadPool(omp_get_max_threads()) ); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 4cf58d7e0..d5a731017 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -131,8 +131,13 @@ class VamanaIndexImpl { auto query = queries.get_datum(i); auto iterator = get_impl()->batch_iterator(query); size_t found = 0; + size_t total_checked = 0; + auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); do { - iterator.next(k); + batch_size = + predict_further_processing(total_checked, found, k, batch_size); + iterator.next(batch_size); + total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { if (filter->is_member(neighbor.id())) { result.set(neighbor, i, found); From 62d9bdff2144be154e3488ba35fd64df5ffff670 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 3 Apr 2026 22:40:09 -0700 Subject: [PATCH 04/32] add batch size cap 
and comments to adaptive filtered search - Cap batch size with std::min instead of modulo to avoid SIGFPE - Add comments explaining adaptive batch sizing logic --- bindings/cpp/src/dynamic_vamana_index_impl.h | 12 ++++++++++-- bindings/cpp/src/vamana_index_impl.h | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index fe4c0b49b..f74978591 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -126,10 +126,18 @@ class DynamicVamanaIndexImpl { auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; + // Use adaptive batch sizing: start with at least k candidates, + // then adjust based on observed filter hit rate. auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); + const auto max_batch_size = batch_size; do { - batch_size = - predict_further_processing(total_checked, found, k, batch_size); + // Estimate how many candidates we need to find remaining + // results given the observed hit rate so far. + batch_size = predict_further_processing( + total_checked, found, k, batch_size + ); + // Cap to avoid oversized batches in the iterator. + batch_size = std::min(batch_size, max_batch_size); iterator.next(batch_size); total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index d5a731017..65cee325f 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -132,10 +132,18 @@ class VamanaIndexImpl { auto iterator = get_impl()->batch_iterator(query); size_t found = 0; size_t total_checked = 0; + // Use adaptive batch sizing: start with at least k candidates, + // then adjust based on observed filter hit rate. 
auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); + const auto max_batch_size = batch_size; do { - batch_size = - predict_further_processing(total_checked, found, k, batch_size); + // Estimate how many candidates we need to find remaining + // results given the observed hit rate so far. + batch_size = predict_further_processing( + total_checked, found, k, batch_size + ); + // Cap to avoid oversized batches in the iterator. + batch_size = std::min(batch_size, max_batch_size); iterator.next(batch_size); total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { From ee06f00e480ad68281a806e3916dea25ecd5bf99 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 6 Apr 2026 13:17:34 -0700 Subject: [PATCH 05/32] apply clang-format to adaptive batch size code --- bindings/cpp/src/dynamic_vamana_index_impl.h | 5 ++--- bindings/cpp/src/svs_runtime_utils.h | 5 ++--- bindings/cpp/src/vamana_index_impl.h | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index f74978591..b27958703 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -133,9 +133,8 @@ class DynamicVamanaIndexImpl { do { // Estimate how many candidates we need to find remaining // results given the observed hit rate so far. - batch_size = predict_further_processing( - total_checked, found, k, batch_size - ); + batch_size = + predict_further_processing(total_checked, found, k, batch_size); // Cap to avoid oversized batches in the iterator. batch_size = std::min(batch_size, max_batch_size); iterator.next(batch_size); diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index b5fd12756..6caa1a325 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -435,9 +435,8 @@ auto dispatch_storage_kind(StorageKind kind, F&& f, Args&&... 
args) { // based on the observed hit rate so far. // If no hits yet, returns `hint` unchanged. // The caller should cap the result to a max batch size if needed. -inline size_t predict_further_processing( - size_t processed, size_t hits, size_t goal, size_t hint -) { +inline size_t +predict_further_processing(size_t processed, size_t hits, size_t goal, size_t hint) { if (hits == 0 || hits >= goal) { return hint; } diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 65cee325f..2fd1f1452 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -139,9 +139,8 @@ class VamanaIndexImpl { do { // Estimate how many candidates we need to find remaining // results given the observed hit rate so far. - batch_size = predict_further_processing( - total_checked, found, k, batch_size - ); + batch_size = + predict_further_processing(total_checked, found, k, batch_size); // Cap to avoid oversized batches in the iterator. batch_size = std::min(batch_size, max_batch_size); iterator.next(batch_size); From c92d331cd43abb61727c0dc37a3a15e77c256bc4 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 10 Apr 2026 15:22:20 -0700 Subject: [PATCH 06/32] remove batch size cap, add filter_stop early exit heuristic - Remove max_batch_size cap that limited adaptive sizing effectiveness - Add filter_stop param to SearchParams (default 0 = never give up) - Add should_stop_filtered_search() helper in svs_runtime_utils.h - If hit rate falls below filter_stop after first round, return empty so caller can fall back to exact search --- bindings/cpp/include/svs/runtime/vamana_index.h | 5 +++++ bindings/cpp/src/dynamic_vamana_index_impl.h | 12 +++++------- bindings/cpp/src/svs_runtime_utils.h | 13 +++++++++++++ bindings/cpp/src/vamana_index_impl.h | 12 +++++------- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/bindings/cpp/include/svs/runtime/vamana_index.h 
b/bindings/cpp/include/svs/runtime/vamana_index.h index 988319528..5286551da 100644 --- a/bindings/cpp/include/svs/runtime/vamana_index.h +++ b/bindings/cpp/include/svs/runtime/vamana_index.h @@ -40,6 +40,11 @@ struct VamanaSearchParameters { size_t search_buffer_capacity = Unspecify(); size_t prefetch_lookahead = Unspecify(); size_t prefetch_step = Unspecify(); + // Minimum filter hit rate to continue filtered search. + // If the hit rate after the first round falls below this threshold, + // stop and return empty results (caller can fall back to exact search). + // Default 0 means never give up. + float filter_stop = 0.0f; }; } // namespace detail diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index b27958703..6c28ee958 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -118,6 +118,7 @@ class DynamicVamanaIndexImpl { // Selective search with IDSelector auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); + const float filter_stop = params ? params->filter_stop : 0.0f; auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { @@ -126,17 +127,10 @@ class DynamicVamanaIndexImpl { auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - // Use adaptive batch sizing: start with at least k candidates, - // then adjust based on observed filter hit rate. auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); - const auto max_batch_size = batch_size; do { - // Estimate how many candidates we need to find remaining - // results given the observed hit rate so far. batch_size = predict_further_processing(total_checked, found, k, batch_size); - // Cap to avoid oversized batches in the iterator. 
- batch_size = std::min(batch_size, max_batch_size); iterator.next(batch_size); total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { @@ -148,6 +142,10 @@ class DynamicVamanaIndexImpl { } } } + if (should_stop_filtered_search(total_checked, found, filter_stop)) { + found = 0; + break; + } } while (found < k && !iterator.done()); // Pad results if not enough neighbors found diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 6caa1a325..82527383b 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -444,6 +444,19 @@ predict_further_processing(size_t processed, size_t hits, size_t goal, size_t hi return std::max(static_cast(batch_size), size_t{1}); } +// Check if the filtered search should stop early based on the observed hit rate. +// Returns true if the hit rate is below the threshold, meaning the caller should +// give up and let the caller fall back to exact search. +inline bool should_stop_filtered_search( + size_t total_checked, size_t found, float filter_stop +) { + if (filter_stop <= 0 || total_checked == 0 || found == 0) { + return false; + } + float hit_rate = static_cast(found) / total_checked; + return hit_rate < filter_stop; +} + inline svs::threads::ThreadPoolHandle default_threadpool() { return svs::threads::ThreadPoolHandle(svs::threads::OMPThreadPool(omp_get_max_threads()) ); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 2fd1f1452..81cd8ae31 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -124,6 +124,7 @@ class VamanaIndexImpl { get_impl()->set_search_parameters(old_sp); }); get_impl()->set_search_parameters(sp); + const float filter_stop = params ? 
params->filter_stop : 0.0f; auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { @@ -132,17 +133,10 @@ class VamanaIndexImpl { auto iterator = get_impl()->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - // Use adaptive batch sizing: start with at least k candidates, - // then adjust based on observed filter hit rate. auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); - const auto max_batch_size = batch_size; do { - // Estimate how many candidates we need to find remaining - // results given the observed hit rate so far. batch_size = predict_further_processing(total_checked, found, k, batch_size); - // Cap to avoid oversized batches in the iterator. - batch_size = std::min(batch_size, max_batch_size); iterator.next(batch_size); total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { @@ -154,6 +148,10 @@ class VamanaIndexImpl { } } } + if (should_stop_filtered_search(total_checked, found, filter_stop)) { + found = 0; + break; + } } while (found < k && !iterator.done()); // Pad results if not enough neighbors found From 6a10c8e4dd96fde599c8e76aaac105e2f8043d8b Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 10 Apr 2026 15:27:15 -0700 Subject: [PATCH 07/32] apply clang-format --- bindings/cpp/src/svs_runtime_utils.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 82527383b..1ab1d4d07 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -447,9 +447,8 @@ predict_further_processing(size_t processed, size_t hits, size_t goal, size_t hi // Check if the filtered search should stop early based on the observed hit rate. // Returns true if the hit rate is below the threshold, meaning the caller should // give up and let the caller fall back to exact search. 
-inline bool should_stop_filtered_search( - size_t total_checked, size_t found, float filter_stop -) { +inline bool +should_stop_filtered_search(size_t total_checked, size_t found, float filter_stop) { if (filter_stop <= 0 || total_checked == 0 || found == 0) { return false; } From 90751ca482c69b0e88f2923a60a3149c877f3d20 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 10 Apr 2026 16:27:36 -0700 Subject: [PATCH 08/32] add test for filter_stop early exit heuristic Verifies that search with filter_stop=0.5 gives up and returns unspecified results when hit rate (~10%) is below threshold. --- bindings/cpp/tests/runtime_test.cpp | 68 +++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/bindings/cpp/tests/runtime_test.cpp b/bindings/cpp/tests/runtime_test.cpp index 92b819894..abd142962 100644 --- a/bindings/cpp/tests/runtime_test.cpp +++ b/bindings/cpp/tests/runtime_test.cpp @@ -549,6 +549,74 @@ CATCH_TEST_CASE("SearchWithRestrictiveFilter", "[runtime][filtered_search]") { svs::runtime::v0::DynamicVamanaIndex::destroy(index); } +CATCH_TEST_CASE("FilterStopEarlyExit", "[runtime][filtered_search]") { + const auto& test_data = get_test_data(); + // Build index + svs::runtime::v0::DynamicVamanaIndex* index = nullptr; + svs::runtime::v0::VamanaIndex::BuildParams build_params{64}; + svs::runtime::v0::Status status = svs::runtime::v0::DynamicVamanaIndex::build( + &index, + test_d, + svs::runtime::v0::MetricType::L2, + svs::runtime::v0::StorageKind::FP32, + build_params + ); + CATCH_REQUIRE(status.ok()); + CATCH_REQUIRE(index != nullptr); + + // Add data + std::vector labels(test_n); + std::iota(labels.begin(), labels.end(), 0); + status = index->add(test_n, labels.data(), test_data.data()); + CATCH_REQUIRE(status.ok()); + + const int nq = 5; + const float* xq = test_data.data(); + const int k = 5; + + // 10% selectivity: accept only IDs 0-9 out of 100 + size_t min_id = 0; + size_t max_id = test_n / 10; + test_utils::IDFilterRange filter(min_id, 
max_id); + + std::vector distances(nq * k); + std::vector result_labels(nq * k); + + // Set filter_stop = 0.5 (50%). With ~10% hit rate, search should give up + // and return unspecified results. + svs::runtime::v0::VamanaIndex::SearchParams search_params; + search_params.filter_stop = 0.5f; + + status = index->search( + nq, xq, k, distances.data(), result_labels.data(), &search_params, &filter + ); + CATCH_REQUIRE(status.ok()); + + // All results should be unspecified (early exit returned empty) + for (int i = 0; i < nq * k; ++i) { + CATCH_REQUIRE(!svs::runtime::v0::is_specified(result_labels[i])); + } + + // Now search without filter_stop — should find valid results + std::vector distances2(nq * k); + std::vector result_labels2(nq * k); + + status = index->search( + nq, xq, k, distances2.data(), result_labels2.data(), nullptr, &filter + ); + CATCH_REQUIRE(status.ok()); + + // Should have valid results in the filter range + for (int i = 0; i < nq * k; ++i) { + if (svs::runtime::v0::is_specified(result_labels2[i])) { + CATCH_REQUIRE(result_labels2[i] >= min_id); + CATCH_REQUIRE(result_labels2[i] < max_id); + } + } + + svs::runtime::v0::DynamicVamanaIndex::destroy(index); +} + CATCH_TEST_CASE("RangeSearchFunctional", "[runtime]") { const auto& test_data = get_test_data(); // Build index From fc39a16b4db4f1a85fa2883497e42c4853d0cceb Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 13 Apr 2026 13:39:10 -0700 Subject: [PATCH 09/32] set filter_stop default to 0.1 for benchmarking Enables early exit by default so OpenSearch can test the heuristic without plumbing a new search parameter through the stack. 
--- bindings/cpp/include/svs/runtime/vamana_index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/cpp/include/svs/runtime/vamana_index.h b/bindings/cpp/include/svs/runtime/vamana_index.h index 5286551da..e4d530fb9 100644 --- a/bindings/cpp/include/svs/runtime/vamana_index.h +++ b/bindings/cpp/include/svs/runtime/vamana_index.h @@ -44,7 +44,7 @@ struct VamanaSearchParameters { // If the hit rate after the first round falls below this threshold, // stop and return empty results (caller can fall back to exact search). // Default 0 means never give up. - float filter_stop = 0.0f; + float filter_stop = 0.1f; }; } // namespace detail From 9e7f26e8d93794d9956cc89fb1b0145e98839911 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 13 Apr 2026 13:50:41 -0700 Subject: [PATCH 10/32] cap max batch size to number of vectors in index Batch size can never exceed the index size since there are no more vectors to check beyond that. --- bindings/cpp/src/dynamic_vamana_index_impl.h | 8 ++++++-- bindings/cpp/src/vamana_index_impl.h | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 6c28ee958..bde347abd 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -128,9 +128,13 @@ class DynamicVamanaIndexImpl { size_t found = 0; size_t total_checked = 0; auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); + // Maximum batch size is the number of vectors in the index. 
+ const auto max_batch_size = impl_->size(); do { - batch_size = - predict_further_processing(total_checked, found, k, batch_size); + batch_size = std::min( + predict_further_processing(total_checked, found, k, batch_size), + max_batch_size + ); iterator.next(batch_size); total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 81cd8ae31..f6bfb75aa 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -134,9 +134,13 @@ class VamanaIndexImpl { size_t found = 0; size_t total_checked = 0; auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); + // Maximum batch size is the number of vectors in the index. + const auto max_batch_size = get_impl()->size(); do { - batch_size = - predict_further_processing(total_checked, found, k, batch_size); + batch_size = std::min( + predict_further_processing(total_checked, found, k, batch_size), + max_batch_size + ); iterator.next(batch_size); total_checked += iterator.size(); for (auto& neighbor : iterator.results()) { From fca06e22ef010e18ce04ac4befd781444ee54626 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 13 Apr 2026 13:57:00 -0700 Subject: [PATCH 11/32] move max_batch_size cap into predict_further_processing Add max_batch_size parameter instead of capping at each call site. 
--- bindings/cpp/src/dynamic_vamana_index_impl.h | 6 ++---- bindings/cpp/src/svs_runtime_utils.h | 11 ++++++----- bindings/cpp/src/vamana_index_impl.h | 6 ++---- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index bde347abd..6ec9dc2ba 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -128,12 +128,10 @@ class DynamicVamanaIndexImpl { size_t found = 0; size_t total_checked = 0; auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); - // Maximum batch size is the number of vectors in the index. const auto max_batch_size = impl_->size(); do { - batch_size = std::min( - predict_further_processing(total_checked, found, k, batch_size), - max_batch_size + batch_size = predict_further_processing( + total_checked, found, k, batch_size, max_batch_size ); iterator.next(batch_size); total_checked += iterator.size(); diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 1ab1d4d07..77b2cb854 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -434,14 +434,15 @@ auto dispatch_storage_kind(StorageKind kind, F&& f, Args&&... args) { // Predict how many more items need to be processed to reach the goal, // based on the observed hit rate so far. // If no hits yet, returns `hint` unchanged. -// The caller should cap the result to a max batch size if needed. -inline size_t -predict_further_processing(size_t processed, size_t hits, size_t goal, size_t hint) { +// Result is capped at `max_batch_size` (e.g., number of vectors in the index). 
+inline size_t predict_further_processing( + size_t processed, size_t hits, size_t goal, size_t hint, size_t max_batch_size +) { if (hits == 0 || hits >= goal) { - return hint; + return std::min(hint, max_batch_size); } float batch_size = static_cast(goal - hits) * processed / hits; - return std::max(static_cast(batch_size), size_t{1}); + return std::min(std::max(static_cast(batch_size), size_t{1}), max_batch_size); } // Check if the filtered search should stop early based on the observed hit rate. diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index f6bfb75aa..a351f67e7 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -134,12 +134,10 @@ class VamanaIndexImpl { size_t found = 0; size_t total_checked = 0; auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); - // Maximum batch size is the number of vectors in the index. const auto max_batch_size = get_impl()->size(); do { - batch_size = std::min( - predict_further_processing(total_checked, found, k, batch_size), - max_batch_size + batch_size = predict_further_processing( + total_checked, found, k, batch_size, max_batch_size ); iterator.next(batch_size); total_checked += iterator.size(); From ad0d299e1a04c7760f8224d59d6ba3cd42680d33 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 13 Apr 2026 16:28:17 -0700 Subject: [PATCH 12/32] revert filter_stop default to 0 (disabled) Keep early exit opt-in only. OpenSearch can set filter_stop=0.01 when ready to test the heuristic. 
--- bindings/cpp/include/svs/runtime/vamana_index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/cpp/include/svs/runtime/vamana_index.h b/bindings/cpp/include/svs/runtime/vamana_index.h index e4d530fb9..5286551da 100644 --- a/bindings/cpp/include/svs/runtime/vamana_index.h +++ b/bindings/cpp/include/svs/runtime/vamana_index.h @@ -44,7 +44,7 @@ struct VamanaSearchParameters { // If the hit rate after the first round falls below this threshold, // stop and return empty results (caller can fall back to exact search). // Default 0 means never give up. - float filter_stop = 0.1f; + float filter_stop = 0.0f; }; } // namespace detail From 657e3137a0e47eecae557a517be8182a9236a4a4 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 14 Apr 2026 12:43:12 -0700 Subject: [PATCH 13/32] address review: Unspecify filter_stop, hoist max_batch_size, rename max_value - Use Unspecify() for filter_stop default, set_if_specified pattern - Move max_batch_size (impl size) out of search_closure - Rename max_batch_size to max_value in predict_further_processing --- bindings/cpp/include/svs/runtime/vamana_index.h | 4 ++-- bindings/cpp/src/dynamic_vamana_index_impl.h | 7 +++++-- bindings/cpp/src/svs_runtime_utils.h | 8 ++++---- bindings/cpp/src/vamana_index_impl.h | 7 +++++-- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/bindings/cpp/include/svs/runtime/vamana_index.h b/bindings/cpp/include/svs/runtime/vamana_index.h index 5286551da..62ab7a639 100644 --- a/bindings/cpp/include/svs/runtime/vamana_index.h +++ b/bindings/cpp/include/svs/runtime/vamana_index.h @@ -43,8 +43,8 @@ struct VamanaSearchParameters { // Minimum filter hit rate to continue filtered search. // If the hit rate after the first round falls below this threshold, // stop and return empty results (caller can fall back to exact search). - // Default 0 means never give up. - float filter_stop = 0.0f; + // Default unspecified means never give up (treated as 0). 
+ float filter_stop = Unspecify(); }; } // namespace detail diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 6ec9dc2ba..f3655329a 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -118,7 +118,11 @@ class DynamicVamanaIndexImpl { // Selective search with IDSelector auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); - const float filter_stop = params ? params->filter_stop : 0.0f; + float filter_stop = 0.0f; + if (params) { + set_if_specified(filter_stop, params->filter_stop); + } + const auto max_batch_size = impl_->size(); auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { @@ -128,7 +132,6 @@ class DynamicVamanaIndexImpl { size_t found = 0; size_t total_checked = 0; auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); - const auto max_batch_size = impl_->size(); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 77b2cb854..de09c2966 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -434,15 +434,15 @@ auto dispatch_storage_kind(StorageKind kind, F&& f, Args&&... args) { // Predict how many more items need to be processed to reach the goal, // based on the observed hit rate so far. // If no hits yet, returns `hint` unchanged. -// Result is capped at `max_batch_size` (e.g., number of vectors in the index). +// Result is capped at `max_value` (e.g., number of vectors in the index). 
inline size_t predict_further_processing( - size_t processed, size_t hits, size_t goal, size_t hint, size_t max_batch_size + size_t processed, size_t hits, size_t goal, size_t hint, size_t max_value ) { if (hits == 0 || hits >= goal) { - return std::min(hint, max_batch_size); + return std::min(hint, max_value); } float batch_size = static_cast(goal - hits) * processed / hits; - return std::min(std::max(static_cast(batch_size), size_t{1}), max_batch_size); + return std::min(std::max(static_cast(batch_size), size_t{1}), max_value); } // Check if the filtered search should stop early based on the observed hit rate. diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index a351f67e7..2cc3080d3 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -124,7 +124,11 @@ class VamanaIndexImpl { get_impl()->set_search_parameters(old_sp); }); get_impl()->set_search_parameters(sp); - const float filter_stop = params ? params->filter_stop : 0.0f; + float filter_stop = 0.0f; + if (params) { + set_if_specified(filter_stop, params->filter_stop); + } + const auto max_batch_size = get_impl()->size(); auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { @@ -134,7 +138,6 @@ class VamanaIndexImpl { size_t found = 0; size_t total_checked = 0; auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); - const auto max_batch_size = get_impl()->size(); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size From f6f4d0de5565a7314da5a63e433b0eb2b9024227 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 14:48:32 -0700 Subject: [PATCH 14/32] set internal filter_stop default to 0.1 for benchmarking When caller doesn't set filter_stop, the internal fallback is now 0.1 (give up below 10% hit rate) instead of 0 (never give up). Caller can still override by setting filter_stop explicitly. 
--- bindings/cpp/src/dynamic_vamana_index_impl.h | 2 +- bindings/cpp/src/vamana_index_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index f3655329a..f1fb5b935 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -118,7 +118,7 @@ class DynamicVamanaIndexImpl { // Selective search with IDSelector auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); - float filter_stop = 0.0f; + float filter_stop = 0.1f; if (params) { set_if_specified(filter_stop, params->filter_stop); } diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 2cc3080d3..4dfefed94 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -124,7 +124,7 @@ class VamanaIndexImpl { get_impl()->set_search_parameters(old_sp); }); get_impl()->set_search_parameters(sp); - float filter_stop = 0.0f; + float filter_stop = 0.1f; if (params) { set_if_specified(filter_stop, params->filter_stop); } From 0cb70470fddfd26dfedea84584cb5d590e666350 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 15:07:18 -0700 Subject: [PATCH 15/32] early exit on zero hits when filter_stop is enabled Give up immediately after first round if no results pass the filter, instead of continuing to grind through the graph. --- bindings/cpp/src/svs_runtime_utils.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index de09c2966..670f5741f 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -450,9 +450,12 @@ inline size_t predict_further_processing( // give up and let the caller fall back to exact search. 
inline bool should_stop_filtered_search(size_t total_checked, size_t found, float filter_stop) { - if (filter_stop <= 0 || total_checked == 0 || found == 0) { + if (filter_stop <= 0 || total_checked == 0) { return false; } + if (found == 0) { + return true; + } float hit_rate = static_cast(found) / total_checked; return hit_rate < filter_stop; } From bfc10bd184526098c5e3bc537298ab0771933a5b Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 16:36:34 -0700 Subject: [PATCH 16/32] add pre-search filter sampling to estimate hit rate Sample IDs at a fixed stride before graph traversal to estimate filter hit rate. Uses this to set smarter initial batch size (k / hit_rate) and to trigger early exit before any search if hit rate < filter_stop. Controlled by filter_estimate_batch param (default enabled). --- .../cpp/include/svs/runtime/vamana_index.h | 3 ++ bindings/cpp/src/dynamic_vamana_index_impl.h | 31 +++++++++++++++++- bindings/cpp/src/svs_runtime_utils.h | 26 +++++++++++++++ bindings/cpp/src/vamana_index_impl.h | 32 ++++++++++++++++++- 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/include/svs/runtime/vamana_index.h b/bindings/cpp/include/svs/runtime/vamana_index.h index 62ab7a639..2149c0d3e 100644 --- a/bindings/cpp/include/svs/runtime/vamana_index.h +++ b/bindings/cpp/include/svs/runtime/vamana_index.h @@ -45,6 +45,9 @@ struct VamanaSearchParameters { // stop and return empty results (caller can fall back to exact search). // Default unspecified means never give up (treated as 0). float filter_stop = Unspecify(); + // Enable pre-search filter sampling to estimate hit rate before graph traversal. + // Uses a random sample of IDs to set initial batch size and trigger early exit.
+ OptionalBool filter_estimate_batch = Unspecify(); }; } // namespace detail diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index f1fb5b935..e0055fb78 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -122,8 +122,32 @@ class DynamicVamanaIndexImpl { if (params) { set_if_specified(filter_stop, params->filter_stop); } + bool filter_estimate_batch = true; + if (params) { + set_if_specified(filter_estimate_batch, params->filter_estimate_batch); + } const auto max_batch_size = impl_->size(); + // Pre-search filter sampling: estimate hit rate before graph traversal. + float estimated_hit_rate = 1.0f; + if (filter_estimate_batch) { + auto ids = impl_->all_ids(); + std::vector id_vec(ids.begin(), ids.end()); + estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); + // Early exit before any search if estimated hit rate is too low. + if (filter_stop > 0 && estimated_hit_rate < filter_stop) { + for (size_t i = 0; i < queries.size(); ++i) { + for (size_t j = 0; j < k; ++j) { + result.set( + Neighbor{Unspecify(), Unspecify()}, i, j + ); + } + } + impl_->set_search_parameters(old_sp); + return; + } + } + auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { // For every query @@ -131,7 +155,12 @@ class DynamicVamanaIndexImpl { auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); + // Use estimated hit rate for smarter initial batch size. + auto batch_size = (estimated_hit_rate > 0) + ? 
std::min( + static_cast(k / estimated_hit_rate), max_batch_size + ) + : std::max(k, sp.buffer_config_.get_search_window_size()); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 670f5741f..c13fe2e5e 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -460,6 +460,32 @@ should_stop_filtered_search(size_t total_checked, size_t found, float filter_sto return hit_rate < filter_stop; } +// Estimate the filter hit rate by sampling IDs from the index. +// Uses stride-based sampling (every N-th ID) for efficiency. +// Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). +inline float estimate_filter_hit_rate( + const IDFilter& filter, const std::vector& all_ids, size_t sample_size = 200 +) { + if (all_ids.empty()) { + return 0.0f; + } + size_t n = all_ids.size(); + size_t actual_sample = std::min(sample_size, n); + size_t stride = n / actual_sample; + if (stride == 0) { + stride = 1; + } + size_t hits = 0; + size_t checked = 0; + for (size_t i = 0; i < n && checked < actual_sample; i += stride) { + if (filter.is_member(all_ids[i])) { + hits++; + } + checked++; + } + return static_cast(hits) / checked; +} + inline svs::threads::ThreadPoolHandle default_threadpool() { return svs::threads::ThreadPoolHandle(svs::threads::OMPThreadPool(omp_get_max_threads()) ); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 4dfefed94..0252d03c6 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -38,6 +38,7 @@ #include #include +#include #include #include @@ -128,8 +129,32 @@ class VamanaIndexImpl { if (params) { set_if_specified(filter_stop, params->filter_stop); } + bool filter_estimate_batch = true; + if (params) { + set_if_specified(filter_estimate_batch, params->filter_estimate_batch); + } const 
auto max_batch_size = get_impl()->size(); + // Pre-search filter sampling: estimate hit rate before graph traversal. + float estimated_hit_rate = 1.0f; + if (filter_estimate_batch) { + // Static Vamana doesn't have all_ids(); generate sequential IDs. + std::vector id_vec(max_batch_size); + std::iota(id_vec.begin(), id_vec.end(), 0); + estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); + // Early exit before any search if estimated hit rate is too low. + if (filter_stop > 0 && estimated_hit_rate < filter_stop) { + for (size_t i = 0; i < queries.size(); ++i) { + for (size_t j = 0; j < k; ++j) { + result.set( + Neighbor{Unspecify(), Unspecify()}, i, j + ); + } + } + return; + } + } + auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { // For every query @@ -137,7 +162,12 @@ class VamanaIndexImpl { auto iterator = get_impl()->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = std::max(k, sp.buffer_config_.get_search_window_size()); + // Use estimated hit rate for smarter initial batch size. + auto batch_size = (estimated_hit_rate > 0) + ? 
std::min( + static_cast(k / estimated_hit_rate), max_batch_size + ) + : std::max(k, sp.buffer_config_.get_search_window_size()); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size From ae61893458ee021438fe4a9bd98c58091650e5fc Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 16:38:21 -0700 Subject: [PATCH 17/32] apply clang-format --- bindings/cpp/src/dynamic_vamana_index_impl.h | 15 +++++++-------- bindings/cpp/src/vamana_index_impl.h | 15 +++++++-------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index e0055fb78..70d6f0bb5 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -138,9 +138,7 @@ class DynamicVamanaIndexImpl { if (filter_stop > 0 && estimated_hit_rate < filter_stop) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { - result.set( - Neighbor{Unspecify(), Unspecify()}, i, j - ); + result.set(Neighbor{Unspecify(), Unspecify()}, i, j); } } impl_->set_search_parameters(old_sp); @@ -156,11 +154,12 @@ class DynamicVamanaIndexImpl { size_t found = 0; size_t total_checked = 0; // Use estimated hit rate for smarter initial batch size. - auto batch_size = (estimated_hit_rate > 0) - ? std::min( - static_cast(k / estimated_hit_rate), max_batch_size - ) - : std::max(k, sp.buffer_config_.get_search_window_size()); + auto batch_size = + (estimated_hit_rate > 0) + ? 
std::min( + static_cast(k / estimated_hit_rate), max_batch_size + ) + : std::max(k, sp.buffer_config_.get_search_window_size()); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 0252d03c6..7d676c150 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -146,9 +146,7 @@ class VamanaIndexImpl { if (filter_stop > 0 && estimated_hit_rate < filter_stop) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { - result.set( - Neighbor{Unspecify(), Unspecify()}, i, j - ); + result.set(Neighbor{Unspecify(), Unspecify()}, i, j); } } return; @@ -163,11 +161,12 @@ class VamanaIndexImpl { size_t found = 0; size_t total_checked = 0; // Use estimated hit rate for smarter initial batch size. - auto batch_size = (estimated_hit_rate > 0) - ? std::min( - static_cast(k / estimated_hit_rate), max_batch_size - ) - : std::max(k, sp.buffer_config_.get_search_window_size()); + auto batch_size = + (estimated_hit_rate > 0) + ? 
std::min( + static_cast(k / estimated_hit_rate), max_batch_size + ) + : std::max(k, sp.buffer_config_.get_search_window_size()); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size From 6a4db67767877bc2486aba5fd8cfc55d1fc42f07 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 16:57:44 -0700 Subject: [PATCH 18/32] refactor: move sampling and batch size logic to utility functions - Add estimate_filter_hit_rate() for pre-search sampling - Add should_stop_filtered_search_by_estimate() for pre-search early exit - Add compute_initial_batch_size() using estimated hit rate - Add filter_estimate_batch param to SearchParams (default enabled) - Reuse utilities in both dynamic and static vamana impls --- bindings/cpp/src/dynamic_vamana_index_impl.h | 17 +++++++--------- bindings/cpp/src/svs_runtime_utils.h | 21 ++++++++++++++++++++ bindings/cpp/src/vamana_index_impl.h | 17 +++++++--------- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 70d6f0bb5..245aca0aa 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -134,8 +134,9 @@ class DynamicVamanaIndexImpl { auto ids = impl_->all_ids(); std::vector id_vec(ids.begin(), ids.end()); estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); - // Early exit before any search if estimated hit rate is too low. 
- if (filter_stop > 0 && estimated_hit_rate < filter_stop) { + if (should_stop_filtered_search_by_estimate( + estimated_hit_rate, filter_stop + )) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { result.set(Neighbor{Unspecify(), Unspecify()}, i, j); @@ -145,21 +146,17 @@ class DynamicVamanaIndexImpl { return; } } + const auto sws = sp.buffer_config_.get_search_window_size(); auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { - // For every query auto query = queries.get_datum(i); auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - // Use estimated hit rate for smarter initial batch size. - auto batch_size = - (estimated_hit_rate > 0) - ? std::min( - static_cast(k / estimated_hit_rate), max_batch_size - ) - : std::max(k, sp.buffer_config_.get_search_window_size()); + auto batch_size = compute_initial_batch_size( + estimated_hit_rate, k, sws, max_batch_size + ); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index c13fe2e5e..2f9a8e6b9 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -460,6 +460,12 @@ should_stop_filtered_search(size_t total_checked, size_t found, float filter_sto return hit_rate < filter_stop; } +// Check if the search should stop before starting, based on the estimated hit rate. +inline bool +should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_stop) { + return filter_stop > 0 && estimated_hit_rate < filter_stop; +} + // Estimate the filter hit rate by sampling IDs from the index. // Uses stride-based sampling (every N-th ID) for efficiency. // Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). 
@@ -486,6 +492,21 @@ inline float estimate_filter_hit_rate( return static_cast(hits) / checked; } +// Compute initial batch size from estimated filter hit rate. +// If hit rate is known, use k / hit_rate (capped at max_value). +// Otherwise fall back to max(k, search_window_size). +inline size_t compute_initial_batch_size( + float estimated_hit_rate, + size_t k, + size_t search_window_size, + size_t max_value +) { + if (estimated_hit_rate > 0) { + return std::min(static_cast(k / estimated_hit_rate), max_value); + } + return std::max(k, search_window_size); +} + inline svs::threads::ThreadPoolHandle default_threadpool() { return svs::threads::ThreadPoolHandle(svs::threads::OMPThreadPool(omp_get_max_threads()) ); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 7d676c150..7410d1cb0 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -142,8 +142,9 @@ class VamanaIndexImpl { std::vector id_vec(max_batch_size); std::iota(id_vec.begin(), id_vec.end(), 0); estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); - // Early exit before any search if estimated hit rate is too low. - if (filter_stop > 0 && estimated_hit_rate < filter_stop) { + if (should_stop_filtered_search_by_estimate( + estimated_hit_rate, filter_stop + )) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { result.set(Neighbor{Unspecify(), Unspecify()}, i, j); @@ -152,21 +153,17 @@ class VamanaIndexImpl { return; } } + const auto sws = sp.buffer_config_.get_search_window_size(); auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { - // For every query auto query = queries.get_datum(i); auto iterator = get_impl()->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - // Use estimated hit rate for smarter initial batch size. - auto batch_size = - (estimated_hit_rate > 0) - ? 
std::min( - static_cast(k / estimated_hit_rate), max_batch_size - ) - : std::max(k, sp.buffer_config_.get_search_window_size()); + auto batch_size = compute_initial_batch_size( + estimated_hit_rate, k, sws, max_batch_size + ); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size From d009f97578dfd700fd639a35d346b084af0d7013 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 16:59:36 -0700 Subject: [PATCH 19/32] apply clang-format --- bindings/cpp/src/dynamic_vamana_index_impl.h | 9 +++------ bindings/cpp/src/svs_runtime_utils.h | 5 +---- bindings/cpp/src/vamana_index_impl.h | 9 +++------ 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 245aca0aa..58fe42da1 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -134,9 +134,7 @@ class DynamicVamanaIndexImpl { auto ids = impl_->all_ids(); std::vector id_vec(ids.begin(), ids.end()); estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); - if (should_stop_filtered_search_by_estimate( - estimated_hit_rate, filter_stop - )) { + if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { result.set(Neighbor{Unspecify(), Unspecify()}, i, j); @@ -154,9 +152,8 @@ class DynamicVamanaIndexImpl { auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = compute_initial_batch_size( - estimated_hit_rate, k, sws, max_batch_size - ); + auto batch_size = + compute_initial_batch_size(estimated_hit_rate, k, sws, max_batch_size); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 2f9a8e6b9..1ad2008ec 100644 --- 
a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -496,10 +496,7 @@ inline float estimate_filter_hit_rate( // If hit rate is known, use k / hit_rate (capped at max_value). // Otherwise fall back to max(k, search_window_size). inline size_t compute_initial_batch_size( - float estimated_hit_rate, - size_t k, - size_t search_window_size, - size_t max_value + float estimated_hit_rate, size_t k, size_t search_window_size, size_t max_value ) { if (estimated_hit_rate > 0) { return std::min(static_cast(k / estimated_hit_rate), max_value); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 7410d1cb0..cf8c116f7 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -142,9 +142,7 @@ class VamanaIndexImpl { std::vector id_vec(max_batch_size); std::iota(id_vec.begin(), id_vec.end(), 0); estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); - if (should_stop_filtered_search_by_estimate( - estimated_hit_rate, filter_stop - )) { + if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { result.set(Neighbor{Unspecify(), Unspecify()}, i, j); @@ -161,9 +159,8 @@ class VamanaIndexImpl { auto iterator = get_impl()->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = compute_initial_batch_size( - estimated_hit_rate, k, sws, max_batch_size - ); + auto batch_size = + compute_initial_batch_size(estimated_hit_rate, k, sws, max_batch_size); do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size From 3d972d89851ce90dcc721c061adbdd03cea30321 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 17:07:48 -0700 Subject: [PATCH 20/32] combine duplicate params check into single if block --- bindings/cpp/src/dynamic_vamana_index_impl.h | 4 +--- bindings/cpp/src/vamana_index_impl.h 
| 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 58fe42da1..2da639e8d 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -119,11 +119,9 @@ class DynamicVamanaIndexImpl { auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); float filter_stop = 0.1f; - if (params) { - set_if_specified(filter_stop, params->filter_stop); - } bool filter_estimate_batch = true; if (params) { + set_if_specified(filter_stop, params->filter_stop); set_if_specified(filter_estimate_batch, params->filter_estimate_batch); } const auto max_batch_size = impl_->size(); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index cf8c116f7..54866c44d 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -126,11 +126,9 @@ class VamanaIndexImpl { }); get_impl()->set_search_parameters(sp); float filter_stop = 0.1f; - if (params) { - set_if_specified(filter_stop, params->filter_stop); - } bool filter_estimate_batch = true; if (params) { + set_if_specified(filter_stop, params->filter_stop); set_if_specified(filter_estimate_batch, params->filter_estimate_batch); } const auto max_batch_size = get_impl()->size(); From 086953dcbb907e1b134c3351b16d459f061a77d8 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 17:22:44 -0700 Subject: [PATCH 21/32] simplify filter sampling: take first N IDs instead of full copy - Template estimate_filter_hit_rate to accept any ID range - Pass all_ids() directly without vector copy for dynamic vamana - Only generate sample_size sequential IDs for static vamana --- bindings/cpp/src/dynamic_vamana_index_impl.h | 4 +--- bindings/cpp/src/svs_runtime_utils.h | 25 +++++++++----------- bindings/cpp/src/vamana_index_impl.h | 3 ++- 3 files changed, 14 insertions(+), 18 deletions(-) diff 
--git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 2da639e8d..8f9b0db8d 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -129,9 +129,7 @@ class DynamicVamanaIndexImpl { // Pre-search filter sampling: estimate hit rate before graph traversal. float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { - auto ids = impl_->all_ids(); - std::vector id_vec(ids.begin(), ids.end()); - estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); + estimated_hit_rate = estimate_filter_hit_rate(*filter, impl_->all_ids()); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { for (size_t i = 0; i < queries.size(); ++i) { for (size_t j = 0; j < k; ++j) { diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 1ad2008ec..6556c4adb 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -466,29 +466,26 @@ should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_s return filter_stop > 0 && estimated_hit_rate < filter_stop; } -// Estimate the filter hit rate by sampling IDs from the index. -// Uses stride-based sampling (every N-th ID) for efficiency. +// Estimate the filter hit rate by checking the first sample_size IDs. // Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). 
+template inline float estimate_filter_hit_rate( - const IDFilter& filter, const std::vector& all_ids, size_t sample_size = 200 + const IDFilter& filter, const IdRange& ids, size_t sample_size = 200 ) { - if (all_ids.empty()) { - return 0.0f; - } - size_t n = all_ids.size(); - size_t actual_sample = std::min(sample_size, n); - size_t stride = n / actual_sample; - if (stride == 0) { - stride = 1; - } size_t hits = 0; size_t checked = 0; - for (size_t i = 0; i < n && checked < actual_sample; i += stride) { - if (filter.is_member(all_ids[i])) { + for (auto id : ids) { + if (checked >= sample_size) { + break; + } + if (filter.is_member(id)) { hits++; } checked++; } + if (checked == 0) { + return 0.0f; + } return static_cast(hits) / checked; } diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 54866c44d..3265c8248 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -137,7 +137,8 @@ class VamanaIndexImpl { float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { // Static Vamana doesn't have all_ids(); generate sequential IDs. 
- std::vector id_vec(max_batch_size); + size_t sample = std::min(max_batch_size, size_t{200}); + std::vector id_vec(sample); std::iota(id_vec.begin(), id_vec.end(), 0); estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { From cb8b98c40f9ee53c69cae848865c732162c5b336 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 17:42:30 -0700 Subject: [PATCH 22/32] extract kFilterSampleSize constant and pad_empty_results utility - Define kFilterSampleSize = 200 used by both static and dynamic vamana - Add pad_empty_results() to avoid duplicated result padding logic - Add comment explaining why padding is needed on early exit --- bindings/cpp/src/dynamic_vamana_index_impl.h | 6 +----- bindings/cpp/src/svs_runtime_utils.h | 17 ++++++++++++++++- bindings/cpp/src/vamana_index_impl.h | 8 ++------ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 8f9b0db8d..fe5f77b7a 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -131,11 +131,7 @@ class DynamicVamanaIndexImpl { if (filter_estimate_batch) { estimated_hit_rate = estimate_filter_hit_rate(*filter, impl_->all_ids()); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { - for (size_t i = 0; i < queries.size(); ++i) { - for (size_t j = 0; j < k; ++j) { - result.set(Neighbor{Unspecify(), Unspecify()}, i, j); - } - } + pad_empty_results(result, queries.size(), k); impl_->set_search_parameters(old_sp); return; } diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 6556c4adb..d6dac629c 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -466,11 +466,14 @@ should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_s return filter_stop 
> 0 && estimated_hit_rate < filter_stop; } +// Default number of IDs to sample when estimating filter hit rate. +constexpr size_t kFilterSampleSize = 200; + // Estimate the filter hit rate by checking the first sample_size IDs. // Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). template inline float estimate_filter_hit_rate( - const IDFilter& filter, const IdRange& ids, size_t sample_size = 200 + const IDFilter& filter, const IdRange& ids, size_t sample_size = kFilterSampleSize ) { size_t hits = 0; size_t checked = 0; @@ -501,6 +504,18 @@ inline size_t compute_initial_batch_size( return std::max(k, search_window_size); } +// Fill all result slots with unspecified values. +// Required when early-exiting before search: the caller-allocated result buffer +// may contain uninitialized data, so we must write valid "no result" markers. +inline void +pad_empty_results(svs::QueryResultView& result, size_t num_queries, size_t k) { + for (size_t i = 0; i < num_queries; ++i) { + for (size_t j = 0; j < k; ++j) { + result.set(Neighbor{Unspecify(), Unspecify()}, i, j); + } + } +} + inline svs::threads::ThreadPoolHandle default_threadpool() { return svs::threads::ThreadPoolHandle(svs::threads::OMPThreadPool(omp_get_max_threads()) ); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 3265c8248..d96d3f785 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -137,16 +137,12 @@ class VamanaIndexImpl { float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { // Static Vamana doesn't have all_ids(); generate sequential IDs. 
- size_t sample = std::min(max_batch_size, size_t{200}); + size_t sample = std::min(max_batch_size, kFilterSampleSize); std::vector id_vec(sample); std::iota(id_vec.begin(), id_vec.end(), 0); estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { - for (size_t i = 0; i < queries.size(); ++i) { - for (size_t j = 0; j < k; ++j) { - result.set(Neighbor{Unspecify(), Unspecify()}, i, j); - } - } + pad_empty_results(result, queries.size(), k); return; } } From e5964f8ffd247d0735dcdcd26f100dedbb29318d Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 17:55:02 -0700 Subject: [PATCH 23/32] use stride-based sampling for filter hit rate estimation Sample every N-th ID across the full range instead of first 200. Avoids bias from ID ordering. Both impls now just pass index size. --- bindings/cpp/src/dynamic_vamana_index_impl.h | 2 +- bindings/cpp/src/svs_runtime_utils.h | 19 ++++++++++++------- bindings/cpp/src/vamana_index_impl.h | 7 +------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index fe5f77b7a..98475aa70 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -129,7 +129,7 @@ class DynamicVamanaIndexImpl { // Pre-search filter sampling: estimate hit rate before graph traversal. 
float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { - estimated_hit_rate = estimate_filter_hit_rate(*filter, impl_->all_ids()); + estimated_hit_rate = estimate_filter_hit_rate(*filter, max_batch_size); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { pad_empty_results(result, queries.size(), k); impl_->set_search_parameters(old_sp); diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index d6dac629c..fab94e8b2 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -469,18 +469,23 @@ should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_s // Default number of IDs to sample when estimating filter hit rate. constexpr size_t kFilterSampleSize = 200; -// Estimate the filter hit rate by checking the first sample_size IDs. +// Estimate the filter hit rate by sampling IDs evenly across the range. +// Uses stride-based sampling to avoid bias from ID ordering. // Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). 
-template inline float estimate_filter_hit_rate( - const IDFilter& filter, const IdRange& ids, size_t sample_size = kFilterSampleSize + const IDFilter& filter, size_t total_ids, size_t sample_size = kFilterSampleSize ) { + if (total_ids == 0) { + return 0.0f; + } + size_t actual_sample = std::min(sample_size, total_ids); + size_t stride = total_ids / actual_sample; + if (stride == 0) { + stride = 1; + } size_t hits = 0; size_t checked = 0; - for (auto id : ids) { - if (checked >= sample_size) { - break; - } + for (size_t id = 0; id < total_ids && checked < actual_sample; id += stride) { if (filter.is_member(id)) { hits++; } diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index d96d3f785..69d6c7277 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -38,7 +38,6 @@ #include #include -#include #include #include @@ -136,11 +135,7 @@ class VamanaIndexImpl { // Pre-search filter sampling: estimate hit rate before graph traversal. float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { - // Static Vamana doesn't have all_ids(); generate sequential IDs. 
- size_t sample = std::min(max_batch_size, kFilterSampleSize); - std::vector id_vec(sample); - std::iota(id_vec.begin(), id_vec.end(), 0); - estimated_hit_rate = estimate_filter_hit_rate(*filter, id_vec); + estimated_hit_rate = estimate_filter_hit_rate(*filter, max_batch_size); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { pad_empty_results(result, queries.size(), k); return; From 77f7a57f1e429c2b6eb6024123f2e40948653e76 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 19:13:31 -0700 Subject: [PATCH 24/32] use random sampling for filter hit rate estimation --- bindings/cpp/src/svs_runtime_utils.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index fab94e8b2..7da522fea 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -55,6 +55,7 @@ inline bool lvq_leanvec_enabled() { return false; } #include #include #include +#include #include #include #include @@ -469,8 +470,7 @@ should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_s // Default number of IDs to sample when estimating filter hit rate. constexpr size_t kFilterSampleSize = 200; -// Estimate the filter hit rate by sampling IDs evenly across the range. -// Uses stride-based sampling to avoid bias from ID ordering. +// Estimate the filter hit rate by randomly sampling IDs from the index. // Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). 
inline float estimate_filter_hit_rate( const IDFilter& filter, size_t total_ids, size_t sample_size = kFilterSampleSize @@ -479,22 +479,15 @@ inline float estimate_filter_hit_rate( return 0.0f; } size_t actual_sample = std::min(sample_size, total_ids); - size_t stride = total_ids / actual_sample; - if (stride == 0) { - stride = 1; - } + std::mt19937 rng(42); + std::uniform_int_distribution dist(0, total_ids - 1); size_t hits = 0; - size_t checked = 0; - for (size_t id = 0; id < total_ids && checked < actual_sample; id += stride) { - if (filter.is_member(id)) { + for (size_t i = 0; i < actual_sample; ++i) { + if (filter.is_member(dist(rng))) { hits++; } - checked++; - } - if (checked == 0) { - return 0.0f; } - return static_cast(hits) / checked; + return static_cast(hits) / actual_sample; } // Compute initial batch size from estimated filter hit rate. From bdce86e27398a2690c0dd29547686fc66a8d8bbf Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 22:22:41 -0700 Subject: [PATCH 25/32] sample from valid IDs for dynamic vamana, sequential for static Dynamic vamana uses all_ids() to sample only valid IDs (handles deletions). Static vamana uses sequential IDs via separate function. Both use random sampling with mt19937. --- bindings/cpp/src/dynamic_vamana_index_impl.h | 2 +- bindings/cpp/src/svs_runtime_utils.h | 28 ++++++++++++++++++-- bindings/cpp/src/vamana_index_impl.h | 3 ++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 98475aa70..fe5f77b7a 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -129,7 +129,7 @@ class DynamicVamanaIndexImpl { // Pre-search filter sampling: estimate hit rate before graph traversal. 
float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { - estimated_hit_rate = estimate_filter_hit_rate(*filter, max_batch_size); + estimated_hit_rate = estimate_filter_hit_rate(*filter, impl_->all_ids()); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { pad_empty_results(result, queries.size(), k); impl_->set_search_parameters(old_sp); diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 7da522fea..c0909bfc5 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -470,9 +470,33 @@ should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_s // Default number of IDs to sample when estimating filter hit rate. constexpr size_t kFilterSampleSize = 200; -// Estimate the filter hit rate by randomly sampling IDs from the index. -// Returns the fraction of sampled IDs that pass the filter (0.0 to 1.0). +// Estimate the filter hit rate by randomly sampling from a list of valid IDs. +// Use this for dynamic indices where IDs may have gaps from deletions. +template inline float estimate_filter_hit_rate( + const IDFilter& filter, + const IdContainer& all_ids, + size_t sample_size = kFilterSampleSize +) { + size_t n = all_ids.size(); + if (n == 0) { + return 0.0f; + } + size_t actual_sample = std::min(sample_size, n); + std::mt19937 rng(42); + std::uniform_int_distribution dist(0, n - 1); + size_t hits = 0; + for (size_t i = 0; i < actual_sample; ++i) { + if (filter.is_member(all_ids[dist(rng)])) { + hits++; + } + } + return static_cast(hits) / actual_sample; +} + +// Estimate the filter hit rate assuming sequential IDs [0, total_ids). +// Use this for static indices where IDs are always 0 to size-1. 
+inline float estimate_filter_hit_rate_sequential( const IDFilter& filter, size_t total_ids, size_t sample_size = kFilterSampleSize ) { if (total_ids == 0) { diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 69d6c7277..002d4f5bd 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -135,7 +135,8 @@ class VamanaIndexImpl { // Pre-search filter sampling: estimate hit rate before graph traversal. float estimated_hit_rate = 1.0f; if (filter_estimate_batch) { - estimated_hit_rate = estimate_filter_hit_rate(*filter, max_batch_size); + estimated_hit_rate = + estimate_filter_hit_rate_sequential(*filter, max_batch_size); if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { pad_empty_results(result, queries.size(), k); return; From 22a484c5c3a7c25f7791b23f422114134bf68e40 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 15 Apr 2026 22:29:02 -0700 Subject: [PATCH 26/32] deduplicate sampling into shared estimate_filter_hit_rate_impl --- bindings/cpp/src/svs_runtime_utils.h | 51 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index c0909bfc5..bee49e5cf 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -470,48 +470,49 @@ should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_s // Default number of IDs to sample when estimating filter hit rate. constexpr size_t kFilterSampleSize = 200; -// Estimate the filter hit rate by randomly sampling from a list of valid IDs. -// Use this for dynamic indices where IDs may have gaps from deletions. -template -inline float estimate_filter_hit_rate( +// Estimate the filter hit rate by randomly sampling IDs. +// id_lookup(index) maps a random index to an actual ID to check. 
+template +inline float estimate_filter_hit_rate_impl( const IDFilter& filter, - const IdContainer& all_ids, + size_t pool_size, + IdLookup id_lookup, size_t sample_size = kFilterSampleSize ) { - size_t n = all_ids.size(); - if (n == 0) { + if (pool_size == 0) { return 0.0f; } - size_t actual_sample = std::min(sample_size, n); + size_t actual_sample = std::min(sample_size, pool_size); std::mt19937 rng(42); - std::uniform_int_distribution dist(0, n - 1); + std::uniform_int_distribution dist(0, pool_size - 1); size_t hits = 0; for (size_t i = 0; i < actual_sample; ++i) { - if (filter.is_member(all_ids[dist(rng)])) { + if (filter.is_member(id_lookup(dist(rng)))) { hits++; } } return static_cast(hits) / actual_sample; } -// Estimate the filter hit rate assuming sequential IDs [0, total_ids). -// Use this for static indices where IDs are always 0 to size-1. +// Estimate filter hit rate from a container of valid IDs (dynamic index). +template +inline float estimate_filter_hit_rate( + const IDFilter& filter, + const IdContainer& all_ids, + size_t sample_size = kFilterSampleSize +) { + return estimate_filter_hit_rate_impl( + filter, all_ids.size(), [&](size_t i) { return all_ids[i]; }, sample_size + ); +} + +// Estimate filter hit rate assuming sequential IDs [0, total_ids) (static index). inline float estimate_filter_hit_rate_sequential( const IDFilter& filter, size_t total_ids, size_t sample_size = kFilterSampleSize ) { - if (total_ids == 0) { - return 0.0f; - } - size_t actual_sample = std::min(sample_size, total_ids); - std::mt19937 rng(42); - std::uniform_int_distribution dist(0, total_ids - 1); - size_t hits = 0; - for (size_t i = 0; i < actual_sample; ++i) { - if (filter.is_member(dist(rng))) { - hits++; - } - } - return static_cast(hits) / actual_sample; + return estimate_filter_hit_rate_impl( + filter, total_ids, [](size_t i) { return i; }, sample_size + ); } // Compute initial batch size from estimated filter hit rate. 
From 94124481f4d1413919939d1f0e11ee8c4fda2d9b Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 17 Apr 2026 13:36:29 -0700 Subject: [PATCH 27/32] set internal filter_stop default to 0 (disabled) Keep pre-search sampling enabled but disable early exit by default. Benchmarks can isolate the effect of adaptive batch sizing + smart initial batch size from hit-rate estimation. --- bindings/cpp/src/dynamic_vamana_index_impl.h | 2 +- bindings/cpp/src/vamana_index_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index fe5f77b7a..254db78d7 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -118,7 +118,7 @@ class DynamicVamanaIndexImpl { // Selective search with IDSelector auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); - float filter_stop = 0.1f; + float filter_stop = 0.0f; bool filter_estimate_batch = true; if (params) { set_if_specified(filter_stop, params->filter_stop); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 002d4f5bd..a5a7a3c44 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -124,7 +124,7 @@ class VamanaIndexImpl { get_impl()->set_search_parameters(old_sp); }); get_impl()->set_search_parameters(sp); - float filter_stop = 0.1f; + float filter_stop = 0.0f; bool filter_estimate_batch = true; if (params) { set_if_specified(filter_stop, params->filter_stop); From c3ab78a20d394c75bf75c47b53d75ddcce09263d Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 20 Apr 2026 14:56:16 -0700 Subject: [PATCH 28/32] reuse predict_further_processing and should_stop_filtered_search for sampling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sampling functions now return (actual_sample, hits) pair instead of hit rate. 
The existing predict_further_processing and should_stop_filtered_search functions accept these directly — same math as ratio comparisons, fewer helper functions. - Rename estimate_filter_hit_rate -> sample_filter_hits - Remove compute_initial_batch_size (use predict_further_processing) - Remove should_stop_filtered_search_by_estimate (use should_stop_filtered_search) --- bindings/cpp/src/dynamic_vamana_index_impl.h | 17 ++++++--- bindings/cpp/src/svs_runtime_utils.h | 40 ++++++-------------- bindings/cpp/src/vamana_index_impl.h | 19 ++++++---- 3 files changed, 35 insertions(+), 41 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 254db78d7..abf7888be 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -127,16 +127,22 @@ class DynamicVamanaIndexImpl { const auto max_batch_size = impl_->size(); // Pre-search filter sampling: estimate hit rate before graph traversal. 
- float estimated_hit_rate = 1.0f; + size_t sampled = 0; + size_t sample_hits = 0; + const auto sws = sp.buffer_config_.get_search_window_size(); + const auto initial_batch_hint = std::max(k, sws); + auto initial_batch_size = initial_batch_hint; if (filter_estimate_batch) { - estimated_hit_rate = estimate_filter_hit_rate(*filter, impl_->all_ids()); - if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { + std::tie(sampled, sample_hits) = sample_filter_hits(*filter, impl_->all_ids()); + if (should_stop_filtered_search(sampled, sample_hits, filter_stop)) { pad_empty_results(result, queries.size(), k); impl_->set_search_parameters(old_sp); return; } + initial_batch_size = predict_further_processing( + sampled, sample_hits, k, initial_batch_hint, max_batch_size + ); } - const auto sws = sp.buffer_config_.get_search_window_size(); auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { @@ -144,8 +150,7 @@ class DynamicVamanaIndexImpl { auto iterator = impl_->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = - compute_initial_batch_size(estimated_hit_rate, k, sws, max_batch_size); + auto batch_size = initial_batch_size; do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index bee49e5cf..3325ae231 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -461,26 +461,22 @@ should_stop_filtered_search(size_t total_checked, size_t found, float filter_sto return hit_rate < filter_stop; } -// Check if the search should stop before starting, based on the estimated hit rate. -inline bool -should_stop_filtered_search_by_estimate(float estimated_hit_rate, float filter_stop) { - return filter_stop > 0 && estimated_hit_rate < filter_stop; -} - // Default number of IDs to sample when estimating filter hit rate. 
constexpr size_t kFilterSampleSize = 200; -// Estimate the filter hit rate by randomly sampling IDs. +// Sample IDs and count filter hits. Returns (actual_sample, hits). // id_lookup(index) maps a random index to an actual ID to check. +// Returned pair can be fed directly to predict_further_processing() and +// should_stop_filtered_search() — same math as hit rate comparisons. template -inline float estimate_filter_hit_rate_impl( +inline std::pair sample_filter_hits_impl( const IDFilter& filter, size_t pool_size, IdLookup id_lookup, size_t sample_size = kFilterSampleSize ) { if (pool_size == 0) { - return 0.0f; + return {0, 0}; } size_t actual_sample = std::min(sample_size, pool_size); std::mt19937 rng(42); @@ -491,42 +487,30 @@ inline float estimate_filter_hit_rate_impl( hits++; } } - return static_cast(hits) / actual_sample; + return {actual_sample, hits}; } -// Estimate filter hit rate from a container of valid IDs (dynamic index). +// Sample from a container of valid IDs (dynamic index). template -inline float estimate_filter_hit_rate( +inline std::pair sample_filter_hits( const IDFilter& filter, const IdContainer& all_ids, size_t sample_size = kFilterSampleSize ) { - return estimate_filter_hit_rate_impl( + return sample_filter_hits_impl( filter, all_ids.size(), [&](size_t i) { return all_ids[i]; }, sample_size ); } -// Estimate filter hit rate assuming sequential IDs [0, total_ids) (static index). -inline float estimate_filter_hit_rate_sequential( +// Sample assuming sequential IDs [0, total_ids) (static index). +inline std::pair sample_filter_hits_sequential( const IDFilter& filter, size_t total_ids, size_t sample_size = kFilterSampleSize ) { - return estimate_filter_hit_rate_impl( + return sample_filter_hits_impl( filter, total_ids, [](size_t i) { return i; }, sample_size ); } -// Compute initial batch size from estimated filter hit rate. -// If hit rate is known, use k / hit_rate (capped at max_value). -// Otherwise fall back to max(k, search_window_size). 
-inline size_t compute_initial_batch_size( - float estimated_hit_rate, size_t k, size_t search_window_size, size_t max_value -) { - if (estimated_hit_rate > 0) { - return std::min(static_cast(k / estimated_hit_rate), max_value); - } - return std::max(k, search_window_size); -} - // Fill all result slots with unspecified values. // Required when early-exiting before search: the caller-allocated result buffer // may contain uninitialized data, so we must write valid "no result" markers. diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index a5a7a3c44..7da1b6cbd 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -133,16 +133,22 @@ class VamanaIndexImpl { const auto max_batch_size = get_impl()->size(); // Pre-search filter sampling: estimate hit rate before graph traversal. - float estimated_hit_rate = 1.0f; + size_t sampled = 0; + size_t sample_hits = 0; + const auto sws = sp.buffer_config_.get_search_window_size(); + const auto initial_batch_hint = std::max(k, sws); + auto initial_batch_size = initial_batch_hint; if (filter_estimate_batch) { - estimated_hit_rate = - estimate_filter_hit_rate_sequential(*filter, max_batch_size); - if (should_stop_filtered_search_by_estimate(estimated_hit_rate, filter_stop)) { + std::tie(sampled, sample_hits) = + sample_filter_hits_sequential(*filter, max_batch_size); + if (should_stop_filtered_search(sampled, sample_hits, filter_stop)) { pad_empty_results(result, queries.size(), k); return; } + initial_batch_size = predict_further_processing( + sampled, sample_hits, k, initial_batch_hint, max_batch_size + ); } - const auto sws = sp.buffer_config_.get_search_window_size(); auto search_closure = [&](const auto& range, uint64_t SVS_UNUSED(tid)) { for (auto i : range) { @@ -150,8 +156,7 @@ class VamanaIndexImpl { auto iterator = get_impl()->batch_iterator(query); size_t found = 0; size_t total_checked = 0; - auto batch_size = - 
compute_initial_batch_size(estimated_hit_rate, k, sws, max_batch_size); + auto batch_size = initial_batch_size; do { batch_size = predict_further_processing( total_checked, found, k, batch_size, max_batch_size From b008b66ee3795883926fdbc49d3418b2b7b1d29a Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 22 Apr 2026 12:30:10 -0700 Subject: [PATCH 29/32] avoid O(N) all_ids() in sampling; use has_id check instead The all_ids() call materializes all vector IDs into a std::vector, which is O(N) with large heap allocation per search() call. For 10M vectors that overhead can exceed the savings from skipping one graph round. Replace with random ID generation in [0, size()) and skip invalid IDs via has_id() callback. For static index, pass always-true. --- bindings/cpp/src/dynamic_vamana_index_impl.h | 5 +- bindings/cpp/src/svs_runtime_utils.h | 57 ++++++++------------ bindings/cpp/src/vamana_index_impl.h | 2 +- 3 files changed, 27 insertions(+), 37 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index c87a4e64b..a81a4c7ec 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -133,7 +133,10 @@ class DynamicVamanaIndexImpl { const auto initial_batch_hint = std::max(k, sws); auto initial_batch_size = initial_batch_hint; if (filter_estimate_batch) { - std::tie(sampled, sample_hits) = sample_filter_hits(*filter, impl_->all_ids()); + std::tie(sampled, sample_hits) = + sample_filter_hits(*filter, max_batch_size, [this](size_t id) { + return impl_->has_id(id); + }); if (should_stop_filtered_search(sampled, sample_hits, filter_stop)) { pad_empty_results(result, queries.size(), k); impl_->set_search_parameters(old_sp); diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 81858fb69..0ec56714b 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -436,51 +436,38 @@ 
should_stop_filtered_search(size_t total_checked, size_t found, float filter_sto // Default number of IDs to sample when estimating filter hit rate. constexpr size_t kFilterSampleSize = 200; -// Sample IDs and count filter hits. Returns (actual_sample, hits). -// id_lookup(index) maps a random index to an actual ID to check. -// Returned pair can be fed directly to predict_further_processing() and -// should_stop_filtered_search() — same math as hit rate comparisons. -template -inline std::pair sample_filter_hits_impl( +// Sample random IDs from [0, total_ids) and count filter hits. +// is_valid(id) is checked first; invalid IDs are skipped (for dynamic indices +// where IDs may be deleted). Keeps sampling until sample_size valid IDs checked +// or max_tries exhausted. Returns (checked, hits) — fed directly to +// predict_further_processing() and should_stop_filtered_search(). +template +inline std::pair sample_filter_hits( const IDFilter& filter, - size_t pool_size, - IdLookup id_lookup, + size_t total_ids, + IsValid is_valid, size_t sample_size = kFilterSampleSize ) { - if (pool_size == 0) { + if (total_ids == 0) { return {0, 0}; } - size_t actual_sample = std::min(sample_size, pool_size); + size_t target = std::min(sample_size, total_ids); + size_t max_tries = target * 4; std::mt19937 rng(42); - std::uniform_int_distribution dist(0, pool_size - 1); + std::uniform_int_distribution dist(0, total_ids - 1); size_t hits = 0; - for (size_t i = 0; i < actual_sample; ++i) { - if (filter.is_member(id_lookup(dist(rng)))) { + size_t checked = 0; + for (size_t tries = 0; checked < target && tries < max_tries; ++tries) { + size_t id = dist(rng); + if (!is_valid(id)) { + continue; + } + if (filter.is_member(id)) { hits++; } + checked++; } - return {actual_sample, hits}; -} - -// Sample from a container of valid IDs (dynamic index). 
-template -inline std::pair sample_filter_hits( - const IDFilter& filter, - const IdContainer& all_ids, - size_t sample_size = kFilterSampleSize -) { - return sample_filter_hits_impl( - filter, all_ids.size(), [&](size_t i) { return all_ids[i]; }, sample_size - ); -} - -// Sample assuming sequential IDs [0, total_ids) (static index). -inline std::pair sample_filter_hits_sequential( - const IDFilter& filter, size_t total_ids, size_t sample_size = kFilterSampleSize -) { - return sample_filter_hits_impl( - filter, total_ids, [](size_t i) { return i; }, sample_size - ); + return {checked, hits}; } // Fill all result slots with unspecified values. diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 54514a03b..93ae7f444 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -140,7 +140,7 @@ class VamanaIndexImpl { auto initial_batch_size = initial_batch_hint; if (filter_estimate_batch) { std::tie(sampled, sample_hits) = - sample_filter_hits_sequential(*filter, max_batch_size); + sample_filter_hits(*filter, max_batch_size, [](size_t) { return true; }); if (should_stop_filtered_search(sampled, sample_hits, filter_stop)) { pad_empty_results(result, queries.size(), k); return; From 2fe43021ab3ad5140c4f28d1dcf4fb02ff55ea84 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 22 Apr 2026 14:28:20 -0700 Subject: [PATCH 30/32] scale sample size by filter_stop threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With a fixed sample of 200, very low filter_stop values (e.g., 0.001) can't be reliably estimated — at that rate we'd see 0 hits most of the time and can't distinguish 0.1% from 0.2%. Sample size is now max(200, 1/filter_stop), ensuring at least one expected hit at the threshold so noise doesn't dominate the decision. 
--- bindings/cpp/src/dynamic_vamana_index_impl.h | 10 ++++++---- bindings/cpp/src/svs_runtime_utils.h | 11 +++++++++++ bindings/cpp/src/vamana_index_impl.h | 8 ++++++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index a81a4c7ec..516a16539 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -133,10 +133,12 @@ class DynamicVamanaIndexImpl { const auto initial_batch_hint = std::max(k, sws); auto initial_batch_size = initial_batch_hint; if (filter_estimate_batch) { - std::tie(sampled, sample_hits) = - sample_filter_hits(*filter, max_batch_size, [this](size_t id) { - return impl_->has_id(id); - }); + std::tie(sampled, sample_hits) = sample_filter_hits( + *filter, + max_batch_size, + [this](size_t id) { return impl_->has_id(id); }, + sample_size_for_filter_stop(filter_stop) + ); if (should_stop_filtered_search(sampled, sample_hits, filter_stop)) { pad_empty_results(result, queries.size(), k); impl_->set_search_parameters(old_sp); diff --git a/bindings/cpp/src/svs_runtime_utils.h b/bindings/cpp/src/svs_runtime_utils.h index 0ec56714b..b081c91b1 100644 --- a/bindings/cpp/src/svs_runtime_utils.h +++ b/bindings/cpp/src/svs_runtime_utils.h @@ -470,6 +470,17 @@ inline std::pair sample_filter_hits( return {checked, hits}; } +// Compute sample size for filter hit rate estimation based on filter_stop. +// Need at least 1/filter_stop samples to reliably distinguish hit rates around +// the threshold (below that, noise dominates — e.g., 0.1% vs 0.2% both look +// like 0 hits at sample_size=200). +inline size_t sample_size_for_filter_stop(float filter_stop) { + if (filter_stop <= 0) { + return kFilterSampleSize; + } + return std::max(kFilterSampleSize, static_cast(1.0f / filter_stop)); +} + // Fill all result slots with unspecified values. 
// Required when early-exiting before search: the caller-allocated result buffer // may contain uninitialized data, so we must write valid "no result" markers. diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 93ae7f444..550257d4f 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -139,8 +139,12 @@ class VamanaIndexImpl { const auto initial_batch_hint = std::max(k, sws); auto initial_batch_size = initial_batch_hint; if (filter_estimate_batch) { - std::tie(sampled, sample_hits) = - sample_filter_hits(*filter, max_batch_size, [](size_t) { return true; }); + std::tie(sampled, sample_hits) = sample_filter_hits( + *filter, + max_batch_size, + [](size_t) { return true; }, + sample_size_for_filter_stop(filter_stop) + ); if (should_stop_filtered_search(sampled, sample_hits, filter_stop)) { pad_empty_results(result, queries.size(), k); return; From 71350dcd2182b25d5c4e3d8fe4eb4ac8382273cb Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 22 Apr 2026 15:41:23 -0700 Subject: [PATCH 31/32] set internal filter_stop default to 0.1 for benchmarking --- bindings/cpp/src/dynamic_vamana_index_impl.h | 2 +- bindings/cpp/src/vamana_index_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index 516a16539..a6db69dc8 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -118,7 +118,7 @@ class DynamicVamanaIndexImpl { // Selective search with IDSelector auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); - float filter_stop = 0.0f; + float filter_stop = 0.1f; bool filter_estimate_batch = true; if (params) { set_if_specified(filter_stop, params->filter_stop); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 550257d4f..59f33a0fb 100644 --- 
a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -124,7 +124,7 @@ class VamanaIndexImpl { get_impl()->set_search_parameters(old_sp); }); get_impl()->set_search_parameters(sp); - float filter_stop = 0.0f; + float filter_stop = 0.1f; bool filter_estimate_batch = true; if (params) { set_if_specified(filter_stop, params->filter_stop); From 30a051cc78bb1b9998948315be9708e36d7648ab Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 27 Apr 2026 09:05:17 -0700 Subject: [PATCH 32/32] revert internal filter_stop default to 0 --- bindings/cpp/src/dynamic_vamana_index_impl.h | 2 +- bindings/cpp/src/vamana_index_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bindings/cpp/src/dynamic_vamana_index_impl.h b/bindings/cpp/src/dynamic_vamana_index_impl.h index a6db69dc8..516a16539 100644 --- a/bindings/cpp/src/dynamic_vamana_index_impl.h +++ b/bindings/cpp/src/dynamic_vamana_index_impl.h @@ -118,7 +118,7 @@ class DynamicVamanaIndexImpl { // Selective search with IDSelector auto old_sp = impl_->get_search_parameters(); impl_->set_search_parameters(sp); - float filter_stop = 0.1f; + float filter_stop = 0.0f; bool filter_estimate_batch = true; if (params) { set_if_specified(filter_stop, params->filter_stop); diff --git a/bindings/cpp/src/vamana_index_impl.h b/bindings/cpp/src/vamana_index_impl.h index 59f33a0fb..550257d4f 100644 --- a/bindings/cpp/src/vamana_index_impl.h +++ b/bindings/cpp/src/vamana_index_impl.h @@ -124,7 +124,7 @@ class VamanaIndexImpl { get_impl()->set_search_parameters(old_sp); }); get_impl()->set_search_parameters(sp); - float filter_stop = 0.1f; + float filter_stop = 0.0f; bool filter_estimate_batch = true; if (params) { set_if_specified(filter_stop, params->filter_stop);