diff --git a/c/src/neighbors/ivf_sq.cpp b/c/src/neighbors/ivf_sq.cpp index 2656338d31..c4d6c3a412 100644 --- a/c/src/neighbors/ivf_sq.cpp +++ b/c/src/neighbors/ivf_sq.cpp @@ -339,6 +339,8 @@ extern "C" cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, extern "C" cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* n_lists) { return cuvs::core::translate_exceptions([=] { + RAFT_EXPECTS(index != nullptr, "index cannot be null"); + RAFT_EXPECTS(index->addr != 0, "index must be built before getting n_lists"); auto index_ptr = reinterpret_cast*>(index->addr); *n_lists = index_ptr->n_lists(); @@ -348,6 +350,8 @@ extern "C" cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, int64_t* extern "C" cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim) { return cuvs::core::translate_exceptions([=] { + RAFT_EXPECTS(index != nullptr, "index cannot be null"); + RAFT_EXPECTS(index->addr != 0, "index must be built before getting dim"); auto index_ptr = reinterpret_cast*>(index->addr); *dim = index_ptr->dim(); @@ -357,6 +361,8 @@ extern "C" cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t* dim extern "C" cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t* size) { return cuvs::core::translate_exceptions([=] { + RAFT_EXPECTS(index != nullptr, "index cannot be null"); + RAFT_EXPECTS(index->addr != 0, "index must be built before getting size"); auto index_ptr = reinterpret_cast*>(index->addr); *size = index_ptr->size(); diff --git a/fern/docs.yml b/fern/docs.yml index db239ce019..6a7260306d 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -409,6 +409,8 @@ navigation: path: "./pages/python_api/python-api-neighbors-ivf-flat.md" - page: "Neighbors IVF PQ" path: "./pages/python_api/python-api-neighbors-ivf-pq.md" + - page: "Neighbors IVF SQ" + path: "./pages/python_api/python-api-neighbors-ivf-sq.md" - page: "Neighbors NN Descent" path: "./pages/python_api/python-api-neighbors-nn-descent.md" - page: "Neighbors" diff 
--git a/fern/pages/python_api/index.md b/fern/pages/python_api/index.md index 41773574fa..8522cf0a8f 100644 --- a/fern/pages/python_api/index.md +++ b/fern/pages/python_api/index.md @@ -29,6 +29,7 @@ These pages are generated from the Python and Cython sources under `python/cuvs/ - [HNSW](/api-reference/python-api-neighbors-hnsw) - [IVF Flat](/api-reference/python-api-neighbors-ivf-flat) - [IVF PQ](/api-reference/python-api-neighbors-ivf-pq) +- [IVF SQ](/api-reference/python-api-neighbors-ivf-sq) - [NN Descent](/api-reference/python-api-neighbors-nn-descent) - [Neighbors](/api-reference/python-api-neighbors) - [Tiered Index](/api-reference/python-api-neighbors-tiered-index) diff --git a/fern/pages/python_api/python-api-neighbors-ivf-sq.md b/fern/pages/python_api/python-api-neighbors-ivf-sq.md new file mode 100644 index 0000000000..81ef078a08 --- /dev/null +++ b/fern/pages/python_api/python-api-neighbors-ivf-sq.md @@ -0,0 +1,393 @@ +--- +slug: api-reference/python-api-neighbors-ivf-sq +--- + +# IVF SQ + +_Python module: `cuvs.neighbors.ivf_sq`_ + +## Index + +```python +cdef class Index +``` + +IvfSq index object. This object stores the trained IvfSq index state +which can be used to perform nearest neighbors searches. 
+ +**Members** + +| Name | Kind | +| --- | --- | +| `trained` | property | +| `n_lists` | property | +| `dim` | property | +| `centers` | property | + +### trained + +```python +def trained(self) +``` + +### n_lists + +```python +def n_lists(self) +``` + +The number of inverted lists (clusters) + +### dim + +```python +def dim(self) +``` + +dimensionality of the cluster centers + +### centers + +```python +def centers(self) +``` + +Get the cluster centers corresponding to the lists in the +original space + +## IndexParams + +```python +cdef class IndexParams +``` + +Parameters to build index for IvfSq nearest neighbor search + +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `n_lists` | `int, default = 1024` | The number of clusters used in the coarse quantizer. | +| `metric` | `str, default = "sqeuclidean"` | String denoting the metric type. Valid values for metric: ["sqeuclidean", "inner_product", "euclidean", "cosine"], where

- sqeuclidean is the euclidean distance without the square root operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2,
- euclidean is the euclidean distance
- inner product distance is defined as distance(a, b) = \\sum_i a_i * b_i.
- cosine distance is defined as distance(a, b) = 1 - \\sum_i a_i * b_i / ( \|\|a\|\|_2 * \|\|b\|\|_2). | +| `kmeans_n_iters` | `int, default = 20` | The number of iterations searching for kmeans centers during index building. | +| `max_train_points_per_cluster` | `int, default = 256` | The number of data vectors per cluster to use during iterative kmeans building. The index uses at most n_lists * max_train_points_per_cluster rows for training. | +| `add_data_on_build` | `bool, default = True` | After training the coarse and fine quantizers, we will populate the index with the dataset if add_data_on_build == True, otherwise the index is left empty, and the extend method can be used to add new vectors to the index. | +| `conservative_memory_allocation` | `bool, default = False` | By default, the algorithm allocates more space than necessary for individual clusters (`list_data`). This allows to amortize the cost of memory allocation and reduce the number of data copies during repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flag to `True`. 
| + +**Constructor** + +```python +def __init__(self, *, n_lists=1024, metric="sqeuclidean", metric_arg=2.0, kmeans_n_iters=20, max_train_points_per_cluster=256, add_data_on_build=True, conservative_memory_allocation=False) +``` + +**Members** + +| Name | Kind | +| --- | --- | +| `get_handle` | method | +| `metric` | property | +| `metric_arg` | property | +| `add_data_on_build` | property | +| `n_lists` | property | +| `kmeans_n_iters` | property | +| `max_train_points_per_cluster` | property | +| `conservative_memory_allocation` | property | + +### get_handle + +```python +def get_handle(self) +``` + +### metric + +```python +def metric(self) +``` + +### metric_arg + +```python +def metric_arg(self) +``` + +### add_data_on_build + +```python +def add_data_on_build(self) +``` + +### n_lists + +```python +def n_lists(self) +``` + +### kmeans_n_iters + +```python +def kmeans_n_iters(self) +``` + +### max_train_points_per_cluster + +```python +def max_train_points_per_cluster(self) +``` + +### conservative_memory_allocation + +```python +def conservative_memory_allocation(self) +``` + +## SearchParams + +```python +cdef class SearchParams +``` + +Supplemental parameters to search IVF-SQ index + +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `n_probes` | `int` | The number of clusters to search. | + +**Constructor** + +```python +def __init__(self, *, n_probes=20) +``` + +**Members** + +| Name | Kind | +| --- | --- | +| `get_handle` | method | +| `n_probes` | property | + +### get_handle + +```python +def get_handle(self) +``` + +### n_probes + +```python +def n_probes(self) +``` + +## build + +`@auto_sync_resources` + +```python +def build(IndexParams index_params, dataset, resources=None) +``` + +Build the IvfSq index from the dataset for efficient search. + +IVF-SQ (Scalar Quantization) combines an IVF coarse quantizer with +per-dimension scalar quantization. 
Each vector's residual is encoded +as one byte per dimension, which can reduce vector-storage memory by about 4x vs IVF-Flat for float32 inputs (about 2x for float16 inputs), +excluding IVF structural overhead. Recall and speed trade-offs versus IVF-PQ are dataset and tuning dependent. + +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `index_params` | `cuvs.neighbors.ivf_sq.IndexParams` | | +| `dataset` | `CUDA array interface compliant matrix shape (n_samples, dim)` | Supported dtype [float32, float16] | +| `resources` | `cuvs.common.Resources, optional` | | + +**Returns** + +| Name | Type | Description | +| --- | --- | --- | +| `index` | `cuvs.neighbors.ivf_sq.Index` | | + +**Examples** + +```python +>>> import cupy as cp +>>> from cuvs.neighbors import ivf_sq +>>> n_samples = 50000 +>>> n_features = 50 +>>> n_queries = 1000 +>>> k = 10 +>>> dataset = cp.random.random_sample((n_samples, n_features), +... dtype=cp.float32) +>>> build_params = ivf_sq.IndexParams(metric="sqeuclidean") +>>> index = ivf_sq.build(build_params, dataset) +>>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(), +... index, dataset, +... k) +>>> distances = cp.asarray(distances) +>>> neighbors = cp.asarray(neighbors) +``` + +## extend + +`@auto_sync_resources` + +```python +def extend(Index index, new_vectors, new_indices, resources=None) +``` + +Extend an existing index with new vectors. + +The input array can be either CUDA array interface compliant matrix or +array interface compliant matrix in host memory. + +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `index` | `ivf_sq.Index` | Trained ivf_sq object. 
| +| `new_vectors` | `array interface compliant matrix shape (n_samples, dim)` | Supported dtype [float32, float16] | +| `new_indices` | `array interface compliant vector shape (n_samples)` | Supported dtype [int64] | +| `resources` | `cuvs.common.Resources, optional` | | + +**Returns** + +| Name | Type | Description | +| --- | --- | --- | +| `index` | `cuvs.neighbors.ivf_sq.Index` | | + +**Examples** + +```python +>>> import cupy as cp +>>> from cuvs.neighbors import ivf_sq +>>> n_samples = 50000 +>>> n_features = 50 +>>> n_queries = 1000 +>>> dataset = cp.random.random_sample((n_samples, n_features), +... dtype=cp.float32) +>>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) +>>> n_rows = 100 +>>> more_data = cp.random.random_sample((n_rows, n_features), +... dtype=cp.float32) +>>> indices = n_samples + cp.arange(n_rows, dtype=cp.int64) +>>> index = ivf_sq.extend(index, more_data, indices) +>>> # Search using the built index +>>> queries = cp.random.random_sample((n_queries, n_features), +... dtype=cp.float32) +>>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(), +... index, queries, +... k=10) +``` + +## load + +`@auto_sync_resources` + +```python +def load(filename, resources=None) +``` + +Loads index from file. + +Saving / loading the index is experimental. The serialization format is +subject to change, therefore loading an index saved with a previous +version of cuvs is not guaranteed to work. + +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `filename` | `string` | Name of the file. | +| `resources` | `cuvs.common.Resources, optional` | | + +**Returns** + +| Name | Type | Description | +| --- | --- | --- | +| `index` | `Index` | | + +## save + +`@auto_sync_resources` + +```python +def save(filename, Index index, resources=None) +``` + +Saves the index to a file. + +Saving / loading the index is experimental. The serialization format is +subject to change. 
+ +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `filename` | `string` | Name of the file. | +| `index` | `Index` | Trained IVF-SQ index. | +| `resources` | `cuvs.common.Resources, optional` | | + +**Examples** + +```python +>>> import cupy as cp +>>> from cuvs.neighbors import ivf_sq +>>> n_samples = 50000 +>>> n_features = 50 +>>> dataset = cp.random.random_sample((n_samples, n_features), +... dtype=cp.float32) +>>> # Build index +>>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) +>>> # Serialize and deserialize the ivf_sq index built +>>> ivf_sq.save("my_index.bin", index) +>>> index_loaded = ivf_sq.load("my_index.bin") +``` + +## search + +`@auto_sync_resources` +`@auto_convert_output` + +```python +def search(SearchParams search_params, Index index, queries, k, neighbors=None, distances=None, resources=None, filter=None) +``` + +Find the k nearest neighbors for each query. + +**Parameters** + +| Name | Type | Description | +| --- | --- | --- | +| `search_params` | `cuvs.neighbors.ivf_sq.SearchParams` | | +| `index` | `cuvs.neighbors.ivf_sq.Index` | Trained IvfSq index. | +| `queries` | `CUDA array interface compliant matrix shape (n_samples, dim)` | Supported dtype [float32, float16] | +| `k` | `int` | The number of neighbors. | +| `neighbors` | `Optional CUDA array interface compliant matrix shape` | (n_queries, k), dtype int64_t. If supplied, neighbor indices will be written here in-place. (default None) | +| `distances` | `Optional CUDA array interface compliant matrix shape` | (n_queries, k) If supplied, the distances to the neighbors will be written here in-place. (default None) | +| `filter` | `Optional cuvs.neighbors.cuvsFilter can be used to filter` | neighbors based on a given bitset. 
(default None) | +| `resources` | `cuvs.common.Resources, optional` | | + +**Examples** + +```python +>>> import cupy as cp +>>> from cuvs.neighbors import ivf_sq +>>> n_samples = 50000 +>>> n_features = 50 +>>> n_queries = 1000 +>>> dataset = cp.random.random_sample((n_samples, n_features), +... dtype=cp.float32) +>>> # Build the index +>>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) +>>> +>>> # Search using the built index +>>> queries = cp.random.random_sample((n_queries, n_features), +... dtype=cp.float32) +>>> k = 10 +>>> search_params = ivf_sq.SearchParams(n_probes=20) +>>> +>>> distances, neighbors = ivf_sq.search(search_params, index, queries, +... k) +``` diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 6a48508be5..8124962cd4 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -1,6 +1,6 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # @@ -9,6 +9,7 @@ add_subdirectory(cagra) add_subdirectory(hnsw) add_subdirectory(ivf_flat) add_subdirectory(ivf_pq) +add_subdirectory(ivf_sq) add_subdirectory(filters) add_subdirectory(nn_descent) add_subdirectory(tiered_index) diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py index 016f98ce60..6b4966bc18 100644 --- a/python/cuvs/cuvs/neighbors/__init__.py +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,7 @@ filters, ivf_flat, ivf_pq, + ivf_sq, mg, nn_descent, vamana, @@ -22,6 +23,7 @@ "filters", "ivf_flat", "ivf_pq", + "ivf_sq", "mg", "nn_descent", "all_neighbors", diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt b/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt new file mode 100644 index 0000000000..5fe85c9de5 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/CMakeLists.txt @@ -0,0 +1,17 @@ +# ============================================================================= +# cmake-format: off +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# cmake-format: on +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources ivf_sq.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX neighbors_ivf_sq_ +) diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py b/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py new file mode 100644 index 0000000000..bec1a652db --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/__init__.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + + +from .ivf_sq import ( + Index, + IndexParams, + SearchParams, + build, + extend, + load, + save, + search, +) + +__all__ = [ + "Index", + "IndexParams", + "SearchParams", + "build", + "extend", + "load", + "save", + "search", +] diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd new file mode 100644 index 0000000000..b8a5096ec4 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pxd @@ -0,0 +1,94 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 +# +# cython: language_level=3 + +from libc.stdint cimport int64_t, uint32_t, uintptr_t +from libcpp cimport bool + +from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t +from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor +from cuvs.distance_type cimport cuvsDistanceType +from cuvs.neighbors.filters.filters cimport cuvsFilter + + +cdef extern from "cuvs/neighbors/ivf_sq.h" nogil: + + ctypedef struct cuvsIvfSqIndexParams: + cuvsDistanceType metric + float metric_arg + bool add_data_on_build + uint32_t n_lists + uint32_t kmeans_n_iters + uint32_t max_train_points_per_cluster + bool conservative_memory_allocation + + ctypedef cuvsIvfSqIndexParams* cuvsIvfSqIndexParams_t + + ctypedef struct cuvsIvfSqSearchParams: + uint32_t n_probes + + ctypedef cuvsIvfSqSearchParams* cuvsIvfSqSearchParams_t + + ctypedef struct cuvsIvfSqIndex: + uintptr_t addr + DLDataType dtype + + ctypedef cuvsIvfSqIndex* cuvsIvfSqIndex_t + + cuvsError_t cuvsIvfSqIndexParamsCreate(cuvsIvfSqIndexParams_t* params) + + cuvsError_t cuvsIvfSqIndexParamsDestroy(cuvsIvfSqIndexParams_t index) + + cuvsError_t cuvsIvfSqSearchParamsCreate( + cuvsIvfSqSearchParams_t* params) + + cuvsError_t cuvsIvfSqSearchParamsDestroy(cuvsIvfSqSearchParams_t index) + + cuvsError_t cuvsIvfSqIndexCreate(cuvsIvfSqIndex_t* index) + + cuvsError_t cuvsIvfSqIndexDestroy(cuvsIvfSqIndex_t index) + + cuvsError_t cuvsIvfSqIndexGetNLists(cuvsIvfSqIndex_t index, + int64_t * n_lists) + + cuvsError_t cuvsIvfSqIndexGetDim(cuvsIvfSqIndex_t index, int64_t * dim) + + cuvsError_t cuvsIvfSqIndexGetSize(cuvsIvfSqIndex_t index, int64_t * size) + + cuvsError_t cuvsIvfSqIndexGetCenters(cuvsIvfSqIndex_t index, + DLManagedTensor * centers) + + cuvsError_t cuvsIvfSqBuild(cuvsResources_t res, + cuvsIvfSqIndexParams* params, + DLManagedTensor* dataset, + cuvsIvfSqIndex_t index) except + + + cuvsError_t cuvsIvfSqSearch(cuvsResources_t res, + cuvsIvfSqSearchParams* params, + cuvsIvfSqIndex_t index, + 
DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances, + cuvsFilter filter) except + + + cuvsError_t cuvsIvfSqSerialize(cuvsResources_t res, + const char * filename, + cuvsIvfSqIndex_t index) except + + + cuvsError_t cuvsIvfSqDeserialize(cuvsResources_t res, + const char * filename, + cuvsIvfSqIndex_t index) except + + + cuvsError_t cuvsIvfSqExtend(cuvsResources_t res, + DLManagedTensor* new_vectors, + DLManagedTensor* new_indices, + cuvsIvfSqIndex_t index) except + + + +cdef class IndexParams: + cdef cuvsIvfSqIndexParams* params + +cdef class SearchParams: + cdef cuvsIvfSqSearchParams* params diff --git a/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx new file mode 100644 index 0000000000..2d80defb98 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/ivf_sq/ivf_sq.pyx @@ -0,0 +1,542 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 +# +# cython: language_level=3 + +import numpy as np + +cimport cuvs.common.cydlpack + +from cuvs.common.resources import auto_sync_resources + +from cython.operator cimport dereference as deref +from libcpp cimport bool, cast +from libcpp.string cimport string + +from cuvs.common cimport cydlpack +from cuvs.distance_type cimport cuvsDistanceType + +from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray +from pylibraft.common.cai_wrapper import wrap_array +from pylibraft.common.interruptible import cuda_interruptible + +from cuvs.common.device_tensor_view import DeviceTensorView +from cuvs.distance import DISTANCE_NAMES, DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array +from cuvs.neighbors.filters import no_filter + +from libc.stdint cimport ( + int8_t, + int64_t, + uint8_t, + uint32_t, + uint64_t, + uintptr_t, +) + +from cuvs.common.exceptions import check_cuvs + + +cdef class IndexParams: + """ + Parameters to build index for IvfSq nearest neighbor search + + 
Note: IVF-SQ currently uses fixed 8-bit residual scalar quantization. + There are no additional SQ-specific tuning knobs. + + Parameters + ---------- + n_lists : int, default = 1024 + The number of clusters used in the coarse quantizer. + metric : str, default = "sqeuclidean" + String denoting the metric type. + Valid values for metric: ["sqeuclidean", "inner_product", + "euclidean", "cosine"], where + + - sqeuclidean is the euclidean distance without the square root + operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2, + - euclidean is the euclidean distance + - inner product distance is defined as + distance(a, b) = \\sum_i a_i * b_i. + - cosine distance is defined as + distance(a, b) = 1 - \\sum_i a_i * b_i / ( ||a||_2 * ||b||_2). + metric_arg : float, default = 2.0 + Additional metric argument forwarded to cuVS distance computations. + kmeans_n_iters : int, default = 20 + The number of iterations searching for kmeans centers during index + building. + max_train_points_per_cluster : int, default = 256 + The number of data vectors per cluster to use during iterative + kmeans building. The index uses at most + n_lists * max_train_points_per_cluster rows for training. + add_data_on_build : bool, default = True + After training the coarse clustering model and residual scalar + quantization parameters, we populate the index with the dataset + if add_data_on_build == True. Otherwise, the index is left empty, + and the extend method can be used to add new vectors to the index. + conservative_memory_allocation : bool, default = False + By default, the algorithm allocates more space than necessary for + individual clusters (`list_data`). This allows to amortize the cost + of memory allocation and reduce the number of data copies during + repeated calls to `extend` (extending the database). + To disable this behavior and use as little GPU memory for the + database as possible, set this flag to `True`. 
+ """ + + def __cinit__(self): + check_cuvs(cuvsIvfSqIndexParamsCreate(&self.params)) + + def __dealloc__(self): + if self.params != NULL: + check_cuvs(cuvsIvfSqIndexParamsDestroy(self.params)) + + def __init__(self, *, + n_lists=1024, + metric="sqeuclidean", + metric_arg=2.0, + kmeans_n_iters=20, + max_train_points_per_cluster=256, + add_data_on_build=True, + conservative_memory_allocation=False): + self.params.metric = DISTANCE_TYPES[metric] + self.params.metric_arg = metric_arg + self.params.add_data_on_build = add_data_on_build + self.params.n_lists = n_lists + self.params.kmeans_n_iters = kmeans_n_iters + self.params.max_train_points_per_cluster = max_train_points_per_cluster + self.params.conservative_memory_allocation = \ + conservative_memory_allocation + + def get_handle(self): + return self.params + + @property + def metric(self): + return DISTANCE_NAMES[self.params.metric] + + @property + def metric_arg(self): + return self.params.metric_arg + + @property + def add_data_on_build(self): + return self.params.add_data_on_build + + @property + def n_lists(self): + return self.params.n_lists + + @property + def kmeans_n_iters(self): + return self.params.kmeans_n_iters + + @property + def max_train_points_per_cluster(self): + return self.params.max_train_points_per_cluster + + @property + def conservative_memory_allocation(self): + return self.params.conservative_memory_allocation + + +cdef class Index: + """ + IvfSq index object. This object stores the trained IvfSq index state + which can be used to perform nearest neighbors searches. 
+ """ + + cdef cuvsIvfSqIndex_t index + cdef bool trained + + def __cinit__(self): + self.trained = False + check_cuvs(cuvsIvfSqIndexCreate(&self.index)) + + def __dealloc__(self): + check_cuvs(cuvsIvfSqIndexDestroy(self.index)) + + @property + def trained(self): + return self.trained + + def __repr__(self): + return "Index(type=IvfSq)" + + @property + def n_lists(self): + """ The number of inverted lists (clusters) """ + if self.index == NULL or self.index.addr == 0: + raise ValueError("Index needs to be built before getting n_lists") + cdef int64_t n_lists = 0 + check_cuvs(cuvsIvfSqIndexGetNLists(self.index, &n_lists)) + return n_lists + + @property + def dim(self): + """ dimensionality of the cluster centers """ + if self.index == NULL or self.index.addr == 0: + raise ValueError("Index needs to be built before getting dim") + cdef int64_t dim = 0 + check_cuvs(cuvsIvfSqIndexGetDim(self.index, &dim)) + return dim + + def __len__(self): + if self.index == NULL or self.index.addr == 0: + raise ValueError("Index needs to be built before getting len") + cdef int64_t size = 0 + check_cuvs(cuvsIvfSqIndexGetSize(self.index, &size)) + return size + + @property + def centers(self): + """ Get the cluster centers corresponding to the lists in the + original space """ + if self.index == NULL or self.index.addr == 0: + raise ValueError("Index needs to be built before getting centers") + + output = DeviceTensorView() + cdef cydlpack.DLManagedTensor * tensor = \ + output.get_handle() + check_cuvs(cuvsIvfSqIndexGetCenters(self.index, tensor)) + output.parent = self + + return output + + +@auto_sync_resources +def build(IndexParams index_params, dataset, resources=None): + """ + Build the IvfSq index from the dataset for efficient search. + + IVF-SQ (Scalar Quantization) uses IVF partitioning together with + per-dimension scalar quantization. 
Each vector's residual is encoded + as one byte per dimension, which can reduce vector-storage memory by + about 4x vs IVF-Flat for float32 inputs (about 2x for float16 inputs), + excluding IVF structural overhead. Recall and speed trade-offs versus + IVF-PQ are dataset and tuning dependent. + + Parameters + ---------- + index_params : :py:class:`cuvs.neighbors.ivf_sq.IndexParams` + dataset : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float32, float16] + {resources_docstring} + + Returns + ------- + index: py:class:`cuvs.neighbors.ivf_sq.Index` + + Examples + -------- + + >>> import cupy as cp + >>> from cuvs.neighbors import ivf_sq + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> k = 10 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> build_params = ivf_sq.IndexParams(metric="sqeuclidean") + >>> index = ivf_sq.build(build_params, dataset) + >>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(), + ... index, dataset, + ... k) + >>> distances = cp.asarray(distances) + >>> neighbors = cp.asarray(neighbors) + """ + + dataset_ai = wrap_array(dataset) + _check_input_array(dataset_ai, [np.dtype('float32'), + np.dtype('float16')]) + + cdef Index idx = Index() + cdef cydlpack.DLManagedTensor* dataset_dlpack = \ + cydlpack.dlpack_c(dataset_ai) + cdef cuvsIvfSqIndexParams* params = index_params.params + + cdef cuvsResources_t res = resources.get_c_obj() + + with cuda_interruptible(): + check_cuvs(cuvsIvfSqBuild( + res, + params, + dataset_dlpack, + idx.index + )) + idx.trained = True + + return idx + + +cdef class SearchParams: + """ + Supplemental parameters to search IVF-SQ index + + Parameters + ---------- + n_probes: int + The number of clusters to search. 
+ """ + + def __cinit__(self): + check_cuvs(cuvsIvfSqSearchParamsCreate(&self.params)) + + def __dealloc__(self): + if self.params != NULL: + check_cuvs(cuvsIvfSqSearchParamsDestroy(self.params)) + + def __init__(self, *, n_probes=20): + self.params.n_probes = n_probes + + def get_handle(self): + return self.params + + @property + def n_probes(self): + return self.params.n_probes + + +@auto_sync_resources +@auto_convert_output +def search(SearchParams search_params, + Index index, + queries, + k, + neighbors=None, + distances=None, + resources=None, + filter=None): + """ + Find the k nearest neighbors for each query. + + Parameters + ---------- + search_params : :py:class:`cuvs.neighbors.ivf_sq.SearchParams` + index : :py:class:`cuvs.neighbors.ivf_sq.Index` + Trained IvfSq index. + queries : CUDA array interface compliant matrix shape (n_samples, dim) + Supported dtype [float32, float16] + k : int + The number of neighbors. + neighbors : Optional CUDA array interface compliant matrix shape + (n_queries, k), dtype int64_t. If supplied, neighbor + indices will be written here in-place. (default None) + distances : Optional CUDA array interface compliant matrix shape + (n_queries, k) If supplied, the distances to the + neighbors will be written here in-place. (default None) + filter: Optional cuvs.neighbors.cuvsFilter can be used to filter + neighbors based on a given bitset. (default None) + {resources_docstring} + + Examples + -------- + >>> import cupy as cp + >>> from cuvs.neighbors import ivf_sq + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build the index + >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset) + >>> + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... 
dtype=cp.float32)
+    >>> k = 10
+    >>> search_params = ivf_sq.SearchParams(n_probes=20)
+    >>>
+    >>> distances, neighbors = ivf_sq.search(search_params, index, queries,
+    ...                                      k)
+    """
+    if not index.trained:
+        raise ValueError("Index needs to be built before calling search.")
+
+    queries_cai = wrap_array(queries)
+    _check_input_array(queries_cai, [np.dtype('float32'),
+                                     np.dtype('float16')])
+
+    cdef uint32_t n_queries = queries_cai.shape[0]
+
+    if neighbors is None:
+        neighbors = device_ndarray.empty((n_queries, k), dtype='int64')
+
+    neighbors_cai = wrap_array(neighbors)
+    _check_input_array(neighbors_cai, [np.dtype('int64')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    if distances is None:
+        distances = device_ndarray.empty((n_queries, k), dtype='float32')
+
+    distances_cai = wrap_array(distances)
+    _check_input_array(distances_cai, [np.dtype('float32')],
+                       exp_rows=n_queries, exp_cols=k)
+
+    cdef cuvsIvfSqSearchParams* params = search_params.params
+    cdef cydlpack.DLManagedTensor* queries_dlpack = \
+        cydlpack.dlpack_c(queries_cai)
+    cdef cydlpack.DLManagedTensor* neighbors_dlpack = \
+        cydlpack.dlpack_c(neighbors_cai)
+    cdef cydlpack.DLManagedTensor* distances_dlpack = \
+        cydlpack.dlpack_c(distances_cai)
+    cdef cuvsResources_t res = resources.get_c_obj()
+
+    if filter is None:
+        filter = no_filter()
+
+    with cuda_interruptible():
+        check_cuvs(cuvsIvfSqSearch(
+            res,
+            params,
+            index.index,
+            queries_dlpack,
+            neighbors_dlpack,
+            distances_dlpack,
+            filter.prefilter
+        ))
+
+    return (distances, neighbors)
+
+
+@auto_sync_resources
+def save(filename, Index index, resources=None):
+    """
+    Saves the index to a file.
+
+    Saving / loading the index is experimental. The serialization format is
+    subject to change.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file.
+    index : Index
+        Trained IVF-SQ index.
+    {resources_docstring}
+
+    Raises
+    ------
+    ValueError
+        If ``index`` has not been built (``index.trained`` is false).
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import ivf_sq
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> # Build index
+    >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset)
+    >>> # Serialize and deserialize the ivf_sq index built
+    >>> ivf_sq.save("my_index.bin", index)
+    >>> index_loaded = ivf_sq.load("my_index.bin")
+    """
+    # Same guard as search(): reject an unbuilt index here instead of
+    # handing its handle to the C serializer.
+    if not index.trained:
+        raise ValueError("Index needs to be built before calling save.")
+
+    cdef string c_filename = filename.encode('utf-8')
+    cdef cuvsResources_t res = resources.get_c_obj()
+    check_cuvs(cuvsIvfSqSerialize(res,
+                                  c_filename.c_str(),
+                                  index.index))
+
+
+@auto_sync_resources
+def load(filename, resources=None):
+    """
+    Loads index from file.
+
+    Saving / loading the index is experimental. The serialization format is
+    subject to change, therefore loading an index saved with a previous
+    version of cuvs is not guaranteed to work.
+
+    Parameters
+    ----------
+    filename : string
+        Name of the file.
+    {resources_docstring}
+
+    Returns
+    -------
+    index : Index
+        A new Index filled in by the deserializer and marked as trained.
+
+    """
+    cdef Index idx = Index()
+    cdef cuvsResources_t res = resources.get_c_obj()
+    cdef string c_filename = filename.encode('utf-8')
+
+    check_cuvs(cuvsIvfSqDeserialize(
+        res,
+        c_filename.c_str(),
+        idx.index
+    ))
+    # Only reached when check_cuvs did not raise, so the index is usable.
+    idx.trained = True
+    return idx
+
+
+@auto_sync_resources
+def extend(Index index, new_vectors, new_indices, resources=None):
+    """
+    Extend an existing index with new vectors.
+
+    The input array can be either CUDA array interface compliant matrix or
+    array interface compliant matrix in host memory.
+
+
+    Parameters
+    ----------
+    index : ivf_sq.Index
+        Trained ivf_sq object.
+    new_vectors : array interface compliant matrix shape (n_samples, dim)
+        Supported dtype [float32, float16]
+    new_indices : array interface compliant vector shape (n_samples)
+        Supported dtype [int64]
+    {resources_docstring}
+
+    Returns
+    -------
+    index : :py:class:`cuvs.neighbors.ivf_sq.Index`
+        The same ``index`` object that was passed in.
+
+    Raises
+    ------
+    ValueError
+        If ``index`` has not been built (``index.trained`` is false).
+
+    Examples
+    --------
+
+    >>> import cupy as cp
+    >>> from cuvs.neighbors import ivf_sq
+    >>> n_samples = 50000
+    >>> n_features = 50
+    >>> n_queries = 1000
+    >>> dataset = cp.random.random_sample((n_samples, n_features),
+    ...                                   dtype=cp.float32)
+    >>> index = ivf_sq.build(ivf_sq.IndexParams(), dataset)
+    >>> n_rows = 100
+    >>> more_data = cp.random.random_sample((n_rows, n_features),
+    ...                                     dtype=cp.float32)
+    >>> indices = n_samples + cp.arange(n_rows, dtype=cp.int64)
+    >>> index = ivf_sq.extend(index, more_data, indices)
+    >>> # Search using the built index
+    >>> queries = cp.random.random_sample((n_queries, n_features),
+    ...                                   dtype=cp.float32)
+    >>> distances, neighbors = ivf_sq.search(ivf_sq.SearchParams(),
+    ...                                      index, queries,
+    ...                                      k=10)
+    """
+    # Same guard as search(): reject an unbuilt index here instead of
+    # handing its handle to the C extend routine.
+    if not index.trained:
+        raise ValueError("Index needs to be built before calling extend.")
+
+    new_vectors_ai = wrap_array(new_vectors)
+    _check_input_array(new_vectors_ai,
+                       [np.dtype('float32'), np.dtype('float16')])
+
+    new_indices_ai = wrap_array(new_indices)
+    _check_input_array(new_indices_ai, [np.dtype('int64')])
+    cdef cuvsResources_t res = resources.get_c_obj()
+
+    cdef cydlpack.DLManagedTensor* new_vectors_dlpack = \
+        cydlpack.dlpack_c(new_vectors_ai)
+
+    cdef cydlpack.DLManagedTensor* new_indices_dlpack = \
+        cydlpack.dlpack_c(new_indices_ai)
+
+    with cuda_interruptible():
+        check_cuvs(cuvsIvfSqExtend(
+            res,
+            new_vectors_dlpack,
+            new_indices_dlpack,
+            index.index
+        ))
+
+    return index
diff --git a/python/cuvs/cuvs/tests/test_ivf_sq.py b/python/cuvs/cuvs/tests/test_ivf_sq.py
new file mode 100644
index 0000000000..f67784d8bd
--- /dev/null
+++ b/python/cuvs/cuvs/tests/test_ivf_sq.py
@@ -0,0 +1,230 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import os
+import tempfile
+
+import numpy as np
+import pytest
+from pylibraft.common import device_ndarray
+from sklearn.neighbors import NearestNeighbors
+from sklearn.preprocessing import normalize
+
+from cuvs.neighbors import ivf_sq
+from cuvs.tests.ann_utils import (
+    calc_recall,
+    generate_data,
+    run_filtered_search_test,
+)
+
+
+def run_ivf_sq_build_search_test(
+    n_rows=10000,
+    n_cols=10,
+    n_queries=100,
+    k=10,
+    dtype=np.float32,
+    add_data_on_build=True,
+    metric="euclidean",
+    compare=True,
+    inplace=True,
+    search_params=None,
+    serialize=False,
+    extend_after_build=False,
+    n_extend_rows=0,
+):
+    """
+    Build an IVF-SQ index, optionally round-trip / extend it, then search.
+
+    Parameters
+    ----------
+    n_rows, n_cols : int
+        Shape of the generated dataset.
+    n_queries : int
+        Number of generated query vectors.
+    k : int
+        Number of neighbors requested from the search.
+    dtype : numpy dtype
+        Element type of the generated dataset and queries.
+    add_data_on_build : bool
+        Forwarded to ``ivf_sq.IndexParams``. When false, the dataset is
+        added afterwards in two halves through ``ivf_sq.extend``.
+    metric : str
+        Forwarded to ``ivf_sq.IndexParams``; also selects the sklearn
+        reference metric.
+    compare : bool
+        When false, return right after the search without any checks.
+    inplace : bool
+        When true, pre-allocated output arrays are passed to the search;
+        otherwise the arrays returned by the search are used.
+    search_params : dict, optional
+        Keyword arguments for ``ivf_sq.SearchParams``. ``None`` means no
+        keyword arguments.
+    serialize : bool
+        When true, the index is saved to a temporary file and reloaded
+        before it is used.
+    extend_after_build : bool
+        When true (and ``add_data_on_build`` is true), ``n_extend_rows``
+        extra rows are appended with ``ivf_sq.extend``.
+    n_extend_rows : int
+        Number of rows appended when ``extend_after_build`` is set; must
+        be positive in that case.
+    """
+    dataset = generate_data((n_rows, n_cols), dtype)
+    if metric == "inner_product":
+        dataset = normalize(dataset, norm="l2", axis=1)
+    dataset_device = device_ndarray(dataset)
+
+    build_params = ivf_sq.IndexParams(
+        metric=metric,
+        add_data_on_build=add_data_on_build,
+    )
+
+    index = ivf_sq.build(build_params, dataset_device)
+
+    if serialize:
+        # delete=False keeps the path valid after the handle is closed, so
+        # the file has to be removed explicitly once the round trip is done.
+        with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+            temp_filename = f.name
+        try:
+            ivf_sq.save(temp_filename, index)
+            index = ivf_sq.load(temp_filename)
+        finally:
+            os.unlink(temp_filename)
+
+    if not add_data_on_build:
+        # The index was built without data: add it in two halves.
+        dataset_1 = dataset[: n_rows // 2, :]
+        dataset_2 = dataset[n_rows // 2 :, :]
+        indices_1 = np.arange(n_rows // 2, dtype=np.int64)
+        indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.int64)
+
+        dataset_1_device = device_ndarray(dataset_1)
+        dataset_2_device = device_ndarray(dataset_2)
+        indices_1_device = device_ndarray(indices_1)
+        indices_2_device = device_ndarray(indices_2)
+        index = ivf_sq.extend(index, dataset_1_device, indices_1_device)
+        index = ivf_sq.extend(index, dataset_2_device, indices_2_device)
+    elif extend_after_build:
+        assert n_extend_rows > 0
+        extend_data = generate_data((n_extend_rows, n_cols), dtype)
+        if metric == "inner_product":
+            extend_data = normalize(extend_data, norm="l2", axis=1)
+
+        extend_indices = np.arange(
+            n_rows, n_rows + n_extend_rows, dtype=np.int64
+        )
+        index = ivf_sq.extend(
+            index,
+            device_ndarray(extend_data),
+            device_ndarray(extend_indices),
+        )
+        # Keep the host-side reference data in step with the index.
+        dataset = np.concatenate((dataset, extend_data), axis=0)
+        n_rows += n_extend_rows
+
+    queries = generate_data((n_queries, n_cols), dtype)
+    out_idx = np.zeros((n_queries, k), dtype=np.int64)
+    out_dist = np.zeros((n_queries, k), dtype=np.float32)
+
+    queries_device = device_ndarray(queries)
+    out_idx_device = device_ndarray(out_idx) if inplace else None
+    out_dist_device = device_ndarray(out_dist) if inplace else None
+
+    search_params = ivf_sq.SearchParams(**(search_params or {}))
+
+    ret_output = ivf_sq.search(
+        search_params,
+        index,
+        queries_device,
+        k,
+        neighbors=out_idx_device,
+        distances=out_dist_device,
+    )
+
+    if not inplace:
+        out_dist_device, out_idx_device = ret_output
+
+    if not compare:
+        return
+
+    out_idx = out_idx_device.copy_to_host()
+    out_dist = out_dist_device.copy_to_host()
+
+    # For inner_product the dataset rows were L2-normalised above, and
+    # cosine is used as the sklearn reference metric.
+    skl_metric = {
+        "sqeuclidean": "sqeuclidean",
+        "inner_product": "cosine",
+        "cosine": "cosine",
+        "euclidean": "euclidean",
+    }[metric]
+    nn_skl = NearestNeighbors(
+        n_neighbors=k, algorithm="brute", metric=skl_metric
+    )
+    nn_skl.fit(dataset)
+    skl_idx = nn_skl.kneighbors(queries, return_distance=False)
+
+    recall = calc_recall(out_idx, skl_idx)
+    assert recall > 0.7
+
+    assert len(index) == n_rows
+    assert index.dim == n_cols
+    assert index.n_lists == build_params.n_lists
+
+    centers = index.centers
+    assert centers.shape[0] == build_params.n_lists
+    assert centers.shape[1] == n_cols
+
+
+@pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("dtype", [np.float32, np.float16])
+@pytest.mark.parametrize(
+    "metric", ["sqeuclidean", "inner_product", "euclidean", "cosine"]
+)
+def test_ivf_sq(inplace, dtype, metric):
+    """Build and search for each output mode, dtype and metric."""
+    run_ivf_sq_build_search_test(
+        dtype=dtype,
+        inplace=inplace,
+        metric=metric,
+    )
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float16])
+@pytest.mark.parametrize("serialize", [True, False])
+def test_extend(dtype, serialize):
+    """Build without data, then add it via extend (optionally reloaded)."""
+    run_ivf_sq_build_search_test(
+        n_rows=10000,
+        n_cols=10,
+        n_queries=100,
+        k=10,
+        metric="sqeuclidean",
+        dtype=dtype,
+        add_data_on_build=False,
+        serialize=serialize,
+    )
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float16])
+def test_extend_after_build_with_data(dtype):
+    """Build with data, then append further rows via extend."""
+    run_ivf_sq_build_search_test(
+        n_rows=10000,
+        n_cols=10,
+        n_queries=100,
+        k=10,
+        metric="sqeuclidean",
+        dtype=dtype,
+        add_data_on_build=True,
+        extend_after_build=True,
+        n_extend_rows=2000,
+    )
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float16])
+def test_serialization(dtype):
+    """A saved and reloaded index reports the same n_lists, dim, centers."""
+    n_rows, n_cols = 5000, 16
+    dataset = generate_data((n_rows, n_cols), dtype)
+    index = ivf_sq.build(
+        ivf_sq.IndexParams(metric="sqeuclidean"), device_ndarray(dataset)
+    )
+
+    expected_n_lists = index.n_lists
+    expected_dim = index.dim
+    expected_centers = index.centers.copy_to_host()
+
+    with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
+        temp_filename = f.name
+    try:
+        ivf_sq.save(temp_filename, index)
+        loaded = ivf_sq.load(temp_filename)
+    finally:
+        os.unlink(temp_filename)
+
+    assert loaded.n_lists == expected_n_lists
+    assert loaded.dim == expected_dim
+    np.testing.assert_allclose(loaded.centers.copy_to_host(), expected_centers)
+
+
+@pytest.mark.parametrize("sparsity", [0.5, 0.7, 1.0])
+def test_filtered_ivf_sq(sparsity):
+    """Run the shared filtered-search test against the ivf_sq module."""
+    run_filtered_search_test(ivf_sq, sparsity)
+
+
+def test_untrained_index_accessors_raise():
+    """Accessors on a default-constructed Index raise ValueError."""
+    index = ivf_sq.Index()
+
+    with pytest.raises(
+        ValueError, match="Index needs to be built before getting n_lists"
+    ):
+        _ = index.n_lists
+
+    with pytest.raises(
+        ValueError, match="Index needs to be built before getting dim"
+    ):
+        _ = index.dim
+
+    with pytest.raises(
+        ValueError, match="Index needs to be built before getting len"
+    ):
+        len(index)
+
+    with pytest.raises(
+        ValueError, match="Index needs to be built before getting centers"
+    ):
+        _ = index.centers