mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-05-17 03:32:08 +08:00
Summary: IndexPQ and IndexIVFPQ implementations with AVX shuffle instructions. The training and computing of the codes do not change with respect to the original PQ versions, but the code layout is "packed" so that it can be used efficiently by the SIMD computation kernels. The main changes are: - new IndexPQFastScan and IndexIVFPQFastScan objects - simdlib.h for an abstraction above the AVX2 intrinsics - BlockInvertedLists for invlists that are 32-byte aligned and where codes are not sequential - pq4_fast_scan.h/.cpp: for packing codes and look-up tables + optimized distance computation kernels - simd_result_handlers.h: SIMD version of result collection in heaps / reservoirs Misc changes: - added contrib.inspect_tools to access fields in C++ objects - moved .h and .cpp code for inverted lists to an invlists/ subdirectory, and made a .h/.cpp for InvertedListsIOHook - added a new inverted lists type with 32-byte aligned codes (for consumption by SIMD) - moved Windows-specific intrinsics to platform_macros.h Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1542 Test Plan: ``` buck test mode/opt -j 4 //faiss/tests/:test_fast_scan_ivf //faiss/tests/:test_fast_scan buck test mode/opt //faiss/manifold/... ``` Reviewed By: wickedfoo Differential Revision: D25175439 Pulled By: mdouze fbshipit-source-id: ad1a40c0df8c10f4b364bdec7172e43d71b56c34
126 lines
3.5 KiB
Python
126 lines
3.5 KiB
Python
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
import faiss
|
|
import unittest
|
|
import numpy as np
|
|
import platform
|
|
|
|
from faiss.contrib import datasets
|
|
from faiss.contrib import inspect_tools
|
|
|
|
from common import get_dataset_2
|
|
# Best-effort import: the tests that need these helpers are expected to be
# skipped (or to fail with a NameError) when the submodule is unavailable.
try:
    from faiss.contrib.exhaustive_search import knn_ground_truth, knn
except (ImportError, SyntaxError):
    # Submodule import broken in python 2.
    # Only import-related failures are tolerated; a bare ``except`` would
    # also hide KeyboardInterrupt / SystemExit and unrelated bugs.
    pass
|
|
|
|
# Compare the major version numerically: comparing the strings would order
# '10' before '3'.
@unittest.skipIf(
    int(platform.python_version_tuple()[0]) < 3,
    'Submodule import broken in python 2.')
class TestComputeGT(unittest.TestCase):
    """Check knn_ground_truth against a flat L2 index on the same data."""

    def test_compute_GT(self):
        """Ground truth computed block by block matches IndexFlatL2 search."""
        d = 64
        xt, xb, xq = get_dataset_2(d, 0, 10000, 100)

        # reference result: exact search with a flat index
        index = faiss.IndexFlatL2(d)
        index.add(xb)
        Dref, Iref = index.search(xq, 10)

        # iterator function on the matrix
        def matrix_iterator(xb, bs):
            """Yield consecutive slices of at most bs rows of xb."""
            for i0 in range(0, xb.shape[0], bs):
                yield xb[i0:i0 + bs]

        Dnew, Inew = knn_ground_truth(xq, matrix_iterator(xb, 1000), 10)

        np.testing.assert_array_equal(Iref, Inew)
        # decimal = 4 required when run on GPU
        np.testing.assert_almost_equal(Dref, Dnew, decimal=4)
|
|
|
|
|
|
class TestDatasets(unittest.TestCase):
    """Checks for the synthetic dataset only.

    Datasets that require disk or manifold access are in
    //deeplearning/projects/faiss-forge/test_faiss_datasets/:test_faiss_datasets
    """

    def test_synthetic(self):
        """Query and database matrices have the requested shapes."""
        dim, n_train, n_base, n_query = 32, 1000, 2000, 10
        dataset = datasets.SyntheticDataset(dim, n_train, n_base, n_query)

        queries = dataset.get_queries()
        self.assertEqual(queries.shape, (n_query, dim))

        database = dataset.get_database()
        self.assertEqual(database.shape, (n_base, dim))

        dataset.check_sizes()

    def test_synthetic_ip(self):
        """The "IP" dataset's ground truth matches an IndexFlatIP search."""
        dim, k = 32, 100
        dataset = datasets.SyntheticDataset(dim, 1000, 2000, 10, "IP")

        flat_ip = faiss.IndexFlatIP(dim)
        flat_ip.add(dataset.get_database())

        expected_ids = dataset.get_groundtruth(k)
        found_ids = flat_ip.search(dataset.get_queries(), k)[1]
        np.testing.assert_array_equal(expected_ids, found_ids)

    def test_synthetic_iterator(self):
        """Stacking the database iterator's blocks gives the full database."""
        dataset = datasets.SyntheticDataset(32, 1000, 2000, 10)
        full_database = dataset.get_database()

        stacked = np.vstack(list(dataset.database_iterator()))
        np.testing.assert_array_equal(full_database, stacked)
|
|
|
|
|
|
class TestExhaustiveSearch(unittest.TestCase):
    """Compare the contrib `knn` helper with flat faiss indexes."""

    def _check_knn_against_index(self, index, xb, xq, k, **knn_kwargs):
        """Add xb to index, search xq, and compare with knn(xq, xb, k).

        Extra keyword arguments are forwarded to `knn` unchanged.
        """
        index.add(xb)
        Dref, Iref = index.search(xq, k)

        Dnew, Inew = knn(xq, xb, k, **knn_kwargs)

        # np.testing assertions are not stripped under ``python -O`` and
        # report the mismatching entries, unlike a bare ``assert``.
        np.testing.assert_array_equal(Inew, Iref)
        # same tolerances as the np.allclose defaults
        np.testing.assert_allclose(Dref, Dnew, rtol=1e-5, atol=1e-8)

    def test_knn_cpu(self):
        """knn agrees with IndexFlatL2 and with IndexFlatIP."""
        # fixed seed so that a failure can be reproduced
        rs = np.random.RandomState(123)
        xb = rs.rand(200, 32).astype('float32')
        xq = rs.rand(100, 32).astype('float32')

        # knn is called without a distance_type here, as in the L2 case
        self._check_knn_against_index(faiss.IndexFlatL2(32), xb, xq, 10)

        self._check_knn_against_index(
            faiss.IndexFlatIP(32), xb, xq, 10,
            distance_type=faiss.METRIC_INNER_PRODUCT)
|
|
|
|
|
|
class TestInspect(unittest.TestCase):
    """Checks for faiss.contrib.inspect_tools."""

    def test_LinearTransform(self):
        """The extracted (A, b) reproduces the PCA's apply_py output."""
        d_in, d_out = 20, 10

        # random training set, then the vectors to transform
        train_set = np.random.rand(1000, d_in).astype('float32')
        samples = np.random.rand(10, d_in).astype('float32')

        # train the PCA and compute the reference output
        pca = faiss.PCAMatrix(d_in, d_out)
        pca.train(train_set)
        expected = pca.apply_py(samples)

        # pull the matrix and bias out of the trained transform
        A, b = inspect_tools.get_LinearTransform_matrix(pca)

        # applying them by hand must give the same result
        projected = samples @ A.T
        actual = projected + b
        np.testing.assert_array_almost_equal(expected, actual)
|