diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e01e50c..a140a6bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ add_executable(search ${SRC_FILES} src/main/main.cpp) add_executable(benchmark ${SRC_FILES} src/main/benchmark.cpp) add_executable(typesense_test ${SRC_FILES} test/array_test.cpp test/sorted_array_test.cpp test/art_test.cpp test/collection_test.cpp test/collection_manager_test.cpp - test/topster_test.cpp test/match_score_test.cpp test/store_test.cpp) + test/topster_test.cpp test/match_score_test.cpp test/store_test.cpp test/array_utils_test.cpp) target_compile_definitions(typesense-server PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/") target_compile_definitions(search PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/") diff --git a/include/array_utils.h b/include/array_utils.h index c0bd5cc1..05390238 100644 --- a/include/array_utils.h +++ b/include/array_utils.h @@ -10,7 +10,7 @@ class ArrayUtils { public: // Fast scalar scheme designed by N. Kurz. Returns the size of out (intersected set) - static size_t and_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t *out); + static size_t and_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t **out); static size_t or_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t **out); }; \ No newline at end of file diff --git a/src/array_utils.cpp b/src/array_utils.cpp index c4c4df98..ee458cf2 100644 --- a/src/array_utils.cpp +++ b/src/array_utils.cpp @@ -2,11 +2,15 @@ #include size_t ArrayUtils::and_scalar(const uint32_t *A, const size_t lenA, - const uint32_t *B, const size_t lenB, uint32_t *out) { - const uint32_t *const initout(out); - if (lenA == 0 || lenB == 0) + const uint32_t *B, const size_t lenB, uint32_t **results) { + if (lenA == 0 || lenB == 0) { return 0; + } + *results = new uint32_t[std::min(lenA, lenB)]; + uint32_t *out = *results; + + const uint32_t *const initout(out); 
const uint32_t *endA = A + lenA; const uint32_t *endB = B + lenB; diff --git a/src/collection.cpp b/src/collection.cpp index 0c1789b0..22d0bb2c 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -455,9 +455,9 @@ void Collection::search_candidates(uint32_t* filter_ids, size_t filter_ids_lengt if(filter_ids != nullptr) { // intersect once again with filter ids - uint32_t* filtered_result_ids = new uint32_t[std::min(filter_ids_length, result_size)]; + uint32_t* filtered_result_ids = nullptr; size_t filtered_results_size = ArrayUtils::and_scalar(filter_ids, filter_ids_length, result_ids, - result_size, filtered_result_ids); + result_size, &filtered_result_ids); uint32_t* new_all_result_ids; all_result_ids_len = ArrayUtils::or_scalar(*all_result_ids, all_result_ids_len, filtered_result_ids, @@ -649,9 +649,9 @@ Option Collection::do_filtering(uint32_t** filter_ids_out, const std:: filter_ids = result_ids; filter_ids_length = result_ids_length; } else { - uint32_t* filtered_results = new uint32_t[std::min((size_t)filter_ids_length, result_ids_length)]; + uint32_t* filtered_results = nullptr; filter_ids_length = ArrayUtils::and_scalar(filter_ids, filter_ids_length, result_ids, - result_ids_length, filtered_results); + result_ids_length, &filtered_results); delete [] filter_ids; delete [] result_ids; filter_ids = filtered_results; diff --git a/src/sorted_array.cpp b/src/sorted_array.cpp index 8c8310fa..6ac6ae32 100644 --- a/src/sorted_array.cpp +++ b/src/sorted_array.cpp @@ -175,9 +175,9 @@ void sorted_array::remove_values(uint32_t *sorted_values, uint32_t values_length size_t sorted_array::intersect(uint32_t* arr, const size_t arr_length, uint32_t** results_out) { uint32_t* curr = uncompress(); - uint32_t* results = new uint32_t[std::min(arr_length, (size_t) length)]; + uint32_t* results = nullptr; - size_t results_length = ArrayUtils::and_scalar(arr, arr_length, curr, length, results); + size_t results_length = ArrayUtils::and_scalar(arr, arr_length, curr, 
length, &results); delete[] curr; *results_out = results; diff --git a/test/array_utils_test.cpp b/test/array_utils_test.cpp new file mode 100644 index 00000000..5b64746f --- /dev/null +++ b/test/array_utils_test.cpp @@ -0,0 +1,95 @@ +#include <gtest/gtest.h> +#include "array_utils.h" + +TEST(SortedArrayTest, AndScalar) { + const size_t size1 = 9; + uint32_t *arr1 = new uint32_t[size1]; + for(size_t i = 0; i < size1; i++) { + arr1[i] = i; + } + + const size_t size2 = 10; + uint32_t *arr2 = new uint32_t[size2]; + size_t arr2_len = 0; + for(size_t i = 2; i < size2; i++) { + if(i % 3 == 0) { + arr2[arr2_len++] = i; + } + } + + // arr1: [0..8] , arr2: [3, 6, 9]; and_scalar allocates the output buffer itself, so results starts as nullptr + uint32_t *results = nullptr; + uint32_t results_size = ArrayUtils::and_scalar(arr1, size1, arr2, arr2_len, &results); + ASSERT_EQ(2, results_size); + + std::vector<uint32_t> expected = {3, 6}; + + for(size_t i = 0; i < results_size; i++) { + ASSERT_EQ(expected[i], results[i]); + } + + delete [] results; + delete [] arr1; + delete [] arr2; +} + +TEST(SortedArrayTest, OrScalarMergeShouldRemoveDuplicates) { + const size_t size1 = 9; + uint32_t *arr1 = new uint32_t[size1]; + for(size_t i = 0; i < size1; i++) { + arr1[i] = i; + } + + const size_t size2 = 10; + uint32_t *arr2 = new uint32_t[size2]; + size_t arr2_len = 0; + for(size_t i = 2; i < size2; i++) { + if(i % 3 == 0) { + arr2[arr2_len++] = i; + } + } + + // arr1: [0..8] , arr2: [3, 6, 9] + uint32_t *results = nullptr; + uint32_t results_size = ArrayUtils::or_scalar(arr1, size1, arr2, arr2_len, &results); + ASSERT_EQ(10, results_size); + + std::vector<uint32_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + for(size_t i = 0; i < results_size; i++) { + ASSERT_EQ(expected[i], results[i]); + } + + delete[] results; + delete[] arr1; + delete[] arr2; +} + +TEST(SortedArrayTest, OrScalarMergeShouldRemoveDuplicatesAtBoundary) { + const size_t size1 = 9; + uint32_t *arr1 = new uint32_t[size1]; + for(auto i = 0; i < 9; i++) { + arr1[i] = i; + } + + std::vector<uint32_t> vec2 = {0, 4, 
5}; + uint32_t *arr2 = new uint32_t[vec2.size()]; + auto j = 0; + for(auto i: vec2) { + arr2[j++] = i; + } + + uint32_t *results = nullptr; + uint32_t results_size = ArrayUtils::or_scalar(arr1, size1, arr2, vec2.size(), &results); + ASSERT_EQ(9, results_size); + + std::vector<uint32_t> expected = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + + for(size_t i = 0; i < results_size; i++) { + ASSERT_EQ(expected[i], results[i]); + } + + delete[] results; + delete[] arr1; + delete[] arr2; +} \ No newline at end of file