diff --git a/include/sorted_array.h b/include/sorted_array.h index 4e84166c..b2fd07ab 100644 --- a/include/sorted_array.h +++ b/include/sorted_array.h @@ -52,5 +52,7 @@ public: bool insert(size_t index, uint32_t value); + void remove_value(uint32_t value); + void remove_values(uint32_t *sorted_values, uint32_t sorted_values_length); }; \ No newline at end of file diff --git a/src/index.cpp b/src/index.cpp index 29fe1a57..addd2e5b 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1911,7 +1911,6 @@ Option Index::remove(const uint32_t seq_id, const nlohmann::json & doc art_leaf* leaf = (art_leaf *) art_search(search_index.at(field_name), key, key_len); if(leaf != nullptr) { - uint32_t seq_id_values[1] = {seq_id}; uint32_t doc_index = leaf->values->ids.indexOf(seq_id); if(doc_index == leaf->values->ids.getLength()) { @@ -1928,7 +1927,7 @@ Option Index::remove(const uint32_t seq_id, const nlohmann::json & doc remove_and_shift_offset_index(leaf->values->offset_index, doc_indices, 1); leaf->values->offsets.remove_index(start_offset, end_offset); - leaf->values->ids.remove_values(seq_id_values, 1); + leaf->values->ids.remove_value(seq_id); /*len = leaf->values->offset_index.getLength(); for(auto i=0; ii; j--) { + // find the index of the element which is >= to `value` + uint32_t found_val; + uint32_t gte_index = for_lower_bound_search(in, length, value, &found_val); + + for(size_t j=length; j>gte_index; j--) { arr[j] = arr[j-1]; } - arr[i] = value; + arr[gte_index] = value; load(arr, length+1); delete [] arr; - return i; + return gte_index; } else { uint32_t size_required = sorted_append_size_required(value, length+1); size_t min_expected_size = size_required + FOR_ELE_SIZE; @@ -104,7 +101,11 @@ uint32_t sorted_array::indexOf(uint32_t value) { uint32_t actual; uint32_t index = for_lower_bound_search(in, length, value, &actual); - if(actual == value) return index; + + if(actual == value) { + return index; + } + return length; } @@ -193,6 +194,28 @@ void sorted_array::indexOf(const uint32_t *values, const size_t values_len, uint binary_search_indices(values, head, tail, low_index, high_index, base, bits, indices); } +void sorted_array::remove_value(uint32_t value) { + // A lower bound search returns the first element in the sequence that is >= `value` + // So, `found_val` will be either equal or greater than `value` + uint32_t found_val; + uint32_t found_index = for_lower_bound_search(in, length, value, &found_val); + + if(found_val != value) { + return ; + } + + uint32_t *curr_array = uncompress(); + + if(found_index + 1 < length) { + memmove(&curr_array[found_index], &curr_array[found_index+1], sizeof(uint32_t) * (length - found_index - 1)); + } + + size_t new_length = (length == 0) ? 0 : (length - 1); + load(curr_array, new_length); + + delete [] curr_array; +} + void sorted_array::remove_values(uint32_t *sorted_values, uint32_t sorted_values_length) { uint32_t *curr_array = uncompress(); diff --git a/test/collection_test.cpp b/test/collection_test.cpp index 2f652267..0ae833b6 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -14,6 +14,9 @@ protected: CollectionManager & collectionManager = CollectionManager::get_instance(); std::vector sort_fields; + // used for generating random text + std::vector words; + void setupCollection() { std::string state_dir_path = "/tmp/typesense_test/collection"; LOG(INFO) << "Truncating and creating: " << state_dir_path; @@ -48,6 +51,12 @@ protected: } infile.close(); + + std::ifstream words_file(std::string(ROOT_DIR)+"test/resources/common100_english.txt"); + std::stringstream strstream; + strstream << words_file.rdbuf(); + words_file.close(); + StringUtils::split(strstream.str(), words, "\n"); } virtual void SetUp() { @@ -59,6 +68,18 @@ protected: collectionManager.dispose(); delete store; } + + std::string get_text(size_t num_words) { + time_t t; + srand((unsigned) time(&t)); + std::vector strs; + + for(size_t i = 0 ; i < num_words ; i++ ) { + int word_index = rand() % 100; + strs.push_back(words[word_index]); + } + return StringUtils::join(strs, " "); + } }; TEST_F(CollectionTest, VerifyCountOfDocuments) { @@ -1410,6 +1431,90 @@ TEST_F(CollectionTest, ImportDocumentsUpsert) { ASSERT_EQ(70, results["hits"][0]["document"]["points"].get()); } + +TEST_F(CollectionTest, ImportDocumentsUpsertOptional) { + Collection *coll1; + std::vector fields = { + field("title", field_types::STRING_ARRAY, false, true), + field("points", field_types::INT32, false) + }; + + coll1 = collectionManager.get_collection("coll1"); + if(coll1 == nullptr) { + coll1 = collectionManager.create_collection("coll1", 4, fields, "points").get(); + } + + std::vector records; + + size_t NUM_RECORDS = 1000; + + for(size_t i=0; iadd_many(records, document, false); + ASSERT_TRUE(import_response["success"].get()); + ASSERT_EQ(1000, import_response["num_imported"].get()); + + // upsert documents with title + + records.clear(); + + for(size_t i=0; iadd_many(records, document, true); + auto time_micros = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - begin).count(); + + //LOG(INFO) << "Time taken for first upsert: " << time_micros; + + ASSERT_TRUE(import_response["success"].get()); + ASSERT_EQ(1000, import_response["num_imported"].get()); + + // run upsert again with title override + + records.clear(); + + for(size_t i=0; iadd_many(records, document, true); + time_micros = std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - begin).count(); + + //LOG(INFO) << "Time taken for second upsert: " << time_micros; + + ASSERT_TRUE(import_response["success"].get()); + ASSERT_EQ(1000, import_response["num_imported"].get()); +} + TEST_F(CollectionTest, ImportDocuments) { Collection *coll_mul_fields; diff --git a/test/resources/common100_english.txt b/test/resources/common100_english.txt new file mode 100644 index 00000000..af98316b --- /dev/null +++ b/test/resources/common100_english.txt @@ -0,0 +1,100 @@ +the +of +to +and +a +in +is +it +you +that +he +was +for +on +are +with +as +I +his +they +be +at +one +have +this +from +or +had +by +not +word +but +what +some +we +can +out +other +were +all +there +when +up +use +your +how +said +an +each +she +which +do +their +time +if +will +way +about +many +then +them +write +would +like +so +these +her +long +make +thing +see +him +two +has +look +more +day +could +go +come +did +number +sound +no +most +people +my +over +know +water +than +call +first +who +may +down +side +been +now +find \ No newline at end of file diff --git a/test/sorted_array_test.cpp b/test/sorted_array_test.cpp index 93351649..aadcd52c 100644 --- a/test/sorted_array_test.cpp +++ b/test/sorted_array_test.cpp @@ -12,7 +12,8 @@ TEST(SortedArrayTest, Append) { EXPECT_EQ(arr.indexOf(100), 0); // when not found must be equal to length (0 in this case) for(uint32_t i=0; i < SIZE; i++) { - arr.append(i); + size_t appended_index = arr.append(i); + ASSERT_EQ(i, appended_index); } EXPECT_EQ(arr.getLength(), SIZE); @@ -28,7 +29,8 @@ TEST(SortedArrayTest, Append) { EXPECT_EQ(arr.indexOf(SIZE+1), SIZE); sorted_array arr_small; - arr_small.append(100); + size_t appended_index = arr_small.append(100); + EXPECT_EQ(0, appended_index); EXPECT_EQ(arr_small.getLength(), 1); EXPECT_EQ(arr_small.at(0), 100); } @@ -36,18 +38,34 @@ TEST(SortedArrayTest, Append) { TEST(SortedArrayTest, AppendOutOfOrder) { sorted_array arr; for(size_t i=5; i<=10; i++) { - arr.append(i); + size_t appended_index = arr.append(i); + ASSERT_EQ(i-5, appended_index); } EXPECT_EQ(6, arr.getLength()); - arr.append(1); - arr.append(3); - arr.append(2); - arr.append(4); - arr.append(11); - arr.append(14); - arr.append(12); + int appended_index = -1; + + appended_index = arr.append(1); + ASSERT_EQ(0, appended_index); + + appended_index = arr.append(3); + ASSERT_EQ(1, appended_index); + + appended_index = arr.append(2); + ASSERT_EQ(1, appended_index); + + appended_index = arr.append(4); + ASSERT_EQ(3, appended_index); + + appended_index = arr.append(11); + ASSERT_EQ(10, appended_index); + + appended_index = arr.append(14); + ASSERT_EQ(11, appended_index); + + appended_index = arr.append(12); + ASSERT_EQ(11, appended_index); EXPECT_EQ(13, arr.getLength()); } @@ -136,6 +154,32 @@ TEST(SortedArrayTest, Uncompress) { delete[] raw_sorted_arr; } +TEST(SortedArrayTest, RemoveValue) { + sorted_array arr; + + const size_t SIZE = 10*1000; + for(size_t i=0; i