mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 21:52:23 +08:00
Ensure that an import batch cannot contain duplicate doc IDs.
This commit is contained in:
parent
ba67efb7da
commit
c0fce41c3b
@ -454,6 +454,9 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
|
||||
|
||||
size_t num_indexed = 0;
|
||||
|
||||
// ensures that document IDs are not repeated within the same batch
|
||||
std::set<std::string> batch_doc_ids;
|
||||
|
||||
for(auto & index_rec: iter_batch) {
|
||||
if(!index_rec.indexed.ok()) {
|
||||
// some records could have been invalidated upstream
|
||||
@ -461,6 +464,14 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
|
||||
}
|
||||
|
||||
if(index_rec.operation != DELETE) {
|
||||
const std::string& doc_id = index_rec.doc["id"].get<std::string>();
|
||||
if(batch_doc_ids.find(doc_id) != batch_doc_ids.end()) {
|
||||
index_rec.index_failure(400, "Document with `id` " + doc_id + " already exists in the import batch.");
|
||||
continue;
|
||||
}
|
||||
|
||||
batch_doc_ids.emplace(doc_id);
|
||||
|
||||
Option<uint32_t> validation_op = validate_index_in_memory(index_rec.doc, index_rec.seq_id,
|
||||
default_sorting_field,
|
||||
search_schema, facet_schema,
|
||||
|
@ -1395,3 +1395,68 @@ TEST_F(CollectionSpecificTest, ZeroWeightedFieldCannotPrioritizeExactMatch) {
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
// Checks how a bulk import treats a document `id` that occurs more than once in one batch:
//  * the first occurrence is imported and the later occurrence is reported as a failure;
//  * the later occurrence is still reported as a duplicate when the first occurrence itself failed.
TEST_F(CollectionSpecificTest, ImportDocumentWithRepeatingIDInTheSameBatch) {
    std::vector<field> schema_fields = {
        field("name", field_types::STRING, false),
        field("points", field_types::INT32, false),
    };

    Collection* coll1 = collectionManager.create_collection("coll1", 1, schema_fields, "points").get();

    // Two different payloads that deliberately share the same `id`.
    nlohmann::json first_doc = {{"id", "0"}, {"name", "Levis"}, {"points", 3}};
    nlohmann::json second_doc = {{"id", "0"}, {"name", "Amazing from Levis"}, {"points", 5}};

    // The per-record outcomes are read back from this same vector after the import call.
    std::vector<std::string> batch = {first_doc.dump(), second_doc.dump()};

    nlohmann::json doc_holder;
    nlohmann::json import_summary = coll1->add_many(batch, doc_holder);

    // The batch as a whole is flagged unsuccessful, but exactly one record made it in.
    ASSERT_FALSE(import_summary["success"].get<bool>());
    ASSERT_EQ(1, import_summary["num_imported"].get<int>());

    nlohmann::json first_outcome = nlohmann::json::parse(batch[0]);
    ASSERT_TRUE(first_outcome["success"].get<bool>());

    nlohmann::json second_outcome = nlohmann::json::parse(batch[1]);
    ASSERT_FALSE(second_outcome["success"].get<bool>());
    ASSERT_EQ("Document with `id` 0 already exists in the import batch.",
              second_outcome["error"].get<std::string>());

    // Only the first payload should be searchable.
    auto search_result = coll1->search("levis", {"name"},
                                       "", {}, {}, {0}, 10,
                                       1, FREQUENCY, {false},
                                       2, spp::sparse_hash_set<std::string>(),
                                       spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
                                       "<mark>", "</mark>", {0},
                                       1000, true).get();

    ASSERT_EQ(1, search_result["hits"].size());
    ASSERT_EQ("0", search_result["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("Levis", search_result["hits"][0]["document"]["name"].get<std::string>());

    // repeated ID is rejected even if the first ID is not indexed due to some error
    // (`name` is removed so that the first record no longer imports successfully -- asserted below)
    first_doc.erase("name");
    first_doc["id"] = "100";
    second_doc["id"] = "100";

    batch = {first_doc.dump(), second_doc.dump()};

    import_summary = coll1->add_many(batch, doc_holder);

    ASSERT_FALSE(import_summary["success"].get<bool>());
    ASSERT_EQ(0, import_summary["num_imported"].get<int>());

    first_outcome = nlohmann::json::parse(batch[0]);
    ASSERT_FALSE(first_outcome["success"].get<bool>());

    second_outcome = nlohmann::json::parse(batch[1]);
    ASSERT_FALSE(second_outcome["success"].get<bool>());
    ASSERT_EQ("Document with `id` 100 already exists in the import batch.",
              second_outcome["error"].get<std::string>());

    collectionManager.drop_collection("coll1");
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user