Handle large weights.

This commit is contained in:
Kishore Nallan 2022-01-05 16:43:05 +05:30
parent f9942e4358
commit 87e2d6914f
2 changed files with 50 additions and 1 deletions

View File

@@ -2447,7 +2447,9 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens,
}
}
verbatim_match_fields = std::min<uint64_t>(255, verbatim_match_fields);
// protect most significant byte from overflow, since topster uses int64_t
verbatim_match_fields = std::min<uint64_t>(INT8_MAX, verbatim_match_fields);
exact_match_fields = std::min<uint64_t>(255, exact_match_fields);
max_weighted_tokens_match = std::min<uint64_t>(255, max_weighted_tokens_match);
total_typos = std::min<uint64_t>(255, total_typos);

View File

@@ -2197,3 +2197,50 @@ TEST_F(CollectionSpecificTest, PhraseSearch) {
collectionManager.drop_collection("coll1");
}
// Indexes two mirrored documents and runs the same two-field query twice,
// changing only the brace-list argument that follows the highlight tags
// ({1, 2} vs {1, 128} -- presumably the per-field query weights; confirm
// against Collection::search). Both runs must return document "1" ahead of
// document "0".
TEST_F(CollectionSpecificTest, HandleLargeWeights) {
    std::vector<field> schema_fields = {
        field("title", field_types::STRING, false),
        field("description", field_types::STRING, false),
        field("points", field_types::INT32, false),
    };

    Collection* coll = collectionManager.create_collection("coll1", 1, schema_fields, "points").get();

    // "foo" sits in the title of document 0 and in the description of
    // document 1; "same" appears in both fields of both documents.
    nlohmann::json first_doc = {
        {"id", "0"},
        {"title", "foo same"},
        {"description", "bar same"},
        {"points", 100},
    };
    ASSERT_TRUE(coll->add(first_doc.dump()).ok());

    nlohmann::json second_doc = {
        {"id", "1"},
        {"title", "bar same"},
        {"description", "foo same"},
        {"points", 200},
    };
    ASSERT_TRUE(coll->add(second_doc.dump()).ok());

    // First run: small values {1, 2}.
    auto small_weight_results = coll->search("foo same", {"title", "description"},
                                             "", {}, {}, {2, 2}, 10,
                                             1, FREQUENCY, {true, true},
                                             10, spp::sparse_hash_set<std::string>(),
                                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
                                             "<mark>", "</mark>", {1, 2}, 1000, true).get();

    auto& small_weight_hits = small_weight_results["hits"];
    ASSERT_EQ(2, small_weight_hits.size());
    ASSERT_EQ("1", small_weight_hits[0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", small_weight_hits[1]["document"]["id"].get<std::string>());

    // Second run: identical query, but with the large value 128 in place of 2.
    // The expected ordering is unchanged.
    auto large_weight_results = coll->search("foo same", {"title", "description"},
                                             "", {}, {}, {2, 2}, 10,
                                             1, FREQUENCY, {true, true},
                                             10, spp::sparse_hash_set<std::string>(),
                                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
                                             "<mark>", "</mark>", {1, 128}, 1000, true).get();

    auto& large_weight_hits = large_weight_results["hits"];
    ASSERT_EQ(2, large_weight_hits.size());
    ASSERT_EQ("1", large_weight_hits[0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", large_weight_hits[1]["document"]["id"].get<std::string>());

    collectionManager.drop_collection("coll1");
}