mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 20:52:50 +08:00
Repeating tokens in an array: fix relevancy.
This commit is contained in:
parent
21fd7f8e33
commit
bfb122bfec
@ -1736,7 +1736,10 @@ void Index::search(const std::vector<query_tokens_t>& field_query_tokens,
|
||||
total_distances += ((100 - ((match_score >> 8) & 0xFF)) + 1) * weight;
|
||||
verbatim_match_fields += (((match_score & 0xFF)) + 1);
|
||||
|
||||
if(field_typos == 0 && tokens_found == field_query_tokens[i].q_include_tokens.size()) {
|
||||
uint64_t unique_tokens_found =
|
||||
int64_t(__builtin_popcount(existing_field_kvs[field_id]->token_bits)) - 1;
|
||||
|
||||
if(field_typos == 0 && unique_tokens_found == field_query_tokens[i].q_include_tokens.size()) {
|
||||
exact_match_fields++;
|
||||
}
|
||||
|
||||
|
@ -242,6 +242,40 @@ TEST_F(CollectionSpecificTest, FieldWeighting) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
// Relevancy regression test for a query token that repeats inside a string array field.
//
// Document "0" contains only the token "18" from the query, but it occurs several times in both
// `description` and the `attrs` array. Document "1" contains all three query tokens
// ("rv345", "cisco", "18") in `description`. Document "1" must rank first even though
// document "0" has the higher `points` value (100 vs 50), which is the default sorting field.
TEST_F(CollectionSpecificTest, MultiFieldArrayRepeatingTokens) {
    std::vector<field> fields = {field("title", field_types::STRING, false),
                                 field("description", field_types::STRING, false),
                                 field("attrs", field_types::STRING_ARRAY, false),
                                 field("points", field_types::INT32, false),};

    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();

    // Fail here with a clear message instead of dereferencing a null collection below.
    ASSERT_NE(nullptr, coll1);

    // Only "18" matches the query, but it is repeated across the description and array elements.
    nlohmann::json doc1;
    doc1["id"] = "0";
    doc1["title"] = "E182-72/4";
    doc1["description"] = "Nexsan Technologies 18 SAN Array - 18 x HDD Supported - 18 x HDD Installed";
    doc1["attrs"] = {"Hard Drives Supported > 18", "Hard Drives Installed > 18", "SSD Supported > 18"};
    doc1["points"] = 100;

    // All three query tokens are present in the description of this document.
    nlohmann::json doc2;
    doc2["id"] = "1";
    doc2["title"] = "RV345-K9-NA";
    doc2["description"] = "Cisco RV345P Router - 18 Ports";
    doc2["attrs"] = {"Number of Ports > 18", "Product Type > Router"};
    doc2["points"] = 50;

    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
    ASSERT_TRUE(coll1->add(doc2.dump()).ok());

    // NOTE(review): the positional arguments after the field list presumably configure typo
    // tolerance, paging, token ordering and per-field prefix matching; confirm against
    // Collection::search before relying on this description.
    auto results = coll1->search("rv345 cisco 18", {"title", "description", "attrs"}, "", {}, {}, {1}, 10,
                                 1, FREQUENCY, {true, true, true}).get();

    // Both documents contain "18", so exactly two hits are expected. Verify the count before
    // indexing into the array so a missing hit is reported as a count mismatch.
    ASSERT_EQ(2U, results["hits"].size());

    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
    ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());

    collectionManager.drop_collection("coll1");
}
|
||||
|
||||
TEST_F(CollectionSpecificTest, PrefixWithTypos) {
|
||||
std::vector<field> fields = {field("title", field_types::STRING, false),
|
||||
field("points", field_types::INT32, false),};
|
||||
|
Loading…
x
Reference in New Issue
Block a user