Drop tokens all the way to one token.

This commit is contained in:
Kishore Nallan 2022-01-15 14:44:43 +05:30
parent f461e04168
commit 1db5a260dd
3 changed files with 40 additions and 6 deletions

View File

@@ -3162,16 +3162,15 @@ void Index::search_field(const uint8_t & field_id,
std::vector<token_t> truncated_tokens;
num_tokens_dropped++;
size_t mid_index = (query_tokens.size() / 2);
if(num_tokens_dropped <= mid_index) {
if(num_tokens_dropped < query_tokens.size()) {
// drop from right
size_t end_index = (query_tokens.size() - 1) - num_tokens_dropped;
size_t end_index = query_tokens.size() - num_tokens_dropped - 1;
for(size_t i=0; i <= end_index; i++) {
truncated_tokens.emplace_back(query_tokens[i].position, query_tokens[i].value, query_tokens[i].prefix);
}
} else {
// drop from left
size_t start_index = (num_tokens_dropped - mid_index);
size_t start_index = (num_tokens_dropped - query_tokens.size() + 1);
for(size_t i=start_index; i<query_tokens.size(); i++) {
truncated_tokens.emplace_back(query_tokens[i].position, query_tokens[i].value, query_tokens[i].prefix);
}

View File

@@ -300,11 +300,12 @@ TEST_F(CollectionGroupingTest, GroupingWithMultiFieldRelevance) {
ASSERT_STREQ("pop", results["grouped_hits"][0]["group_key"][0].get<std::string>().c_str());
ASSERT_EQ(2, results["grouped_hits"][0]["hits"].size());
ASSERT_STREQ("1", results["grouped_hits"][0]["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("4", results["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["grouped_hits"][0]["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("rock", results["grouped_hits"][1]["group_key"][0].get<std::string>().c_str());
ASSERT_EQ(1, results["grouped_hits"][1]["hits"].size());
ASSERT_EQ(2, results["grouped_hits"][1]["hits"].size());
ASSERT_STREQ("5", results["grouped_hits"][1]["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("6", results["grouped_hits"][1]["hits"][1]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("country", results["grouped_hits"][2]["group_key"][0].get<std::string>().c_str());
ASSERT_EQ(2, results["grouped_hits"][2]["hits"].size());

View File

@@ -2332,3 +2332,37 @@ TEST_F(CollectionSpecificTest, VerbatimMatchShouldNotOverpowerHigherWeightedFiel
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionSpecificTest, DropTokensTillOneToken) {
    // A 3-token query where no document matches all (or any two adjacent) tokens
    // should keep dropping tokens until a single token ("harry") produces a hit.
    std::vector<field> fields = {field("title", field_types::STRING, false),
                                 field("description", field_types::STRING, false),};

    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();

    // Small helper to build an indexable document from its three string fields.
    auto make_doc = [](const std::string& id, const std::string& title,
                       const std::string& description) {
        nlohmann::json doc;
        doc["id"] = id;
        doc["title"] = title;
        doc["description"] = description;
        return doc;
    };

    // Each title holds exactly one of the query tokens; only doc "0"
    // contains "harry", which survives after both other tokens are dropped.
    ASSERT_TRUE(coll1->add(make_doc("0", "Harry", "Malcolm Roscow").dump()).ok());
    ASSERT_TRUE(coll1->add(make_doc("1", "Malcolm", "Something 2").dump()).ok());
    ASSERT_TRUE(coll1->add(make_doc("2", "Roscow", "Something 3").dump()).ok());

    auto results = coll1->search("harry malcolm roscow", {"title"},
                                 "", {}, {}, {2}, 10, 1, FREQUENCY, {true}, 10).get();

    // Only the document whose title matches the surviving token is returned.
    ASSERT_EQ(1, results["hits"].size());
    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());

    collectionManager.drop_collection("coll1");
}