Merge pull request #1448 from krunal1313/exact_match_without_closing_symbol

fix open quotes search query bug
This commit is contained in:
Kishore Nallan 2023-12-23 15:15:23 +05:30 committed by GitHub
commit 962e6b758c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 67 additions and 1 deletions

View File

@ -335,4 +335,6 @@ struct StringUtils {
static Option<bool> tokenize_filter_query(const std::string& filter_query, std::queue<std::string>& tokens);
static Option<bool> split_include_fields(const std::string& include_fields, std::vector<std::string>& tokens);
static size_t get_occurence_count(const std::string& str, char symbol);
};

View File

@ -3332,7 +3332,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
if(exclude_operator_prior) {
q_exclude_tokens.push_back(phrase);
} else {
q_phrases.push_back(phrase);
q_include_tokens.insert(q_include_tokens.end(), phrase.begin(), phrase.end());
}
}

View File

@ -594,3 +594,7 @@ size_t StringUtils::split_facet(const std::string &s, std::vector<std::string> &
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf8conv;
return utf8conv.from_bytes(bytes).size();
}*/
// Counts how many characters in `str` compare equal to `symbol`.
// Returns 0 for an empty string.
size_t StringUtils::get_occurence_count(const std::string &str, char symbol) {
    const auto matches = std::count_if(str.cbegin(), str.cend(),
                                       [symbol](const char ch) { return ch == symbol; });
    return static_cast<size_t>(matches);
}

View File

@ -2179,6 +2179,10 @@ TEST_F(CollectionSpecificMoreTest, PhraseMatchAcrossArrayElements) {
auto res = coll1->search(R"("state of the art)", {"texts"}, "", {}, {}, {0}, 10, 1,
FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(1, res["hits"].size());
res = coll1->search(R"("state of the art")", {"texts"}, "", {}, {}, {0}, 10, 1,
FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(0, res["hits"].size());
}

View File

@ -2996,3 +2996,59 @@ TEST_F(CollectionSpecificTest, DontHighlightPunctuation) {
collectionManager.drop_collection("coll1");
}
// Regression test for queries that open a double quote but never close it (e.g. `"Hamp`).
// NOTE(review): the expectations below are consistent with such a query being handled like an
// ordinary prefix search instead of an exact phrase match -- confirm against parse_search_query.
TEST_F(CollectionSpecificTest, ExactMatchWithoutClosingSymbol) {
    std::vector<field> fields = {field("title", field_types::STRING, false),};

    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();

    std::vector<std::vector<std::string>> records = {
        {"Hampi"},
        {"Mahabalipuram"},
        {"Taj Mahal"},
        {"Mysore Palace"}
    };

    // Document ids are the record's index in `records`, so "Hampi" is "0", "Taj Mahal" is "2", etc.
    for(size_t i=0; i<records.size(); i++) {
        nlohmann::json doc;
        doc["id"] = std::to_string(i);
        doc["title"] = records[i][0];
        ASSERT_TRUE(coll1->add(doc.dump()).ok());
    }

    // Query with an opening quote only: expected to match the single title starting with "Hamp".
    std::map<std::string, std::string> req_params = {
        {"collection", "coll1"},
        {"q", "\"Hamp"},
        {"query_by", "title"},
    };
    nlohmann::json embedded_params;
    std::string json_res;
    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();

    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    // Fail with a clear assertion instead of a JSON parse exception when the search itself fails.
    ASSERT_TRUE(search_op.ok());

    nlohmann::json result = nlohmann::json::parse(json_res);
    ASSERT_EQ(1, result["hits"].size());
    ASSERT_EQ("0", result["hits"][0]["document"]["id"]);
    ASSERT_EQ("Hampi", result["hits"][0]["document"]["title"]);

    // "Mah" appears at the start of a token in two titles; both must be returned.
    req_params = {
        {"collection", "coll1"},
        {"q", "\"Mah"},
        {"query_by", "title"},
    };
    now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();

    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
    ASSERT_TRUE(search_op.ok());

    result = nlohmann::json::parse(json_res);
    ASSERT_EQ(2, result["hits"].size());
    ASSERT_EQ("2", result["hits"][0]["document"]["id"]);
    ASSERT_EQ("Taj Mahal", result["hits"][0]["document"]["title"]);
    ASSERT_EQ("1", result["hits"][1]["document"]["id"]);
    ASSERT_EQ("Mahabalipuram", result["hits"][1]["document"]["title"]);

    // Drop the collection so "coll1" does not leak into tests that run afterwards.
    collectionManager.drop_collection("coll1");
}