Better highlighting for query tokens that match across fields.
parent b3b47f5651
commit 006ff75154
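In short: Collection::search now passes each field's original include tokens (field_query_tokens[i].q_include_tokens) into highlight_result, and highlight_result falls back to those raw tokens whenever the leaves gathered from searched_queries turn out to be empty for a field, which is the situation that arises when a compound query is satisfied across two different fields.

A minimal, self-contained sketch of that fallback idea, not the actual Typesense implementation: highlight_field, TokenIndex and the whitespace tokenization below are illustrative stand-ins for highlight_result, the per-field ART index and the real tokenizer.

// Sketch: highlight a field using the tokens that matched it; if nothing
// matched this field directly, fall back to whichever original query tokens
// are indexed under the field.
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <vector>

using TokenIndex = std::set<std::string>;  // stand-in for a field's indexed tokens (ART in Typesense)

std::string highlight_field(const std::string& field_value,
                            const std::vector<std::string>& matched_tokens,
                            const std::vector<std::string>& q_tokens,
                            const TokenIndex& field_tokens) {
    // Prefer the tokens that the search actually matched against this field.
    std::vector<std::string> tokens_to_mark = matched_tokens;

    // Fallback: the match came from other fields, so reuse any of the
    // original query tokens that are indexed under this field.
    if(tokens_to_mark.empty()) {
        for(const std::string& t : q_tokens) {
            if(field_tokens.count(t) != 0) {
                tokens_to_mark.push_back(t);
            }
        }
    }

    std::set<std::string> mark(tokens_to_mark.begin(), tokens_to_mark.end());
    std::istringstream in(field_value);
    std::ostringstream out;
    std::string word;
    bool first = true;
    while(in >> word) {
        if(!first) { out << ' '; }
        first = false;
        out << (mark.count(word) ? "<mark>" + word + "</mark>" : word);
    }
    return out.str();
}

int main() {
    // "john denver" matched via the description field, so this field has no
    // per-field matches; the fallback still highlights "annie's" and "song".
    std::vector<std::string> q_tokens = {"annie's", "song", "john", "denver"};
    std::cout << highlight_field("annie's song", {}, q_tokens, {"annie's", "song"}) << "\n";
    // prints: <mark>annie's</mark> <mark>song</mark>
}

In the actual change below, the lookup goes through Index::get_token_leaf on the field's ART index, and the matching leaves feed the normal snippeting path.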
@@ -334,6 +334,7 @@ private:
     std::string get_seq_id_key(uint32_t seq_id) const;
 
     void highlight_result(const field &search_field, const std::vector<std::vector<art_leaf *>> &searched_queries,
+                          const std::vector<std::string>& q_tokens,
                           const KV* field_order_kv, const nlohmann::json &document,
                           StringUtils & string_utils,
                           const size_t snippet_threshold,
@@ -1037,7 +1037,10 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
             fields_highlighted_fully.emplace(highlight_full_field);
         }
 
-        for(const std::string & field_name: search_fields) {
+        for(size_t i = 0; i < search_fields.size(); i++) {
+            const std::string& field_name = search_fields[i];
+            const std::vector<std::string>& q_tokens = field_query_tokens[i].q_include_tokens;
+
             // should not pick excluded field for highlighting
             if(exclude_fields.count(field_name) > 0) {
                 continue;
@@ -1049,7 +1052,7 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
 
             bool highlighted_fully = (fields_highlighted_fully.find(field_name) != fields_highlighted_fully.end());
             highlight_t highlight;
-            highlight_result(search_field, searched_queries, field_order_kv, document,
+            highlight_result(search_field, searched_queries, q_tokens, field_order_kv, document,
                              string_utils, snippet_threshold, highlight_affix_num_tokens,
                              highlighted_fully, highlight_start_tag, highlight_end_tag, highlight);
 
@@ -1378,6 +1381,7 @@ bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t
 
 void Collection::highlight_result(const field &search_field,
                                   const std::vector<std::vector<art_leaf *>> &searched_queries,
+                                  const std::vector<std::string>& q_tokens,
                                   const KV* field_order_kv, const nlohmann::json & document,
                                   StringUtils & string_utils,
                                   const size_t snippet_threshold,
@@ -1412,6 +1416,24 @@ void Collection::highlight_result(const field &search_field,
         }
     }
 
+    if(query_suggestion.empty()) {
+        // can happen for compound query matched across 2 fields: try to use original query tokens
+        for(const std::string& q_token: q_tokens) {
+            Index* index = indices[field_order_kv->key % num_memory_shards];
+            art_leaf *actual_leaf = index->get_token_leaf(search_field.name,
+                                                          reinterpret_cast<const unsigned char *>(q_token.c_str()),
+                                                          q_token.size() + 1);
+            if(actual_leaf != nullptr) {
+                query_suggestion.push_back(actual_leaf);
+                std::vector<uint16_t> positions;
+                uint32_t doc_index = actual_leaf->values->ids.indexOf(field_order_kv->key);
+                auto doc_indices = new uint32_t[1];
+                doc_indices[0] = doc_index;
+                leaf_to_indices.push_back(doc_indices);
+            }
+        }
+    }
+
     if(query_suggestion.empty()) {
         // none of the tokens from the query were found on this field
         free_leaf_indices(leaf_to_indices);
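Note: the pre-existing empty check that follows still fires when even the raw query tokens have no leaves under this field, so a field with no matches at all continues to be skipped for highlighting exactly as before.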
@@ -880,9 +880,9 @@ void Index::search_candidates(const uint8_t & field_id,
                                                    query_suggestion, token_bits);
 
         /*LOG(INFO) << "n: " << n;
-        for(size_t i=0; i < query_suggestion.size(); i++) {
-            LOG(INFO) << "i: " << i << " - " << query_suggestion[i]->key << ", ids: "
-                      << query_suggestion[i]->values->ids.getLength() << ", total_cost: " << total_cost;
+        for(size_t i=0; i < actual_query_suggestion.size(); i++) {
+            LOG(INFO) << "i: " << i << " - " << actual_query_suggestion[i]->key << ", ids: "
+                      << actual_query_suggestion[i]->values->ids.getLength() << ", total_cost: " << total_cost;
         }*/
 
         // initialize results with the starting element (for further intersection)
@@ -1880,7 +1880,7 @@ void Index::search_field(const uint8_t & field_id,
             const std::string token_cost_hash = token + std::to_string(costs[token_index]);
 
             std::vector<art_leaf*> leaves;
-            //LOG(INFO) << "\nSearching for field: " << field << ", token:" << token << " - cost: " << costs[token_index];
+            //LOG(INFO) << "Searching for field: " << field << ", token:" << token << " - cost: " << costs[token_index];
 
             if(token_cost_cache.count(token_cost_hash) != 0) {
                 leaves = token_cost_cache[token_cost_hash];
@@ -3116,6 +3116,10 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
         {"Best Wireless Vehicle Charger",
          "Easily replenish your cell phone with this wireless charger.",
          "Cell Phones > Cell Phone Accessories > Car Chargers"},
+
+        {"Annie's Song",
+         "John Denver",
+         "Album > Compilation"},
     };
 
     for(size_t i=0; i<records.size(); i++) {
@@ -3152,6 +3156,26 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
     ASSERT_EQ("Easily replenish your cell phone with this wireless <mark>charger.</mark>",
               results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
+    results = coll1->search("Annies song John Denver",
+                            {"name","description"}, "", {}, {}, 0, 10, 1, FREQUENCY,
+                            true, 1, spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {1, 1}).get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+
+    ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Annie's</mark> <mark>Song</mark>",
+              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    ASSERT_EQ("description", results["hits"][0]["highlights"][1]["field"].get<std::string>());
+    ASSERT_EQ("<mark>John</mark> <mark>Denver</mark>",
+              results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
+
     collectionManager.drop_collection("coll1");
 }
 
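For the new test case, the query "Annies song John Denver" against {"name", "description"} is expected to return a single hit whose highlights pair each field with the tokens that matched it: highlights[0] has field "name" with snippet "<mark>Annie's</mark> <mark>Song</mark>", and highlights[1] has field "description" with snippet "<mark>John</mark> <mark>Denver</mark>", which is precisely what the assertions above verify.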