// Patch summary (reviewer preamble; placed ahead of the first "diff --git" header).
//
// What the patch does, as far as the hunks below show:
//   * include/collection.h, src/collection.cpp: Collection::highlight_result gains a
//     `q_tokens` parameter, inserted right after `searched_queries`.
//   * src/collection.cpp, inside Collection::search: the range-for over `search_fields`
//     becomes an index loop so that each field name can be paired with
//     `field_query_tokens[i].q_include_tokens`, which is forwarded as `q_tokens`.
//   * src/collection.cpp, inside Collection::highlight_result: when `query_suggestion` is
//     still empty (the in-patch comment says this can happen for a compound query matched
//     across 2 fields), every token in `q_tokens` is looked up with
//     `index->get_token_leaf(search_field.name, ..., q_token.size() + 1)` on the index
//     selected by `field_order_kv->key % num_memory_shards`. For each non-null leaf, the
//     leaf is appended to `query_suggestion` and a one-element `new uint32_t[1]` holding
//     `ids.indexOf(field_order_kv->key)` is appended to `leaf_to_indices`.
//   * src/index.cpp: only commented-out LOG(INFO) statements change (identifier rename to
//     `actual_query_suggestion` inside a /* */ block; "\n" dropped from a //LOG string).
//   * test/collection_test.cpp: MultiFieldHighlighting gets a new record
//     {"Annie's Song", "John Denver", "Album > Compilation"} and a search for
//     "Annies song John Denver" over {"name","description"} that expects one hit with two
//     highlights, for fields "name" and "description".
//
// NOTE(review): this text appears to have lost its line breaks and any angle-bracketed
//   content (e.g. `std::vector>`, `reinterpret_cast(`, `.get()`, `for(size_t i=0; icharger.`),
//   so template arguments, highlight markup in the expected snippets and at least one hunk
//   header are missing. It presumably will not apply as-is - restore from the original commit.
// NOTE(review): `std::vector positions;` is declared in the new fallback branch and is not
//   used anywhere in the visible hunk - looks removable; confirm.
// NOTE(review): the value returned by `ids.indexOf(field_order_kv->key)` is stored without
//   any check. A token can presumably exist in the field's index without occurring in this
//   particular document - confirm what indexOf returns on a miss and that the code consuming
//   `leaf_to_indices` tolerates it (or skip the leaf before pushing it).
// NOTE(review): `q_token.size() + 1` presumably counts the trailing NUL as part of the key -
//   confirm it matches how tokens are inserted into the index.
// NOTE(review): `field_query_tokens[i]` assumes that container is index-aligned with
//   `search_fields` and at least as long - confirm at the call site.
// NOTE(review): the `new uint32_t[1]` buffer is owned through `leaf_to_indices`; the visible
//   code hands that vector to `free_leaf_indices` only on the "still empty" path - confirm the
//   remaining exit paths of highlight_result release it too.
// NOTE(review): the commented-out debug loop now refers to `actual_query_suggestion` while the
//   call just above it passes `query_suggestion` - only matters if the log is re-enabled.
diff --git a/include/collection.h b/include/collection.h index 211f9eab..b12e7ebc 100644 --- a/include/collection.h +++ b/include/collection.h @@ -334,6 +334,7 @@ private: std::string get_seq_id_key(uint32_t seq_id) const; void highlight_result(const field &search_field, const std::vector> &searched_queries, + const std::vector& q_tokens, const KV* field_order_kv, const nlohmann::json &document, StringUtils & string_utils, const size_t snippet_threshold, diff --git a/src/collection.cpp b/src/collection.cpp index 17b29c39..1904a880 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1037,7 +1037,10 @@ Option Collection::search(const std::string & query, const std:: fields_highlighted_fully.emplace(highlight_full_field); } - for(const std::string & field_name: search_fields) { + for(size_t i = 0; i < search_fields.size(); i++) { + const std::string& field_name = search_fields[i]; + const std::vector& q_tokens = field_query_tokens[i].q_include_tokens; + // should not pick excluded field for highlighting if(exclude_fields.count(field_name) > 0) { continue; @@ -1049,7 +1052,7 @@ Option Collection::search(const std::string & query, const std:: bool highlighted_fully = (fields_highlighted_fully.find(field_name) != fields_highlighted_fully.end()); highlight_t highlight; - highlight_result(search_field, searched_queries, field_order_kv, document, + highlight_result(search_field, searched_queries, q_tokens, field_order_kv, document, string_utils, snippet_threshold, highlight_affix_num_tokens, highlighted_fully, highlight_start_tag, highlight_end_tag, highlight); @@ -1378,6 +1381,7 @@ bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t void Collection::highlight_result(const field &search_field, const std::vector> &searched_queries, + const std::vector& q_tokens, const KV* field_order_kv, const nlohmann::json & document, StringUtils & string_utils, const size_t snippet_threshold, @@ -1412,6 +1416,24 @@ void 
Collection::highlight_result(const field &search_field, } } + if(query_suggestion.empty()) { + // can happen for compound query matched across 2 fields: try to use original query tokens + for(const std::string& q_token: q_tokens) { + Index* index = indices[field_order_kv->key % num_memory_shards]; + art_leaf *actual_leaf = index->get_token_leaf(search_field.name, + reinterpret_cast(q_token.c_str()), + q_token.size() + 1); + if(actual_leaf != nullptr) { + query_suggestion.push_back(actual_leaf); + std::vector positions; + uint32_t doc_index = actual_leaf->values->ids.indexOf(field_order_kv->key); + auto doc_indices = new uint32_t[1]; + doc_indices[0] = doc_index; + leaf_to_indices.push_back(doc_indices); + } + } + } + if(query_suggestion.empty()) { // none of the tokens from the query were found on this field free_leaf_indices(leaf_to_indices); diff --git a/src/index.cpp b/src/index.cpp index bd80ee3e..821878fa 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -880,9 +880,9 @@ void Index::search_candidates(const uint8_t & field_id, query_suggestion, token_bits); /*LOG(INFO) << "n: " << n; - for(size_t i=0; i < query_suggestion.size(); i++) { - LOG(INFO) << "i: " << i << " - " << query_suggestion[i]->key << ", ids: " - << query_suggestion[i]->values->ids.getLength() << ", total_cost: " << total_cost; + for(size_t i=0; i < actual_query_suggestion.size(); i++) { + LOG(INFO) << "i: " << i << " - " << actual_query_suggestion[i]->key << ", ids: " + << actual_query_suggestion[i]->values->ids.getLength() << ", total_cost: " << total_cost; }*/ // initialize results with the starting element (for further intersection) @@ -1880,7 +1880,7 @@ void Index::search_field(const uint8_t & field_id, const std::string token_cost_hash = token + std::to_string(costs[token_index]); std::vector leaves; - //LOG(INFO) << "\nSearching for field: " << field << ", token:" << token << " - cost: " << costs[token_index]; + //LOG(INFO) << "Searching for field: " << field << ", token:" << token << " 
- cost: " << costs[token_index]; if(token_cost_cache.count(token_cost_hash) != 0) { leaves = token_cost_cache[token_cost_hash]; diff --git a/test/collection_test.cpp b/test/collection_test.cpp index cbe2ea2a..21f877a1 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -3116,6 +3116,10 @@ TEST_F(CollectionTest, MultiFieldHighlighting) { {"Best Wireless Vehicle Charger", "Easily replenish your cell phone with this wireless charger.", "Cell Phones > Cell Phone Accessories > Car Chargers"}, + + {"Annie's Song", + "John Denver", + "Album > Compilation"}, }; for(size_t i=0; icharger.", results["hits"][0]["highlights"][1]["snippet"].get()); + results = coll1->search("Annies song John Denver", + {"name","description"}, "", {}, {}, 0, 10, 1, FREQUENCY, + true, 1, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 40, {}, {}, {}, 0, + "", "", {1, 1}).get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + + ASSERT_STREQ("1", results["hits"][0]["document"]["id"].get().c_str()); + + ASSERT_EQ(2, results["hits"][0]["highlights"].size()); + ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get()); + ASSERT_EQ("Annie's Song", + results["hits"][0]["highlights"][0]["snippet"].get()); + + ASSERT_EQ("description", results["hits"][0]["highlights"][1]["field"].get()); + ASSERT_EQ("John Denver", + results["hits"][0]["highlights"][1]["snippet"].get()); + collectionManager.drop_collection("coll1"); }