diff --git a/src/collection.cpp b/src/collection.cpp index 96ae52c9..d674783e 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -862,7 +862,7 @@ Option Collection::search(const std::string & query, const std:: std::unique_lock lock_process(m_process); cv_process.wait(lock_process, [&](){ return num_processed == num_indices; }); - // for grouping we have re-aggregate + // for grouping we have to re-aggregate const size_t topster_size = std::max((size_t)1, max_hits); Topster topster(topster_size, group_limit); @@ -1391,8 +1391,9 @@ void Collection::highlight_result(const field &search_field, Index* index = indices[field_order_kv->key % num_memory_shards]; art_leaf *actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len); + //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key; + if(actual_leaf != nullptr) { - //LOG(INFO) << "field: " << search_field.name << ", key: " << actual_leaf->key; query_suggestion.push_back(actual_leaf); std::vector positions; uint32_t doc_index = actual_leaf->values->ids.indexOf(field_order_kv->key); diff --git a/src/index.cpp b/src/index.cpp index 6cf0ac48..66227591 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1700,9 +1700,11 @@ void Index::search(const std::vector& q_include_tokens, } } - for(const auto& seq_id_kvs: topster_ids) { + for(auto& seq_id_kvs: topster_ids) { const uint64_t seq_id = seq_id_kvs.first; - const auto& kvs = seq_id_kvs.second; // each `kv` can be from a different field + auto& kvs = seq_id_kvs.second; // each `kv` can be from a different field + + std::sort(kvs.begin(), kvs.end(), Topster::is_greater); // LOG(INFO) << "DOC ID: " << seq_id << ", score: " << kvs[0]->scores[kvs[0]->match_score_index]; diff --git a/test/collection_test.cpp b/test/collection_test.cpp index 12dcf7cf..1419bea3 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -3001,6 +3001,87 @@ TEST_F(CollectionTest, MultiFieldRelevance5) { ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get().c_str()); ASSERT_STREQ("1", results["hits"][2]["document"]["id"].get().c_str()); + results = coll1->search("Canada", + {"company_name","field_a","country"}, "", {}, {}, 2, 10, 1, FREQUENCY, + true, 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 40, {}, {}, {}, 0, + "", "", {1, 1, 1}).get(); + + ASSERT_EQ(3, results["found"].get()); + ASSERT_EQ(3, results["hits"].size()); + + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("1", results["hits"][2]["document"]["id"].get().c_str()); + + ASSERT_EQ(1, results["hits"][0]["highlights"].size()); + ASSERT_EQ("country", results["hits"][0]["highlights"][0]["field"].get()); + ASSERT_EQ("Canada", results["hits"][0]["highlights"][0]["snippet"].get()); + + ASSERT_EQ(1, results["hits"][1]["highlights"].size()); + ASSERT_EQ("field_a", results["hits"][1]["highlights"][0]["field"].get()); + ASSERT_EQ("Canadoo", results["hits"][1]["highlights"][0]["snippet"].get()); + + ASSERT_EQ(1, results["hits"][2]["highlights"].size()); + ASSERT_EQ("company_name", results["hits"][2]["highlights"][0]["field"].get()); + ASSERT_EQ("Canaida Corp", results["hits"][2]["highlights"][0]["snippet"].get()); + + collectionManager.drop_collection("coll1"); +} + +TEST_F(CollectionTest, MultiFieldHighlighting) { + Collection *coll1; + + std::vector fields = {field("name", field_types::STRING, false), + field("description", field_types::STRING, false), + field("categories", field_types::STRING_ARRAY, false), + field("points", field_types::INT32, false)}; + + coll1 = collectionManager.get_collection("coll1").get(); + if(coll1 == nullptr) { + coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get(); + } + + std::vector> records = { + {"Best Wireless Vehicle Charger", + "Easily replenish your cell phone with this wireless charger.", + "Cell Phones > Cell Phone Accessories > Car Chargers"}, + }; + + for(size_t i=0; i categories; + StringUtils::split(records[i][2], categories, ">"); + + doc["id"] = std::to_string(i); + doc["name"] = records[i][0]; + doc["description"] = records[i][1]; + doc["categories"] = categories; + doc["points"] = i; + + ASSERT_TRUE(coll1->add(doc.dump()).ok()); + } + + auto results = coll1->search("charger", + {"name","description","categories"}, "", {}, {}, 2, 10, 1, FREQUENCY, + true, 10, spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "", 40, {}, {}, {}, 0, + "", "", {1, 1, 1}).get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + + ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str()); + + ASSERT_EQ(2, results["hits"][0]["highlights"].size()); + ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get()); + ASSERT_EQ("Best Wireless Vehicle Charger", + results["hits"][0]["highlights"][0]["snippet"].get()); + + ASSERT_EQ("description", results["hits"][0]["highlights"][1]["field"].get()); + ASSERT_EQ("Easily replenish your cell phone with this wireless charger.", + results["hits"][0]["highlights"][1]["snippet"].get()); + collectionManager.drop_collection("coll1"); }