diff --git a/include/topster.h b/include/topster.h index b0b8f125..c7378f2b 100644 --- a/include/topster.h +++ b/include/topster.h @@ -31,6 +31,10 @@ struct KV { this->scores[0] = scores[0]; this->scores[1] = scores[1]; this->scores[2] = scores[2]; + + if(match_score_index >= 0) { + this->text_match_score = scores[match_score_index]; + } } KV() = default; diff --git a/src/collection.cpp b/src/collection.cpp index 1b00fb8a..82811659 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1950,10 +1950,10 @@ Option Collection::search(std::string raw_query, if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) { wrapper_doc["curated"] = true; } else if(field_order_kv->match_score_index >= 0) { - wrapper_doc["text_match"] = field_order_kv->scores[field_order_kv->match_score_index]; + wrapper_doc["text_match"] = field_order_kv->text_match_score; wrapper_doc["text_match_info"] = nlohmann::json::object(); populate_text_match_info(wrapper_doc["text_match_info"], - field_order_kv->scores[field_order_kv->match_score_index], match_type); + field_order_kv->text_match_score, match_type); if(!vector_query.field_name.empty()) { wrapper_doc["hybrid_search_info"] = nlohmann::json::object(); wrapper_doc["hybrid_search_info"]["rank_fusion_score"] = Index::int64_t_to_float(field_order_kv->scores[field_order_kv->match_score_index]); diff --git a/src/index.cpp b/src/index.cpp index a87f245c..bc1845de 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -3213,6 +3213,7 @@ Option Index::search(std::vector& field_query_tokens, cons auto result = result_it->second; // old_score + (1 / rank_of_document) * WEIGHT) result->vector_distance = vec_result.second; + result->text_match_score = result->scores[result->match_score_index]; int64_t match_score = float_to_int64_t( (int64_t_to_float(result->scores[result->match_score_index])) + ((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT)); @@ -3234,6 +3235,7 @@ Option Index::search(std::vector& field_query_tokens, cons int64_t match_score_index = -1; compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second); KV kv(searched_queries.size(), doc_id, doc_id, match_score_index, scores); + kv.text_match_score = 0; kv.vector_distance = vec_result.second; topster->add(&kv); vec_search_ids.push_back(doc_id); @@ -4163,6 +4165,7 @@ void Index::search_across_fields(const std::vector& query_tokens, KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores); if(match_score_index != -1) { kv.scores[match_score_index] = aggregated_score; + kv.text_match_score = aggregated_score; } int ret = topster->add(&kv); diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp index e3f82f69..3e5b43a7 100644 --- a/test/collection_specific_more_test.cpp +++ b/test/collection_specific_more_test.cpp @@ -2530,4 +2530,56 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) { delete filter_tree_root; collectionManager.drop_collection("Collection"); +} + +TEST_F(CollectionSpecificMoreTest, HybridSearchTextMatchInfo) { + auto schema_json = + R"({ + "name": "Products", + "fields": [ + {"name": "product_id", "type": "string"}, + {"name": "product_name", "type": "string", "infix": true}, + {"name": "product_description", "type": "string"}, + {"name": "embedding", "type":"float[]", "embed":{"from": ["product_description"], "model_config": {"model_name": "ts/e5-small"}}} + ] + })"_json; + std::vector documents = { + R"({ + "product_id": "product_a", + "product_name": "shampoo", + "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair." + })"_json, + R"({ + "product_id": "product_b", + "product_name": "soap", + "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients." + })"_json + }; + + TextEmbedderManager::set_model_dir("/tmp/typesense_test/models"); + + auto collection_create_op = collectionManager.create_collection(schema_json); + ASSERT_TRUE(collection_create_op.ok()); + for (auto const &json: documents) { + auto add_op = collection_create_op.get()->add(json.dump()); + ASSERT_TRUE(add_op.ok()); + } + + auto coll1 = collection_create_op.get(); + auto results = coll1->search("natural products", {"product_name", "embedding"}, + "", {}, {}, {2}, 10, + 1, FREQUENCY, {true}, + 0, spp::sparse_hash_set()).get(); + + ASSERT_EQ(2, results["hits"].size()); + + // It's a hybrid search with only vector match + ASSERT_EQ("0", results["hits"][0]["text_match_info"]["score"].get()); + ASSERT_EQ("0", results["hits"][1]["text_match_info"]["score"].get()); + + ASSERT_EQ(0, results["hits"][0]["text_match_info"]["fields_matched"].get()); + ASSERT_EQ(0, results["hits"][1]["text_match_info"]["fields_matched"].get()); + + ASSERT_EQ(0, results["hits"][0]["text_match_info"]["tokens_matched"].get()); + ASSERT_EQ(0, results["hits"][1]["text_match_info"]["tokens_matched"].get()); } \ No newline at end of file