mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 14:12:27 +08:00
Merge pull request #1145 from ozanarmagan/v0.25-join
Fix wrong hybrid search text match score
This commit is contained in:
commit
b0376e5acf
@ -31,6 +31,10 @@ struct KV {
|
||||
this->scores[0] = scores[0];
|
||||
this->scores[1] = scores[1];
|
||||
this->scores[2] = scores[2];
|
||||
|
||||
if(match_score_index >= 0) {
|
||||
this->text_match_score = scores[match_score_index];
|
||||
}
|
||||
}
|
||||
|
||||
KV() = default;
|
||||
|
@ -1950,10 +1950,10 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) {
|
||||
wrapper_doc["curated"] = true;
|
||||
} else if(field_order_kv->match_score_index >= 0) {
|
||||
wrapper_doc["text_match"] = field_order_kv->scores[field_order_kv->match_score_index];
|
||||
wrapper_doc["text_match"] = field_order_kv->text_match_score;
|
||||
wrapper_doc["text_match_info"] = nlohmann::json::object();
|
||||
populate_text_match_info(wrapper_doc["text_match_info"],
|
||||
field_order_kv->scores[field_order_kv->match_score_index], match_type);
|
||||
field_order_kv->text_match_score, match_type);
|
||||
if(!vector_query.field_name.empty()) {
|
||||
wrapper_doc["hybrid_search_info"] = nlohmann::json::object();
|
||||
wrapper_doc["hybrid_search_info"]["rank_fusion_score"] = Index::int64_t_to_float(field_order_kv->scores[field_order_kv->match_score_index]);
|
||||
|
@ -3213,6 +3213,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
auto result = result_it->second;
|
||||
// new score = old_score + ((1 / rank_of_document) * WEIGHT)
|
||||
result->vector_distance = vec_result.second;
|
||||
result->text_match_score = result->scores[result->match_score_index];
|
||||
int64_t match_score = float_to_int64_t(
|
||||
(int64_t_to_float(result->scores[result->match_score_index])) +
|
||||
((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT));
|
||||
@ -3234,6 +3235,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
int64_t match_score_index = -1;
|
||||
compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second);
|
||||
KV kv(searched_queries.size(), doc_id, doc_id, match_score_index, scores);
|
||||
kv.text_match_score = 0;
|
||||
kv.vector_distance = vec_result.second;
|
||||
topster->add(&kv);
|
||||
vec_search_ids.push_back(doc_id);
|
||||
@ -4163,6 +4165,7 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
|
||||
if(match_score_index != -1) {
|
||||
kv.scores[match_score_index] = aggregated_score;
|
||||
kv.text_match_score = aggregated_score;
|
||||
}
|
||||
|
||||
int ret = topster->add(&kv);
|
||||
|
@ -2530,4 +2530,56 @@ TEST_F(CollectionSpecificMoreTest, ApproxFilterMatchCount) {
|
||||
|
||||
delete filter_tree_root;
|
||||
collectionManager.drop_collection("Collection");
|
||||
}
|
||||
|
||||
// Runs a search over a text field plus an auto-embedded vector field and
// checks that, for hits the test expects to come only from the vector side,
// the reported text match info is all zeroes (score, fields_matched,
// tokens_matched) rather than echoing a fused ranking score.
TEST_F(CollectionSpecificMoreTest, HybridSearchTextMatchInfo) {
    // Schema: three string fields and an `embedding` field generated from
    // `product_description` with the ts/e5-small model.
    nlohmann::json schema = R"({
        "name": "Products",
        "fields": [
            {"name": "product_id", "type": "string"},
            {"name": "product_name", "type": "string", "infix": true},
            {"name": "product_description", "type": "string"},
            {"name": "embedding", "type":"float[]", "embed":{"from": ["product_description"], "model_config": {"model_name": "ts/e5-small"}}}
        ]
    })"_json;

    const std::vector<nlohmann::json> products = {
        R"({
            "product_id": "product_a",
            "product_name": "shampoo",
            "product_description": "Our new moisturizing shampoo is perfect for those with dry or damaged hair."
        })"_json,
        R"({
            "product_id": "product_b",
            "product_name": "soap",
            "product_description": "Introducing our all-natural, organic soap bar made with essential oils and botanical ingredients."
        })"_json
    };

    // The model directory has to be configured before the collection (and
    // therefore its embedding field) is created.
    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());

    auto coll = create_op.get();

    // Index every product; any failed insert aborts the test immediately.
    for (const auto& product : products) {
        auto insert_op = coll->add(product.dump());
        ASSERT_TRUE(insert_op.ok());
    }

    auto response = coll->search("natural products", {"product_name", "embedding"},
                                 "", {}, {}, {2}, 10,
                                 1, FREQUENCY, {true},
                                 0, spp::sparse_hash_set<std::string>()).get();

    ASSERT_EQ(2, response["hits"].size());

    auto& first_info = response["hits"][0]["text_match_info"];
    auto& second_info = response["hits"][1]["text_match_info"];

    // Only the vector side is expected to match here, so every text match
    // statistic must be zero for both hits.
    ASSERT_EQ("0", first_info["score"].get<std::string>());
    ASSERT_EQ("0", second_info["score"].get<std::string>());

    ASSERT_EQ(0, first_info["fields_matched"].get<size_t>());
    ASSERT_EQ(0, second_info["fields_matched"].get<size_t>());

    ASSERT_EQ(0, first_info["tokens_matched"].get<size_t>());
    ASSERT_EQ(0, second_info["tokens_matched"].get<size_t>());
}
|
Loading…
x
Reference in New Issue
Block a user