mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 06:02:26 +08:00
Allow text match bucket of 1.
This commit is contained in:
parent
0419a40e6f
commit
38c5c0b035
@ -1549,12 +1549,12 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
return Option<nlohmann::json>(408, "Request Timeout");
|
||||
}
|
||||
|
||||
if(match_score_index >= 0 && sort_fields_std[match_score_index].text_match_buckets > 1) {
|
||||
if(match_score_index >= 0 && sort_fields_std[match_score_index].text_match_buckets > 0) {
|
||||
size_t num_buckets = sort_fields_std[match_score_index].text_match_buckets;
|
||||
const size_t max_kvs_bucketed = std::min<size_t>(DEFAULT_TOPSTER_SIZE, raw_result_kvs.size());
|
||||
|
||||
if(max_kvs_bucketed >= num_buckets) {
|
||||
std::vector<int64_t> result_scores(max_kvs_bucketed);
|
||||
spp::sparse_hash_map<uint64_t, int64_t> result_scores;
|
||||
|
||||
// only first `max_kvs_bucketed` elements are bucketed to prevent pagination issues past 250 records
|
||||
size_t block_len = (max_kvs_bucketed / num_buckets);
|
||||
@ -1563,7 +1563,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
int64_t anchor_score = raw_result_kvs[i][0]->scores[raw_result_kvs[i][0]->match_score_index];
|
||||
size_t j = 0;
|
||||
while(j < block_len && i+j < max_kvs_bucketed) {
|
||||
result_scores[i+j] = raw_result_kvs[i+j][0]->scores[raw_result_kvs[i+j][0]->match_score_index];
|
||||
result_scores[raw_result_kvs[i+j][0]->key] = raw_result_kvs[i+j][0]->scores[raw_result_kvs[i+j][0]->match_score_index];
|
||||
raw_result_kvs[i+j][0]->scores[raw_result_kvs[i+j][0]->match_score_index] = anchor_score;
|
||||
j++;
|
||||
}
|
||||
@ -1577,7 +1577,8 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
|
||||
// restore original scores
|
||||
for(i = 0; i < max_kvs_bucketed; i++) {
|
||||
raw_result_kvs[i][0]->scores[raw_result_kvs[i][0]->match_score_index] = result_scores[i];
|
||||
raw_result_kvs[i][0]->scores[raw_result_kvs[i][0]->match_score_index] =
|
||||
result_scores[raw_result_kvs[i][0]->key];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1636,7 +1636,7 @@ TEST_F(CollectionSortingTest, TextMatchBucketRanking) {
|
||||
nlohmann::json doc1;
|
||||
doc1["id"] = "0";
|
||||
doc1["title"] = "Mark Antony";
|
||||
doc1["description"] = "Marriage Counsellor";
|
||||
doc1["description"] = "Counsellor";
|
||||
doc1["points"] = 100;
|
||||
|
||||
nlohmann::json doc2;
|
||||
@ -1653,47 +1653,51 @@ TEST_F(CollectionSortingTest, TextMatchBucketRanking) {
|
||||
sort_by("points", "DESC"),
|
||||
};
|
||||
|
||||
auto results = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
auto results = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true).get();
|
||||
"<mark>", "</mark>", {3}, 1000, true).get();
|
||||
|
||||
// when there are more buckets than results, no bucketing will happen
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
|
||||
// bucketing by 1 produces original text match
|
||||
// bucketing by 1 puts all hits into a single bucket, so they all share the same text match score for sorting
|
||||
sort_fields = {
|
||||
sort_by("_text_match(buckets: 1)", "DESC"),
|
||||
sort_by("points", "DESC"),
|
||||
};
|
||||
|
||||
results = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
results = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true).get();
|
||||
"<mark>", "</mark>", {3}, 1000, true).get();
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", results["hits"][1]["document"]["id"].get<std::string>());
|
||||
|
||||
// likewise with bucket 0
|
||||
size_t score1 = std::stoul(results["hits"][0]["text_match_info"]["score"].get<std::string>());
|
||||
size_t score2 = std::stoul(results["hits"][1]["text_match_info"]["score"].get<std::string>());
|
||||
ASSERT_TRUE(score1 < score2);
|
||||
|
||||
// bucketing by 0 produces original text match
|
||||
sort_fields = {
|
||||
sort_by("_text_match(buckets: 0)", "DESC"),
|
||||
sort_by("points", "DESC"),
|
||||
};
|
||||
|
||||
results = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
results = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true).get();
|
||||
"<mark>", "</mark>", {3}, 1000, true).get();
|
||||
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
|
||||
@ -1702,46 +1706,46 @@ TEST_F(CollectionSortingTest, TextMatchBucketRanking) {
|
||||
// don't allow bad parameter name
|
||||
sort_fields[0] = sort_by("_text_match(foobar: 0)", "DESC");
|
||||
|
||||
auto res_op = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
auto res_op = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true);
|
||||
"<mark>", "</mark>", {3}, 1000, true);
|
||||
|
||||
ASSERT_FALSE(res_op.ok());
|
||||
ASSERT_EQ("Invalid sorting parameter passed for _text_match.", res_op.error());
|
||||
|
||||
// handle bad syntax
|
||||
sort_fields[0] = sort_by("_text_match(foobar:", "DESC");
|
||||
res_op = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
res_op = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true);
|
||||
"<mark>", "</mark>", {3}, 1000, true);
|
||||
ASSERT_FALSE(res_op.ok());
|
||||
ASSERT_EQ("Could not find a field named `_text_match(foobar:` in the schema for sorting.", res_op.error());
|
||||
|
||||
// handle bad value
|
||||
sort_fields[0] = sort_by("_text_match(buckets: x)", "DESC");
|
||||
res_op = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
res_op = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true);
|
||||
"<mark>", "</mark>", {3}, 1000, true);
|
||||
ASSERT_FALSE(res_op.ok());
|
||||
ASSERT_EQ("Invalid value passed for _text_match `buckets` configuration.", res_op.error());
|
||||
|
||||
// handle negative value
|
||||
sort_fields[0] = sort_by("_text_match(buckets: -1)", "DESC");
|
||||
res_op = coll1->search("mark", {"title", "description"},
|
||||
"", {}, sort_fields, {2, 2}, 10,
|
||||
1, FREQUENCY, {true, true},
|
||||
res_op = coll1->search("mark", {"title"},
|
||||
"", {}, sort_fields, {2}, 10,
|
||||
1, FREQUENCY, {true},
|
||||
10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {3, 1}, 1000, true);
|
||||
"<mark>", "</mark>", {3}, 1000, true);
|
||||
ASSERT_FALSE(res_op.ok());
|
||||
ASSERT_EQ("Invalid value passed for _text_match `buckets` configuration.", res_op.error());
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user