Merge pull request #1374 from happy-san/reference_sort_fix

Fix reference sort.
This commit is contained in:
Kishore Nallan 2023-11-14 11:00:02 +05:30 committed by GitHub
commit a41154c068
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 22 deletions

View File

@ -4252,6 +4252,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
}
const int64_t default_score = INT64_MIN; // to handle field that doesn't exist in document (e.g. optional)
uint32_t ref_seq_id;
// avoiding loop
if (sort_fields.size() > 0) {
@ -4267,7 +4268,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
// Joined on ref collection
if (references.count(ref_collection_name) > 0) {
if (references.at(ref_collection_name).count == 1) {
seq_id = references.at(ref_collection_name).docs[0];
ref_seq_id = references.at(ref_collection_name).docs[0];
} else {
return Option<bool>(400, references.at(ref_collection_name).count > 1 ?
multiple_references_error_message : no_references_error_message);
@ -4291,7 +4292,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
return Option<bool>(400, "Could not find a reference for doc " + std::to_string(seq_id));
}
seq_id = sort_index.at(field_name)->at(seq_id);
ref_seq_id = sort_index.at(field_name)->at(seq_id);
}
// Joined collection has a reference
else {
@ -4329,7 +4330,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
return Option<bool>(op.code(), op.error());
}
seq_id = op.get();
ref_seq_id = op.get();
} else {
return Option<bool>(400, count > 1 ? multiple_references_error_message :
no_references_error_message);
@ -4356,7 +4357,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
"` not found.");
}
scores[0] = ref_collection->reference_string_sort_score(sort_fields[0].name, seq_id);
scores[0] = ref_collection->reference_string_sort_score(sort_fields[0].name, ref_seq_id);
}
if(scores[0] == adi_tree_t::NOT_FOUND) {
@ -4412,7 +4413,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
// do nothing
}
} else {
auto it = field_values[0]->find(seq_id);
auto it = field_values[0]->find(sort_fields[0].reference_collection_name.empty() ? seq_id : ref_seq_id);
scores[0] = (it == field_values[0]->end()) ? default_score : it->second;
if(scores[0] == INT64_MIN && sort_fields[0].missing_values == sort_by::missing_values_t::first) {
@ -4442,7 +4443,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
// Joined on ref collection
if (references.count(ref_collection_name) > 0) {
if (references.at(ref_collection_name).count == 1) {
seq_id = references.at(ref_collection_name).docs[0];
ref_seq_id = references.at(ref_collection_name).docs[0];
} else {
return Option<bool>(400, references.at(ref_collection_name).count > 1 ?
multiple_references_error_message : no_references_error_message);
@ -4466,7 +4467,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
return Option<bool>(400, "Could not find a reference for doc " + std::to_string(seq_id));
}
seq_id = sort_index.at(field_name)->at(seq_id);
ref_seq_id = sort_index.at(field_name)->at(seq_id);
}
// Joined collection has a reference
else {
@ -4504,7 +4505,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
return Option<bool>(op.code(), op.error());
}
seq_id = op.get();
ref_seq_id = op.get();
} else {
return Option<bool>(400, count > 1 ? multiple_references_error_message :
no_references_error_message);
@ -4531,7 +4532,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
"` not found.");
}
scores[1] = ref_collection->reference_string_sort_score(sort_fields[1].name, seq_id);
scores[1] = ref_collection->reference_string_sort_score(sort_fields[1].name, ref_seq_id);
}
if(scores[1] == adi_tree_t::NOT_FOUND) {
@ -4588,7 +4589,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
}
} else {
auto it = field_values[1]->find(seq_id);
auto it = field_values[1]->find(sort_fields[1].reference_collection_name.empty() ? seq_id : ref_seq_id);
scores[1] = (it == field_values[1]->end()) ? default_score : it->second;
if(scores[1] == INT64_MIN && sort_fields[1].missing_values == sort_by::missing_values_t::first) {
bool is_asc = (sort_order[1] == -1);
@ -4614,7 +4615,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
// Joined on ref collection
if (references.count(ref_collection_name) > 0) {
if (references.at(ref_collection_name).count == 1) {
seq_id = references.at(ref_collection_name).docs[0];
ref_seq_id = references.at(ref_collection_name).docs[0];
} else {
return Option<bool>(400, references.at(ref_collection_name).count > 1 ?
multiple_references_error_message : no_references_error_message);
@ -4638,7 +4639,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
return Option<bool>(400, "Could not find a reference for doc " + std::to_string(seq_id));
}
seq_id = sort_index.at(field_name)->at(seq_id);
ref_seq_id = sort_index.at(field_name)->at(seq_id);
}
// Joined collection has a reference
else {
@ -4676,7 +4677,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
return Option<bool>(op.code(), op.error());
}
seq_id = op.get();
ref_seq_id = op.get();
} else {
return Option<bool>(400, count > 1 ? multiple_references_error_message :
no_references_error_message);
@ -4703,7 +4704,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
"` not found.");
}
scores[2] = ref_collection->reference_string_sort_score(sort_fields[2].name, seq_id);
scores[2] = ref_collection->reference_string_sort_score(sort_fields[2].name, ref_seq_id);
}
if(scores[2] == adi_tree_t::NOT_FOUND) {
@ -4759,7 +4760,7 @@ Option<bool> Index::compute_sort_scores(const std::vector<sort_by>& sort_fields,
// do nothing
}
} else {
auto it = field_values[2]->find(seq_id);
auto it = field_values[2]->find(sort_fields[2].reference_collection_name.empty() ? seq_id : ref_seq_id);
scores[2] = (it == field_values[2]->end()) ? default_score : it->second;
if(scores[2] == INT64_MIN && sort_fields[2].missing_values == sort_by::missing_values_t::first) {
bool is_asc = (sort_order[2] == -1);

View File

@ -3317,7 +3317,7 @@ TEST_F(CollectionJoinTest, SortByReference) {
"name": "Customers",
"fields": [
{"name": "customer_id", "type": "string"},
{"name": "customer_name", "type": "string"},
{"name": "customer_name", "type": "string", "sort": true},
{"name": "product_price", "type": "float"},
{"name": "product_available", "type": "bool"},
{"name": "product_location", "type": "geopoint"},
@ -3721,12 +3721,41 @@ TEST_F(CollectionJoinTest, SortByReference) {
ASSERT_EQ("soap", res_obj["hits"][1]["document"].at("product_name"));
ASSERT_EQ(73.5, res_obj["hits"][1]["document"].at("product_price"));
req_params = {
{"collection", "Customers"},
{"q", "*"},
{"include_fields", "$Products(product_name:merge), customer_name, id"},
{"sort_by", "$Products(product_name:asc), customer_name:desc"},
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
res_obj = nlohmann::json::parse(json_res);
ASSERT_EQ(4, res_obj["found"].get<size_t>());
ASSERT_EQ(4, res_obj["hits"].size());
ASSERT_EQ(3, res_obj["hits"][0]["document"].size());
ASSERT_EQ("0", res_obj["hits"][0]["document"].at("id"));
ASSERT_EQ("Joe", res_obj["hits"][0]["document"].at("customer_name"));
ASSERT_EQ("shampoo", res_obj["hits"][0]["document"].at("product_name"));
ASSERT_EQ(3, res_obj["hits"][1]["document"].size());
ASSERT_EQ("2", res_obj["hits"][1]["document"].at("id"));
ASSERT_EQ("Dan", res_obj["hits"][1]["document"].at("customer_name"));
ASSERT_EQ("shampoo", res_obj["hits"][1]["document"].at("product_name"));
ASSERT_EQ(3, res_obj["hits"][2]["document"].size());
ASSERT_EQ("1", res_obj["hits"][2]["document"].at("id"));
ASSERT_EQ("Joe", res_obj["hits"][2]["document"].at("customer_name"));
ASSERT_EQ("soap", res_obj["hits"][2]["document"].at("product_name"));
ASSERT_EQ(3, res_obj["hits"][3]["document"].size());
ASSERT_EQ("3", res_obj["hits"][3]["document"].at("id"));
ASSERT_EQ("Dan", res_obj["hits"][3]["document"].at("customer_name"));
ASSERT_EQ("soap", res_obj["hits"][3]["document"].at("product_name"));
schema_json =
R"({
"name": "Users",
"fields": [
{"name": "user_id", "type": "string"},
{"name": "user_name", "type": "string"}
{"name": "user_name", "type": "string", "sort": true}
]
})"_json;
documents = {
@ -3900,7 +3929,7 @@ TEST_F(CollectionJoinTest, SortByReference) {
{"filter_by", "$Links(repo_id:=[repo_a, repo_d])"},
{"include_fields", "user_id, user_name, $Repos(repo_content, repo_stars:merge), "},
{"exclude_fields", "$Links(*), "},
{"sort_by", "$Repos(repo_stars: desc)"}
{"sort_by", "$Repos(repo_stars: desc), user_name:desc"}
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
@ -3909,13 +3938,13 @@ TEST_F(CollectionJoinTest, SortByReference) {
ASSERT_EQ(3, res_obj["found"].get<size_t>());
ASSERT_EQ(3, res_obj["hits"].size());
ASSERT_EQ(4, res_obj["hits"][0]["document"].size());
ASSERT_EQ("user_c", res_obj["hits"][0]["document"].at("user_id"));
ASSERT_EQ("Joe", res_obj["hits"][0]["document"].at("user_name"));
ASSERT_EQ("user_b", res_obj["hits"][0]["document"].at("user_id"));
ASSERT_EQ("Ruby", res_obj["hits"][0]["document"].at("user_name"));
ASSERT_EQ("body1", res_obj["hits"][0]["document"].at("repo_content"));
ASSERT_EQ(431, res_obj["hits"][0]["document"].at("repo_stars"));
ASSERT_EQ("user_b", res_obj["hits"][1]["document"].at("user_id"));
ASSERT_EQ("Ruby", res_obj["hits"][1]["document"].at("user_name"));
ASSERT_EQ("user_c", res_obj["hits"][1]["document"].at("user_id"));
ASSERT_EQ("Joe", res_obj["hits"][1]["document"].at("user_name"));
ASSERT_EQ("body1", res_obj["hits"][1]["document"].at("repo_content"));
ASSERT_EQ(431, res_obj["hits"][1]["document"].at("repo_stars"));