Fix crash in sort by eval destructor. (#1685)

* Fix crash in sort by eval destructor.

* Add test for reference sort_by.
This commit is contained in:
Harpreet Sangar 2024-04-25 14:24:05 +05:30 committed by GitHub
parent dccf6eb186
commit 397518cf37
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 75 additions and 38 deletions

View File

@ -578,7 +578,7 @@ struct sort_by {
};
struct eval_t {
filter_node_t* filter_trees = nullptr;
filter_node_t** filter_trees = nullptr; // Array of filter_node_t pointers.
std::vector<uint32_t*> eval_ids_vec;
std::vector<uint32_t> eval_ids_count_vec;
std::vector<int64_t> scores;

View File

@ -2,8 +2,8 @@
#include <string>
#include <map>
#include <tsl/htrie_map.h>
#include <json.hpp>
#include "tsl/htrie_map.h"
#include "json.hpp"
#include "store.h"
enum NUM_COMPARATOR {

View File

@ -36,6 +36,11 @@ struct sort_fields_guard_t {
for (auto& eval_ids: sort_by_clause.eval.eval_ids_vec) {
delete [] eval_ids;
}
for (uint32_t i = 0; i < sort_by_clause.eval_expressions.size(); i++) {
delete sort_by_clause.eval.filter_trees[i];
}
delete [] sort_by_clause.eval.filter_trees;
}
}
@ -1183,23 +1188,25 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
is_group_by_query,
remote_embedding_timeout_ms,
remote_embedding_num_tries);
if (!sort_validation_op.ok()) {
return Option<bool>(sort_validation_op.code(), "Referenced collection `" + ref_collection_name + "`: " +
sort_validation_op.error());
}
for (auto& ref_sort_field_std: ref_sort_fields_std) {
ref_sort_field_std.reference_collection_name = ref_collection_name;
sort_fields_std.emplace_back(ref_sort_field_std);
}
if (!sort_validation_op.ok()) {
return Option<bool>(sort_validation_op.code(), "Referenced collection `" + ref_collection_name + "`: " +
sort_validation_op.error());
}
continue;
} else if (_sort_field.name == sort_field_const::eval) {
sort_by sort_field_std(sort_field_const::eval, _sort_field.order);
sort_fields_std.emplace_back(sort_field_const::eval, _sort_field.order);
auto& sort_field_std = sort_fields_std.back();
auto const& count = _sort_field.eval_expressions.size();
sort_field_std.eval.filter_trees = new filter_node_t[count];
std::unique_ptr<filter_node_t []> filter_trees_guard(sort_field_std.eval.filter_trees);
sort_field_std.eval.filter_trees = new filter_node_t*[count]{nullptr};
sort_field_std.eval_expressions = _sort_field.eval_expressions;
sort_field_std.eval.scores = _sort_field.eval.scores;
for (uint32_t j = 0; j < count; j++) {
auto const& filter_exp = _sort_field.eval_expressions[j];
@ -1207,23 +1214,14 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
return Option<bool>(400, "The eval expression in sort_by is empty.");
}
filter_node_t* filter_tree_root = nullptr;
Option<bool> parse_filter_op = filter::parse_filter_query(filter_exp, search_schema,
store, "", filter_tree_root);
std::unique_ptr<filter_node_t> filter_tree_root_guard(filter_tree_root);
store, "", sort_field_std.eval.filter_trees[j]);
if (!parse_filter_op.ok()) {
return Option<bool>(parse_filter_op.code(), "Error parsing eval expression in sort_by clause.");
}
sort_field_std.eval.filter_trees[j] = std::move(*filter_tree_root);
}
eval_sort_count++;
sort_field_std.eval_expressions = _sort_field.eval_expressions;
sort_field_std.eval.scores = _sort_field.eval.scores;
sort_fields_std.emplace_back(sort_field_std);
filter_trees_guard.release();
continue;
}
@ -1254,22 +1252,6 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
sort_field_std.name = actual_field_name;
sort_field_std.text_match_buckets = std::stoll(match_parts[1]);
} else if(actual_field_name == sort_field_const::eval) {
const std::string& filter_exp = sort_field_std.name.substr(paran_start + 1,
sort_field_std.name.size() - paran_start -
2);
if(filter_exp.empty()) {
return Option<bool>(400, "The eval expression in sort_by is empty.");
}
Option<bool> parse_filter_op = filter::parse_filter_query(filter_exp, search_schema,
store, "", sort_field_std.eval.filter_trees);
if(!parse_filter_op.ok()) {
return Option<bool>(parse_filter_op.code(), "Error parsing eval expression in sort_by clause.");
}
sort_field_std.name = actual_field_name;
num_sort_expressions++;
} else if(actual_field_name == sort_field_const::vector_query) {
const std::string& vector_query_str = sort_field_std.name.substr(paran_start + 1,
sort_field_std.name.size() - paran_start -

View File

@ -6204,7 +6204,7 @@ void Index::populate_sort_mapping(int* sort_order, std::vector<size_t>& geopoint
auto& eval_exp = sort_fields_std[i].eval;
auto count = sort_fields_std[i].eval_expressions.size();
for (uint32_t j = 0; j < count; j++) {
auto filter_result_iterator = filter_result_iterator_t("", this, &eval_exp.filter_trees[j],
auto filter_result_iterator = filter_result_iterator_t("", this, eval_exp.filter_trees[j],
search_begin_us, search_stop_us);
auto filter_init_op = filter_result_iterator.init_status();
if (!filter_init_op.ok()) {

View File

@ -4373,6 +4373,42 @@ TEST_F(CollectionJoinTest, SortByReference) {
ASSERT_EQ(73.5, res_obj["hits"][1]["document"].at("product_price"));
// Sort by reference optional filtering.
req_params = {
{"collection", "Products"},
{"q", "*"},
{"query_by", "product_name"},
{"filter_by", "$Customers(id: *)"},
{"sort_by", "$Customers(_eval(product_available):asc)"},
{"include_fields", "product_id, $Customers(product_price, strategy:merge)"},
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_FALSE(search_op.ok());
ASSERT_EQ("Referenced collection `Customers`: Error parsing eval expression in sort_by clause.", search_op.error());
req_params = {
{"collection", "Products"},
{"q", "*"},
{"query_by", "product_name"},
{"filter_by", "$Customers(id: *)"},
{"sort_by", "$Customers(_eval([(): 3]):asc)"},
{"include_fields", "product_id, $Customers(product_price, strategy:merge)"},
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_FALSE(search_op.ok());
ASSERT_EQ("Referenced collection `Customers`: The eval expression in sort_by is empty.", search_op.error());
req_params = {
{"collection", "Products"},
{"q", "*"},
{"query_by", "product_name"},
{"filter_by", "$Customers(id: *)"},
{"sort_by", "$Customers(_eval([(customer_name: Dan && product_price: > 100): 3, (customer_name): 2]):asc)"},
{"include_fields", "product_id, $Customers(product_price, strategy:merge)"},
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_FALSE(search_op.ok());
ASSERT_EQ("Referenced collection `Customers`: Error parsing eval expression in sort_by clause.", search_op.error());
req_params = {
{"collection", "Products"},
{"q", "*"},
@ -4411,6 +4447,25 @@ TEST_F(CollectionJoinTest, SortByReference) {
ASSERT_EQ("product_b", res_obj["hits"][1]["document"].at("product_id"));
ASSERT_EQ(73.5, res_obj["hits"][1]["document"].at("product_price"));
req_params = {
{"collection", "Products"},
{"q", "*"},
{"query_by", "product_name"},
{"filter_by", "$Customers(customer_id: customer_a)"},
{"sort_by", "$Customers(_eval(product_location:(48.87709, 2.33495, 1km)):desc)"}, // Closer to product_a
{"include_fields", "product_id, $Customers(product_price, strategy:merge)"},
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
res_obj = nlohmann::json::parse(json_res);
ASSERT_EQ(2, res_obj["found"].get<size_t>());
ASSERT_EQ(2, res_obj["hits"].size());
ASSERT_EQ("product_a", res_obj["hits"][0]["document"].at("product_id"));
ASSERT_EQ(143, res_obj["hits"][0]["document"].at("product_price"));
ASSERT_EQ("product_b", res_obj["hits"][1]["document"].at("product_id"));
ASSERT_EQ(73.5, res_obj["hits"][1]["document"].at("product_price"));
// Text search
req_params = {
{"collection", "Products"},