From 241a6e0476d38161d2fdb5b8a27795a328f5a60f Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Tue, 16 Aug 2022 15:43:40 +0530 Subject: [PATCH 1/2] Fix eval valgrind errors. # Conflicts: # src/collection.cpp --- include/field.h | 1 + include/index.h | 4 ++-- src/collection.cpp | 17 ++++++++++++++++- test/collection_sorting_test.cpp | 22 +++++++++++----------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/include/field.h b/include/field.h index 2f558129..090e1ebc 100644 --- a/include/field.h +++ b/include/field.h @@ -500,6 +500,7 @@ struct sort_by { exclude_radius = other.exclude_radius; geo_precision = other.geo_precision; missing_values = other.missing_values; + eval = other.eval; return *this; } }; diff --git a/include/index.h b/include/index.h index e91e7efb..61aaabe7 100644 --- a/include/index.h +++ b/include/index.h @@ -93,7 +93,7 @@ struct search_args { std::vector& facets; std::vector>& included_ids; std::vector excluded_ids; - std::vector sort_fields_std; + std::vector& sort_fields_std; facet_query_t facet_query; std::vector num_typos; size_t max_facet_values; @@ -133,7 +133,7 @@ struct search_args { search_args(std::vector field_query_tokens, std::vector search_fields, std::vector filters, std::vector& facets, std::vector>& included_ids, std::vector excluded_ids, - std::vector sort_fields_std, facet_query_t facet_query, const std::vector& num_typos, + std::vector& sort_fields_std, facet_query_t facet_query, const std::vector& num_typos, size_t max_facet_values, size_t max_hits, size_t per_page, size_t page, token_ordering token_order, const std::vector& prefixes, size_t drop_tokens_threshold, size_t typo_tokens_threshold, const std::vector& group_by_fields, size_t group_limit, diff --git a/src/collection.cpp b/src/collection.cpp index b8c04310..337f47de 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -38,6 +38,20 @@ struct match_index_t { } }; +struct sort_fields_guard_t { + std::vector sort_fields_std; + + ~sort_fields_guard_t() { + for(auto& sort_by_clause: sort_fields_std) { + if(sort_by_clause.eval.ids) { + delete [] sort_by_clause.eval.ids; + sort_by_clause.eval.ids = nullptr; + sort_by_clause.eval.size = 0; + } + } + } +}; + Collection::Collection(const std::string& name, const uint32_t collection_id, const uint64_t created_at, const uint32_t next_seq_id, Store *store, const std::vector &fields, const std::string& default_sorting_field, @@ -1039,7 +1053,8 @@ Option Collection::search(const std::string & raw_query, // validate sort fields and standardize - std::vector sort_fields_std; + sort_fields_guard_t sort_fields_guard; + std::vector& sort_fields_std = sort_fields_guard.sort_fields_std; if(curated_sort_by.empty()) { auto sort_validation_op = validate_and_standardize_sort_fields(sort_fields, sort_fields_std); diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp index 4a7202ef..80755c93 100644 --- a/test/collection_sorting_test.cpp +++ b/test/collection_sorting_test.cpp @@ -1835,7 +1835,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) { ASSERT_TRUE(coll1->add(doc.dump()).ok()); } - auto sort_fields = { + std::vector sort_fields = { sort_by("_eval(brand:nike)", "DESC"), sort_by("points", "DESC"), }; @@ -1844,7 +1844,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) { ASSERT_EQ(5, results["hits"].size()); std::vector expected_ids = {"3", "0", "4", "2", "1"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -1858,7 +1858,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) { ASSERT_EQ(5, results["hits"].size()); expected_ids = {"0", "4", "3", "2", "1"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -1872,7 +1872,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) { ASSERT_EQ(5, results["hits"].size()); expected_ids = {"4", "3", "2", "1", "0"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -1933,7 +1933,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSearch) { ASSERT_TRUE(coll1->add(doc.dump()).ok()); } - auto sort_fields = { + std::vector sort_fields = { sort_by("_eval(brand:nike)", "DESC"), sort_by("points", "DESC"), }; @@ -1942,7 +1942,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSearch) { ASSERT_EQ(5, results["hits"].size()); std::vector expected_ids = {"3", "0", "4", "2", "1"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -1956,7 +1956,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSearch) { ASSERT_EQ(5, results["hits"].size()); expected_ids = {"0", "4", "3", "2", "1"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -1970,7 +1970,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSearch) { ASSERT_EQ(5, results["hits"].size()); expected_ids = {"4", "3", "2", "1", "0"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -2022,7 +2022,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSecondThirdParams) { ASSERT_TRUE(coll1->add(doc.dump()).ok()); } - auto sort_fields = { + std::vector sort_fields = { sort_by("val", "DESC"), sort_by("_eval(brand:nike)", "DESC"), sort_by("points", "DESC"), @@ -2032,7 +2032,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSecondThirdParams) { ASSERT_EQ(5, results["hits"].size()); std::vector expected_ids = {"3", "0", "4", "2", "1"}; - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } @@ -2045,7 +2045,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingSecondThirdParams) { results = coll1->search("title", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 10).get(); ASSERT_EQ(5, results["hits"].size()); - for(size_t i = 0; i > expected_ids.size(); i++) { + for(size_t i = 0; i < expected_ids.size(); i++) { ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get()); } } From c1aace925c757d5ed0523a85555cb5da74d0dd07 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Tue, 16 Aug 2022 18:29:15 +0530 Subject: [PATCH 2/2] Handle match score not being part of sort fields. In such a case, match score should not be returned in response. --- include/topster.h | 2 +- src/collection.cpp | 2 +- src/index.cpp | 6 +++--- test/collection_sorting_test.cpp | 31 +++++++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/include/topster.h b/include/topster.h index 5d43920d..e1af2112 100644 --- a/include/topster.h +++ b/include/topster.h @@ -8,7 +8,7 @@ struct KV { uint8_t field_id{}; - uint8_t match_score_index{}; + int8_t match_score_index{}; uint16_t query_index{}; uint16_t array_index{}; uint32_t token_bits{}; diff --git a/src/collection.cpp b/src/collection.cpp index 337f47de..78832b9d 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -1382,7 +1382,7 @@ Option Collection::search(const std::string & raw_query, if(field_order_kv->match_score_index == CURATED_RECORD_IDENTIFIER) { wrapper_doc["curated"] = true; - } else { + } else if(field_order_kv->match_score_index >= 0) { wrapper_doc["text_match"] = field_order_kv->scores[field_order_kv->match_score_index]; wrapper_doc["text_match_info"] = nlohmann::json::object(); diff --git a/src/index.cpp b/src/index.cpp index 5f9d8eaf..009ce89a 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -3446,7 +3446,7 @@ void Index::compute_sort_scores(const std::vector& sort_fields, const i filter_index = found_index; } - scores[0] = int64_t(found); + scores[2] = int64_t(found); } else { auto it = field_values[2]->find(seq_id); scores[2] = (it == field_values[2]->end()) ? default_score : it->second; @@ -3664,7 +3664,7 @@ void Index::do_infix_search(const size_t num_search_fields, const std::vector& filters, match_score, seq_id, sort_order, false, false, false, 1, -1, plists); int64_t scores[3] = {0}; - int64_t match_score_index = 0; + int64_t match_score_index = -1; compute_sort_scores(sort_fields, sort_order, field_values, geopoint_indices, seq_id, filter_index, 100, scores, match_score_index); diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp index 80755c93..43c3e393 100644 --- a/test/collection_sorting_test.cpp +++ b/test/collection_sorting_test.cpp @@ -1737,6 +1737,37 @@ TEST_F(CollectionSortingTest, RepeatingTokenRanking) { collectionManager.drop_collection("coll1"); } +TEST_F(CollectionSortingTest, SortingDoesNotHaveTextMatchComponent) { + // text_match_score field should not be present in response + std::vector fields = {field("title", field_types::STRING, false), + field("points", field_types::INT32, false),}; + + Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get(); + + nlohmann::json doc1; + doc1["id"] = "0"; + doc1["title"] = "Test Title"; + doc1["points"] = 100; + + ASSERT_TRUE(coll1->add(doc1.dump()).ok()); + + sort_fields = { + sort_by("points", "DESC"), + sort_by("points", "DESC"), + sort_by("points", "DESC"), + }; + + auto results = coll1->search("test", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ(0, results["hits"][0].count("text_match")); + + results = coll1->search("*", {}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}).get(); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_EQ(0, results["hits"][0].count("text_match")); + + collectionManager.drop_collection("coll1"); +} + TEST_F(CollectionSortingTest, IntegerFloatAndBoolShouldDefaultSortTrue) { std::string coll_schema = R"( {