From 24cce48bc22d68bafa9d306480187069b97dd196 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sat, 13 Nov 2021 16:11:47 +0530 Subject: [PATCH] Support use of strict equals + backtick on ID field. --- src/field.cpp | 29 +++++++++++++++++--- src/index.cpp | 2 ++ src/string_utils.cpp | 4 +-- test/collection_filtering_test.cpp | 44 ++++++++++++++++++++++++++++++ test/string_utils_test.cpp | 10 +++---- 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/src/field.cpp b/src/field.cpp index 4d4e0930..de599949 100644 --- a/src/field.cpp +++ b/src/field.cpp @@ -89,9 +89,27 @@ Option filter::parse_filter_query(const string& simple_filter_query, StringUtils::trim(raw_value); id_filter = {field_name, {}, {}}; + NUM_COMPARATOR id_comparator = EQUALS; + size_t filter_value_index = 0; + + if(raw_value[0] == '=') { + id_comparator = EQUALS; + while(++filter_value_index < raw_value.size() && raw_value[filter_value_index] == ' '); + } else if(raw_value.size() >= 2 && raw_value[0] == '!' && raw_value[1] == '=') { + return Option(400, "Not equals filtering is not supported on the `id` field."); + } + + if(filter_value_index != 0) { + raw_value = raw_value.substr(filter_value_index); + } + + if(filter_value_index == raw_value.size()) { + return Option(400, "Error with filter field `id`: Filter value cannot be empty."); + } + if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') { std::vector doc_ids; - StringUtils::split(raw_value.substr(1, raw_value.size() - 2), doc_ids, ","); + StringUtils::split_to_values(raw_value.substr(1, raw_value.size() - 2), doc_ids); for(std::string& doc_id: doc_ids) { // we have to convert the doc_id to seq id @@ -103,14 +121,17 @@ Option filter::parse_filter_query(const string& simple_filter_query, } id_filter.values.push_back(seq_id_str); - id_filter.comparators.push_back(EQUALS); + id_filter.comparators.push_back(id_comparator); } } else { + std::vector doc_ids; + StringUtils::split_to_values(raw_value, doc_ids); // to handle backticks + std::string seq_id_str; - StoreStatus seq_id_status = store->get(doc_id_prefix + raw_value, seq_id_str); + StoreStatus seq_id_status = store->get(doc_id_prefix + doc_ids[0], seq_id_str); if(seq_id_status == StoreStatus::FOUND) { id_filter.values.push_back(seq_id_str); - id_filter.comparators.push_back(EQUALS); + id_filter.comparators.push_back(id_comparator); } } diff --git a/src/index.cpp b/src/index.cpp index e3c0130d..2599e713 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -1222,6 +1222,8 @@ void Index::do_filtering(uint32_t*& filter_ids, uint32_t& filter_ids_length, result_ids.push_back(std::stoul(id_str)); } + std::sort(result_ids.begin(), result_ids.end()); + if(i == 0) { filter_ids = new uint32[result_ids.size()]; std::copy(result_ids.begin(), result_ids.end(), filter_ids); diff --git a/src/string_utils.cpp b/src/string_utils.cpp index 0e7cad64..f542273c 100644 --- a/src/string_utils.cpp +++ b/src/string_utils.cpp @@ -164,7 +164,7 @@ void StringUtils::split_to_values(const std::string& vals_str, std::vector()); + // with backticks + results = coll_str->search("*", query_fields, "starring:= `samuel l. Jackson`", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); + ASSERT_EQ(2, results["hits"].size()); + ASSERT_EQ(2, results["found"].get()); + // contains filter with a single token should work as well results = coll_str->search("*", query_fields, "starring: jackson", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get(); ASSERT_EQ(2, results["hits"].size()); @@ -1517,6 +1522,16 @@ TEST_F(CollectionFilteringTest, FilteringViaDocumentIds) { ASSERT_EQ(1, results["hits"].size()); ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get().c_str()); + // single ID with backtick + + results = coll1->search("*", + {}, "id: `123`", + {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + + ASSERT_EQ(1, results["found"].get()); + ASSERT_EQ(1, results["hits"].size()); + ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get().c_str()); + // single ID with condition results = coll1->search("*", {}, "id: 125 && num_employees: 150", @@ -1537,6 +1552,35 @@ TEST_F(CollectionFilteringTest, FilteringViaDocumentIds) { ASSERT_STREQ("125", results["hits"][1]["document"]["id"].get().c_str()); ASSERT_STREQ("127", results["hits"][2]["document"]["id"].get().c_str()); + // multiple IDs with exact equals operator with IDs not being ordered + results = coll1->search("*", + {}, "id:= [129, 123, 127, 125] && num_employees: <300", + {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + + ASSERT_EQ(3, results["found"].get()); + ASSERT_EQ(3, results["hits"].size()); + ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("125", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("127", results["hits"][2]["document"]["id"].get().c_str()); + + // multiple IDs with exact equals operator and backticks + results = coll1->search("*", + {}, "id:= [`123`, `125`, `127`, `129`] && num_employees: <300", + {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get(); + + ASSERT_EQ(3, results["found"].get()); + ASSERT_EQ(3, results["hits"].size()); + ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get().c_str()); + ASSERT_STREQ("125", results["hits"][1]["document"]["id"].get().c_str()); + ASSERT_STREQ("127", results["hits"][2]["document"]["id"].get().c_str()); + + // not equals is not supported yet + auto res_op = coll1->search("*", + {}, "id:!= [123,125] && num_employees: <300", + {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}); + ASSERT_FALSE(res_op.ok()); + ASSERT_EQ("Not equals filtering is not supported on the `id` field.", res_op.error()); + // when no IDs exist results = coll1->search("*", {}, "id: [1000] && num_employees: <300", diff --git a/test/string_utils_test.cpp b/test/string_utils_test.cpp index f6869df0..a6c33239 100644 --- a/test/string_utils_test.cpp +++ b/test/string_utils_test.cpp @@ -244,29 +244,29 @@ TEST(StringUtilsTest, ShouldParseStringifiedList) { StringUtils::split_to_values(str, strs); ASSERT_EQ(2, strs.size()); ASSERT_EQ("John Galt", strs[0]); - ASSERT_EQ(" Random Jack", strs[1]); + ASSERT_EQ("Random Jack", strs[1]); strs.clear(); str = "`John Galt`, `Random, Jack`"; StringUtils::split_to_values(str, strs); ASSERT_EQ(2, strs.size()); ASSERT_EQ("John Galt", strs[0]); - ASSERT_EQ(" Random, Jack", strs[1]); + ASSERT_EQ("Random, Jack", strs[1]); strs.clear(); str = "`John Galt, `Random, Jack`"; StringUtils::split_to_values(str, strs); ASSERT_EQ(2, strs.size()); ASSERT_EQ("John Galt, Random", strs[0]); - ASSERT_EQ(" Jack", strs[1]); + ASSERT_EQ("Jack", strs[1]); strs.clear(); str = "`Traveller's \\`delight\\`!`, Not wrapped, Last word"; StringUtils::split_to_values(str, strs); ASSERT_EQ(3, strs.size()); ASSERT_EQ("Traveller's \\`delight\\`!", strs[0]); - ASSERT_EQ(" Not wrapped", strs[1]); - ASSERT_EQ(" Last word", strs[2]); + ASSERT_EQ("Not wrapped", strs[1]); + ASSERT_EQ("Last word", strs[2]); strs.clear(); str = "`John Galt`";