Support use of strict equals + backtick on ID field.

2025-05-18 20:52:50 +08:00 · 2021-11-13 16:11:47 +05:30 · 2021-11-13 16:11:47 +05:30 · 24cce48bc2
commit 24cce48bc2
parent 483afa5b34
5 changed files with 78 additions and 11 deletions
--- a/src/field.cpp
+++ b/src/field.cpp
@ -89,9 +89,27 @@ Option<bool> filter::parse_filter_query(const string& simple_filter_query,
            StringUtils::trim(raw_value);
            id_filter = {field_name, {}, {}};

+            NUM_COMPARATOR id_comparator = EQUALS;
+            size_t filter_value_index = 0;
+
+            if(raw_value[0] == '=') {
+                id_comparator = EQUALS;
+                while(++filter_value_index < raw_value.size() && raw_value[filter_value_index] == ' ');
+            } else if(raw_value.size() >= 2 && raw_value[0] == '!' && raw_value[1] == '=') {
+                return Option<bool>(400, "Not equals filtering is not supported on the `id` field.");
+            }
+
+            if(filter_value_index != 0) {
+                raw_value = raw_value.substr(filter_value_index);
+            }
+
+            if(filter_value_index == raw_value.size()) {
+                return Option<bool>(400, "Error with filter field `id`: Filter value cannot be empty.");
+            }
+
            if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
                std::vector<std::string> doc_ids;
-                StringUtils::split(raw_value.substr(1, raw_value.size() - 2), doc_ids, ",");
+                StringUtils::split_to_values(raw_value.substr(1, raw_value.size() - 2), doc_ids);

                for(std::string& doc_id: doc_ids) {
                    // we have to convert the doc_id to seq id
@ -103,14 +121,17 @@ Option<bool> filter::parse_filter_query(const string& simple_filter_query,
                    }

                    id_filter.values.push_back(seq_id_str);
-                    id_filter.comparators.push_back(EQUALS);
+                    id_filter.comparators.push_back(id_comparator);
                }
            } else {
+                std::vector<std::string> doc_ids;
+                StringUtils::split_to_values(raw_value, doc_ids);  // to handle backticks
+
                std::string seq_id_str;
-                StoreStatus seq_id_status = store->get(doc_id_prefix + raw_value, seq_id_str);
+                StoreStatus seq_id_status = store->get(doc_id_prefix + doc_ids[0], seq_id_str);
                if(seq_id_status == StoreStatus::FOUND) {
                    id_filter.values.push_back(seq_id_str);
-                    id_filter.comparators.push_back(EQUALS);
+                    id_filter.comparators.push_back(id_comparator);
                }
            }

--- a/src/index.cpp
+++ b/src/index.cpp
@ -1222,6 +1222,8 @@ void Index::do_filtering(uint32_t*& filter_ids, uint32_t& filter_ids_length,
                result_ids.push_back(std::stoul(id_str));
            }

+            std::sort(result_ids.begin(), result_ids.end());
+
            if(i == 0) {
                filter_ids = new uint32[result_ids.size()];
                std::copy(result_ids.begin(), result_ids.end(), filter_ids);
--- a/src/string_utils.cpp
+++ b/src/string_utils.cpp
@ -164,7 +164,7 @@ void StringUtils::split_to_values(const std::string& vals_str, std::vector<std::
                break;
            case ',':
                if(!inside_tick) {
-                    filter_values.push_back(buffer);
+                    filter_values.push_back(StringUtils::trim(buffer));
                    buffer = "";
                } else {
                    buffer += c;
@ -178,7 +178,7 @@ void StringUtils::split_to_values(const std::string& vals_str, std::vector<std::
    }

    if(!buffer.empty()) {
-        filter_values.push_back(buffer);
+        filter_values.push_back(StringUtils::trim(buffer));
    }
 }

--- a/test/collection_filtering_test.cpp
+++ b/test/collection_filtering_test.cpp
@ -184,6 +184,11 @@ TEST_F(CollectionFilteringTest, FacetFieldStringFiltering) {
    ASSERT_EQ(2, results["hits"].size());
    ASSERT_EQ(2, results["found"].get<size_t>());

+    // with backticks
+    results = coll_str->search("*", query_fields, "starring:= `samuel l. Jackson`", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
+    ASSERT_EQ(2, results["hits"].size());
+    ASSERT_EQ(2, results["found"].get<size_t>());
+
    // contains filter with a single token should work as well
    results = coll_str->search("*", query_fields, "starring: jackson", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
    ASSERT_EQ(2, results["hits"].size());
@ -1517,6 +1522,16 @@ TEST_F(CollectionFilteringTest, FilteringViaDocumentIds) {
    ASSERT_EQ(1, results["hits"].size());
    ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get<std::string>().c_str());

+    // single ID with backtick
+
+    results = coll1->search("*",
+                            {}, "id: `123`",
+                            {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+
    // single ID with condition
    results = coll1->search("*",
                            {}, "id: 125 && num_employees: 150",
@ -1537,6 +1552,35 @@ TEST_F(CollectionFilteringTest, FilteringViaDocumentIds) {
    ASSERT_STREQ("125", results["hits"][1]["document"]["id"].get<std::string>().c_str());
    ASSERT_STREQ("127", results["hits"][2]["document"]["id"].get<std::string>().c_str());

+    // multiple IDs with exact equals operator with IDs not being ordered
+    results = coll1->search("*",
+                            {}, "id:= [129, 123, 127, 125] && num_employees: <300",
+                            {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get();
+
+    ASSERT_EQ(3, results["found"].get<size_t>());
+    ASSERT_EQ(3, results["hits"].size());
+    ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("125", results["hits"][1]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("127", results["hits"][2]["document"]["id"].get<std::string>().c_str());
+
+    // multiple IDs with exact equals operator and backticks
+    results = coll1->search("*",
+                            {}, "id:= [`123`, `125`, `127`, `129`] && num_employees: <300",
+                            {}, sort_fields, {0}, 10, 1, FREQUENCY, {true}).get();
+
+    ASSERT_EQ(3, results["found"].get<size_t>());
+    ASSERT_EQ(3, results["hits"].size());
+    ASSERT_STREQ("123", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("125", results["hits"][1]["document"]["id"].get<std::string>().c_str());
+    ASSERT_STREQ("127", results["hits"][2]["document"]["id"].get<std::string>().c_str());
+
+    // not equals is not supported yet
+    auto res_op = coll1->search("*",
+                            {}, "id:!= [123,125] && num_employees: <300",
+                            {}, sort_fields, {0}, 10, 1, FREQUENCY, {true});
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Not equals filtering is not supported on the `id` field.", res_op.error());
+
    // when no IDs exist
    results = coll1->search("*",
                            {}, "id: [1000] && num_employees: <300",
--- a/test/string_utils_test.cpp
+++ b/test/string_utils_test.cpp
@ -244,29 +244,29 @@ TEST(StringUtilsTest, ShouldParseStringifiedList) {
    StringUtils::split_to_values(str, strs);
    ASSERT_EQ(2, strs.size());
    ASSERT_EQ("John Galt", strs[0]);
-    ASSERT_EQ(" Random Jack", strs[1]);
+    ASSERT_EQ("Random Jack", strs[1]);

    strs.clear();
    str = "`John Galt`, `Random, Jack`";
    StringUtils::split_to_values(str, strs);
    ASSERT_EQ(2, strs.size());
    ASSERT_EQ("John Galt", strs[0]);
-    ASSERT_EQ(" Random, Jack", strs[1]);
+    ASSERT_EQ("Random, Jack", strs[1]);

    strs.clear();
    str = "`John Galt, `Random, Jack`";
    StringUtils::split_to_values(str, strs);
    ASSERT_EQ(2, strs.size());
    ASSERT_EQ("John Galt, Random", strs[0]);
-    ASSERT_EQ(" Jack", strs[1]);
+    ASSERT_EQ("Jack", strs[1]);

    strs.clear();
    str = "`Traveller's \\`delight\\`!`, Not wrapped, Last word";
    StringUtils::split_to_values(str, strs);
    ASSERT_EQ(3, strs.size());
    ASSERT_EQ("Traveller's \\`delight\\`!", strs[0]);
-    ASSERT_EQ(" Not wrapped", strs[1]);
-    ASSERT_EQ(" Last word", strs[2]);
+    ASSERT_EQ("Not wrapped", strs[1]);
+    ASSERT_EQ("Last word", strs[2]);

    strs.clear();
    str = "`John Galt`";