Fix typo/prefix regression with weights.

2025-05-21 06:02:26 +08:00 · 2022-12-07 11:28:45 +05:30 · 2022-12-07 11:28:45 +05:30 · 5332d77710
commit 5332d77710
parent 4651b9a6fd
2 changed files with 32 additions and 2 deletions
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -3448,8 +3448,8 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,

        for(size_t i = 0; i < num_search_fields; i++) {
            const std::string& field_name = the_fields[i].name;
-            const uint32_t field_num_typos = (i < num_typos.size()) ? num_typos[i] : num_typos[0];
-            const bool field_prefix = (i < prefixes.size()) ? prefixes[i] : prefixes[0];
+            const uint32_t field_num_typos = (i < num_typos.size()) ? num_typos[the_fields[i].orig_index] : num_typos[0];
+            const bool field_prefix = (i < prefixes.size()) ? prefixes[the_fields[i].orig_index] : prefixes[0];

            if(token_num_typos > field_num_typos) {
                // since the token can come from any field, we still have to respect per-field num_typos
--- a/test/collection_specific_more_test.cpp
+++ b/test/collection_specific_more_test.cpp
@@ -1736,3 +1736,33 @@ TEST_F(CollectionSpecificMoreTest, SearchCutoffTest) {

    ASSERT_TRUE(res["search_cutoff"].get<bool>());
 }
+
+TEST_F(CollectionSpecificMoreTest, CrossFieldTypoAndPrefixWithWeights) { // regression test for per-field typo/prefix settings when the search also passes {2, 3} (presumably field weights)
+    nlohmann::json schema = R"({
+            "name": "coll1",
+            "fields": [
+                {"name": "title", "type": "string"},
+                {"name": "color", "type": "string"}
+            ]
+        })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "Cool trousers";
+    doc["color"] = "blue";
+    ASSERT_TRUE(coll1->add(doc.dump()).ok()); // index the single document both searches are expected to hit
+
+    auto res = coll1->search("trouzers", {"title", "color"}, "", {}, {}, {2, 0}, 10, 1, FREQUENCY, {true}, 0, // misspelled query: "trouzers" vs. the indexed "trousers"
+                             spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                             "<mark>", "</mark>", {2, 3}).get(); // NOTE(review): {2, 0} / {true} / {2, 3} are presumably num_typos, prefixes and field weights - confirm against Collection::search
+    ASSERT_EQ(1, res["hits"].size()); // the document must be found despite the typo
+
+    res = coll1->search("trou", {"title", "color"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true, false}, 0, // prefix-only query: "trou" is a prefix of the indexed "trousers"
+                        spp::sparse_hash_set<std::string>(),
+                        spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                        "<mark>", "</mark>", {2, 3}).get(); // NOTE(review): {0} here and {true} above have fewer entries than fields; index.cpp guards on `i` but subscripts with orig_index - confirm no out-of-range read
+    ASSERT_EQ(1, res["hits"].size()); // the document must be found via the prefix match
+}