From 309e973de593ae18b6f23a350256bf7a278edb5e Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sat, 4 Feb 2023 21:05:14 +0530 Subject: [PATCH 1/5] Properly release buffered export body. --- src/core_api.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_api.cpp b/src/core_api.cpp index 389d3904..43ce94df 100644 --- a/src/core_api.cpp +++ b/src/core_api.cpp @@ -697,7 +697,7 @@ bool get_export_documents(const std::shared_ptr& req, const std::share if(export_state->it != nullptr) { rocksdb::Iterator* it = export_state->it; size_t batch_counter = 0; - res->body.clear(); + std::string().swap(res->body); while(it->Valid() && it->key().ToString().compare(0, seq_id_prefix.size(), seq_id_prefix) == 0) { if(export_state->include_fields.empty() && export_state->exclude_fields.empty()) { From f40637fe643d680792d7953a7d786f352ff03efb Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Thu, 2 Feb 2023 16:48:18 +0530 Subject: [PATCH 2/5] Enabling exhaustive search should automatically drop tokens. --- src/index.cpp | 2 +- test/collection_specific_more_test.cpp | 28 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/index.cpp b/src/index.cpp index 94f084cd..f599eab6 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -2708,7 +2708,7 @@ void Index::search(std::vector& field_query_tokens, const std::v // gather up both original query and synonym queries and do drop tokens - if (all_result_ids_len < drop_tokens_threshold) { + if (exhaustive_search || all_result_ids_len < drop_tokens_threshold) { for (size_t qi = 0; qi < all_queries.size(); qi++) { auto& orig_tokens = all_queries[qi]; size_t num_tokens_dropped = 0; diff --git a/test/collection_specific_more_test.cpp b/test/collection_specific_more_test.cpp index 608c5fea..3527bbec 100644 --- a/test/collection_specific_more_test.cpp +++ b/test/collection_specific_more_test.cpp @@ -1782,6 +1782,34 @@ TEST_F(CollectionSpecificMoreTest, SearchCutoffTest) { ASSERT_EQ(408, coll_op.code()); } +TEST_F(CollectionSpecificMoreTest, ExhaustiveSearchWithoutExplicitDropTokens) { + nlohmann::json schema = R"({ + "name": "coll1", + "fields": [ + {"name": "title", "type": "string"} + ] + })"_json; + + Collection* coll1 = collectionManager.create_collection(schema).get(); + + nlohmann::json doc; + doc["title"] = "alpha beta gamma"; + ASSERT_TRUE(coll1->add(doc.dump()).ok()); + + doc["title"] = "alpha"; + ASSERT_TRUE(coll1->add(doc.dump()).ok()); + + bool exhaustive_search = true; + size_t drop_tokens_threshold = 1; + + auto res = coll1->search("alpha beta", {"title"}, "", {}, {}, {0}, 3, 1, FREQUENCY, {false}, drop_tokens_threshold, + spp::sparse_hash_set(), + spp::sparse_hash_set(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0, + "", "", {}, 1000, true, false, true, "", exhaustive_search).get(); + + ASSERT_EQ(2, res["hits"].size()); +} + TEST_F(CollectionSpecificMoreTest, CrossFieldTypoAndPrefixWithWeights) { nlohmann::json schema = R"({ "name": "coll1", From 2c8b8d6af143fc382cca3d911efb4b317761a036 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Fri, 3 Feb 2023 18:34:51 +0530 Subject: [PATCH 3/5] Add test for eval expression validation. --- src/collection.cpp | 4 ++++ test/collection_sorting_test.cpp | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/src/collection.cpp b/src/collection.cpp index 09e539a9..5a1d8a54 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -586,6 +586,10 @@ Option Collection::validate_and_standardize_sort_fields(const std::vector< const std::string& filter_exp = sort_field_std.name.substr(paran_start + 1, sort_field_std.name.size() - paran_start - 2); + if(filter_exp.empty()) { + return Option(400, "The eval expression in sort_by is empty."); + } + Option parse_filter_op = filter::parse_filter_query(filter_exp, search_schema, store, "", sort_field_std.eval.filter_tree_root); if(!parse_filter_op.ok()) { diff --git a/test/collection_sorting_test.cpp b/test/collection_sorting_test.cpp index 80ffbaf5..afd678c1 100644 --- a/test/collection_sorting_test.cpp +++ b/test/collection_sorting_test.cpp @@ -2013,6 +2013,15 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) { ASSERT_FALSE(res_op.ok()); ASSERT_EQ("Error parsing eval expression in sort_by clause.", res_op.error()); + // when eval condition is empty + sort_fields = { + sort_by("_eval()", "DESC"), + sort_by("points", "DESC"), + }; + res_op = coll1->search("*", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 10); + ASSERT_FALSE(res_op.ok()); + ASSERT_EQ("The eval expression in sort_by is empty.", res_op.error()); + // more bad syntax! sort_fields = { sort_by(")", "DESC"), From 0579398993311a9278f08ed0d0f03b96c32fbc0c Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sun, 5 Feb 2023 10:58:40 +0530 Subject: [PATCH 4/5] Allow null values for optional nested fields. --- src/field.cpp | 5 +++++ test/collection_nested_fields_test.cpp | 26 ++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/field.cpp b/src/field.cpp index fb334dc4..97e385af 100644 --- a/src/field.cpp +++ b/src/field.cpp @@ -721,6 +721,11 @@ Option field::flatten_field(nlohmann::json& doc, nlohmann::json& obj, cons // end of path: check if obj matches expected type std::string detected_type; if(!field::get_type(obj, detected_type)) { + if(obj.is_null() && the_field.optional) { + // null values are allowed only if field is optional + return Option(false); + } + return Option(400, "Field `" + the_field.name + "` has an incorrect type."); } diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp index 2fa978e5..997026a7 100644 --- a/test/collection_nested_fields_test.cpp +++ b/test/collection_nested_fields_test.cpp @@ -1390,7 +1390,8 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) { "fields": [ {"name": "details", "type": "object", "optional": true }, {"name": "company.name", "type": "string", "optional": true }, - {"name": "locations", "type": "object[]", "optional": true } + {"name": "locations", "type": "object[]", "optional": true }, + {"name": "blocks.text.description", "type": "string[]", "optional": true } ] })"_json; @@ -1398,14 +1399,31 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) { ASSERT_TRUE(op.ok()); Collection* coll1 = op.get(); - // no optional field is present and that should be allowed + // when a nested field is null it should be allowed auto doc1 = R"({ - "foo": "bar" + "company": {"name": null} })"_json; auto add_op = coll1->add(doc1.dump(), CREATE); ASSERT_TRUE(add_op.ok()); + // check the same with nested array type + + doc1 = R"({ + "blocks": {"text": [{"description": null}]} + })"_json; + + add_op = coll1->add(doc1.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + + // no optional field is present and that should be allowed + doc1 = R"({ + "foo": "bar" + })"_json; + + add_op = coll1->add(doc1.dump(), CREATE); + ASSERT_TRUE(add_op.ok()); + // some parts of an optional field is present in a subsequent doc indexed auto doc2 = R"({ "details": {"name": "foo"} @@ -1421,7 +1439,7 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) { // check fields and their properties auto coll_fields = coll1->get_fields(); - ASSERT_EQ(5, coll_fields.size()); + ASSERT_EQ(6, coll_fields.size()); for(auto& coll_field : coll_fields) { ASSERT_TRUE(coll_field.optional); } From 86535e24aa8aa1523480336ae57e7fdf885f0bb2 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sun, 5 Feb 2023 13:20:02 +0530 Subject: [PATCH 5/5] Improve error message for nested array object string field. --- src/index.cpp | 8 ++++++++ test/collection_nested_fields_test.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/index.cpp b/src/index.cpp index f599eab6..c3db97b7 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -5557,6 +5557,10 @@ Option Index::coerce_string(const DIRTY_VALUES& dirty_values, const st else { if(dirty_values == DIRTY_VALUES::COERCE_OR_DROP) { if(!a_field.optional) { + if(a_field.nested && item.is_array()) { + return Option<>(400, "Field `" + field_name + "` has an incorrect type. " + "Hint: field inside an array of objects must be an array type as well."); + } return Option<>(400, "Field `" + field_name + "` must be " + suffix + " string."); } @@ -5568,6 +5572,10 @@ Option Index::coerce_string(const DIRTY_VALUES& dirty_values, const st } } else { // COERCE_OR_REJECT / non-optional + DROP + if(a_field.nested && item.is_array()) { + return Option<>(400, "Field `" + field_name + "` has an incorrect type. " + "Hint: field inside an array of objects must be an array type as well."); + } return Option<>(400, "Field `" + field_name + "` must be " + suffix + " string."); } } diff --git a/test/collection_nested_fields_test.cpp b/test/collection_nested_fields_test.cpp index 997026a7..87a58145 100644 --- a/test/collection_nested_fields_test.cpp +++ b/test/collection_nested_fields_test.cpp @@ -1445,6 +1445,31 @@ TEST_F(CollectionNestedFieldsTest, ExplicitSchemaOptionalFieldValidation) { } } +TEST_F(CollectionNestedFieldsTest, ExplicitSchemaForNestedArrayTypeValidation) { + nlohmann::json schema = R"({ + "name": "coll1", + "enable_nested_fields": true, + "fields": [ + {"name": "blocks.text", "type": "object[]"}, + {"name": "blocks.text.description", "type": "string"} + ] + })"_json; + + auto op = collectionManager.create_collection(schema); + ASSERT_TRUE(op.ok()); + Collection* coll1 = op.get(); + + auto doc1 = R"({ + "blocks": {"text": [{"description": "Hello world."}]} + })"_json; + + auto add_op = coll1->add(doc1.dump(), CREATE); + + ASSERT_FALSE(add_op.ok()); + ASSERT_EQ("Field `blocks.text.description` has an incorrect type. " + "Hint: field inside an array of objects must be an array type as well.", add_op.error()); +} + TEST_F(CollectionNestedFieldsTest, SortByNestedField) { nlohmann::json schema = R"({ "name": "coll1",