diff --git a/src/api.cpp b/src/api.cpp index 8d9f1e44..93263c73 100644 --- a/src/api.cpp +++ b/src/api.cpp @@ -80,7 +80,7 @@ void post_create_collection(http_req & req, http_res & res) { if(!req_json[DEFAULT_SORTING_FIELD].is_string()) { return res.send_400(std::string("`") + DEFAULT_SORTING_FIELD + - "` should be a string. It should be the name of an unsigned integer field."); + "` should be a string. It should be the name of an int32/float field."); } if(collectionManager.get_collection(req_json["name"]) != nullptr) { diff --git a/src/collection.cpp b/src/collection.cpp index 391e8a83..300fcc0f 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -118,7 +118,7 @@ Option Collection::validate_index_in_memory(const nlohmann::json &docu } if(!document[default_sorting_field].is_number_integer() && !document[default_sorting_field].is_number_float()) { - return Option<>(400, "Default sorting field `" + default_sorting_field + "` must be a number."); + return Option<>(400, "Default sorting field `" + default_sorting_field + "` must be of type int32 or float."); } if(document[default_sorting_field].is_number_integer() && @@ -391,6 +391,15 @@ Option Collection::search(std::string query, const std::vector::max()) : + std::to_string(std::numeric_limits::max()); + filter catch_all_filter = {f.name, {max_value}, LESS_THAN_EQUALS}; + filters.push_back(catch_all_filter); + } + // validate facet fields for(const std::string & field_name: facet_fields) { if(facet_schema.count(field_name) == 0) { @@ -553,7 +562,7 @@ Option Collection::search(std::string query, const std::vector leaf_to_indices; for (const art_leaf *token_leaf : searched_queries[field_order_kv.query_index]) { @@ -640,13 +649,13 @@ Option Collection::search(std::string query, const std::vectorsecond; it->second = nullptr; } - - prune_document(document, include_fields, exclude_fields); - wrapper_doc["document"] = document; - //wrapper_doc["match_score"] = field_order_kv.match_score; - //wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key; } + prune_document(document, include_fields, exclude_fields); + wrapper_doc["document"] = document; + //wrapper_doc["match_score"] = field_order_kv.match_score; + //wrapper_doc["seq_id"] = (uint32_t) field_order_kv.key; + result["hits"].push_back(wrapper_doc); } diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp index 937422f6..48e43849 100644 --- a/src/collection_manager.cpp +++ b/src/collection_manager.cpp @@ -156,6 +156,12 @@ Option CollectionManager::create_collection(const std::string name, field_val[fields::type] = field.type; field_val[fields::facet] = field.facet; fields_json.push_back(field_val); + + if(field.name == default_sorting_field && !(field.type == field_types::INT32 || + field.type == field_types::FLOAT)) { + return Option(400, "Default sorting field `" + default_sorting_field + "` must be of type int32 " + "or float."); + } } collection_meta[COLLECTION_NAME_KEY] = name; diff --git a/src/index.cpp b/src/index.cpp index c1bff470..a6942610 100644 --- a/src/index.cpp +++ b/src/index.cpp @@ -622,15 +622,22 @@ void Index::search(Option & outcome, std::string query, const std::vec Topster<512> topster; - const size_t num_search_fields = std::min(search_fields.size(), (size_t) FIELD_LIMIT_NUM); - for(size_t i = 0; i < num_search_fields; i++) { - const std::string & field = search_fields[i]; - // proceed to query search only when no filters are provided or when filtering produces results - if(filters.size() == 0 || filter_ids_length > 0) { - uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - i); - search_field(field_id, query, field, filter_ids, filter_ids_length, facets, sort_fields_std, - num_typos, num_results, searched_queries, topster, &all_result_ids, all_result_ids_len, - token_order, prefix, drop_tokens_threshold); + if(query == "*") { + uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0); + score_results(sort_fields_std, (uint16_t) searched_queries.size(), field_id, 0, topster, {}, + filter_ids, filter_ids_length); + all_result_ids_len = filter_ids_length; + } else { + const size_t num_search_fields = std::min(search_fields.size(), (size_t) FIELD_LIMIT_NUM); + for(size_t i = 0; i < num_search_fields; i++) { + const std::string & field = search_fields[i]; + // proceed to query search only when no filters are provided or when filtering produces results + if(filters.size() == 0 || filter_ids_length > 0) { + uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - i); + search_field(field_id, query, field, filter_ids, filter_ids_length, facets, sort_fields_std, + num_typos, num_results, searched_queries, topster, &all_result_ids, all_result_ids_len, + token_order, prefix, drop_tokens_threshold); + } } } @@ -897,7 +904,7 @@ void Index::score_results(const std::vector & sort_fields, const uint16 uint64_t match_score = 0; - if(query_suggestion.size() == 1) { + if(query_suggestion.size() <= 1) { match_score = single_token_match_score; } else { std::vector>> array_token_positions; diff --git a/test/art_test.cpp b/test/art_test.cpp index 926dd838..46781fa0 100644 --- a/test/art_test.cpp +++ b/test/art_test.cpp @@ -768,7 +768,7 @@ TEST(ArtTest, test_encode_int32) { } } -TEST(ArtTest, test_int32_range_hundreds) { +TEST(ArtTest, test_int32_overlap) { art_tree t; art_tree_init(&t); @@ -776,7 +776,7 @@ TEST(ArtTest, test_int32_range_hundreds) { const int CHAR_LEN = 8; unsigned char chars[CHAR_LEN]; - std::vector results; + std::vector results; std::vector> values = {{2014, 2015, 2016}, {2015, 2016}, {2016}, {1981, 1985}, {1999, 2000, 2001, 2002}}; @@ -793,7 +793,19 @@ TEST(ArtTest, test_int32_range_hundreds) { ASSERT_TRUE(res == 0); ASSERT_EQ(3, results.size()); - return ; + res = art_tree_destroy(&t); + ASSERT_TRUE(res == 0); +} + +TEST(ArtTest, test_int32_range_hundreds) { + art_tree t; + art_tree_init(&t); + + art_document doc = get_document(1); + const int CHAR_LEN = 8; + unsigned char chars[CHAR_LEN]; + + std::vector results; for(uint32_t i = 100; i < 110; i++) { encode_int32(i, chars); @@ -802,8 +814,7 @@ TEST(ArtTest, test_int32_range_hundreds) { encode_int32(106, chars); - - res = art_int32_search(&t, 106, EQUALS, results); + int res = art_int32_search(&t, 106, EQUALS, results); ASSERT_TRUE(res == 0); ASSERT_EQ(1, results.size()); results.clear(); @@ -832,6 +843,35 @@ TEST(ArtTest, test_int32_range_hundreds) { ASSERT_TRUE(res == 0); } +TEST(ArtTest, test_int32_duplicates) { + art_tree t; + art_tree_init(&t); + + art_document doc = get_document(1); + const int CHAR_LEN = 8; + unsigned char chars[CHAR_LEN]; + + for(size_t i = 0; i < 10000; i++) { + doc.id = i; + int value = 1900 + (rand() % static_cast(2018 - 1900 + 1)); + encode_int32(value, chars); + art_insert(&t, (unsigned char*)chars, CHAR_LEN, &doc, 1); + } + + std::vector results; + + int res = art_int32_search(&t, 0, GREATER_THAN, results); + ASSERT_TRUE(res == 0); + size_t counter = 0; + + for(auto res: results) { + counter += res->values->ids.getLength(); + } + + ASSERT_EQ(10000, counter); + results.clear(); +} + TEST(ArtTest, test_int32_negative) { art_tree t; art_tree_init(&t); diff --git a/test/collection_test.cpp b/test/collection_test.cpp index eb1ec247..f6db59f5 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -417,6 +417,21 @@ TEST_F(CollectionTest, Pagination) { } } +TEST_F(CollectionTest, WildcardQuery) { + nlohmann::json results = collection->search("*", query_fields, "points:>0", {}, sort_fields, 0, 3, 1, FREQUENCY, + false).get(); + ASSERT_EQ(3, results["hits"].size()); + ASSERT_EQ(25, results["found"].get()); + + // when no filter is specified, fall back on default sorting field based catch-all filter + Option results_op = collection->search("*", query_fields, "", {}, sort_fields, 0, 3, 1, FREQUENCY, + false); + + ASSERT_TRUE(results_op.ok()); + ASSERT_EQ(3, results["hits"].size()); + ASSERT_EQ(25, results["found"].get()); +} + TEST_F(CollectionTest, PrefixSearching) { std::vector facets; nlohmann::json results = collection->search("ex", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, true).get(); @@ -1537,6 +1552,20 @@ TEST_F(CollectionTest, SearchingWithMissingFields) { collectionManager.drop_collection("coll_array_fields"); } +TEST_F(CollectionTest, DefaultSortingFieldMustBeInt32OrFloat) { + std::vector fields = {field("name", field_types::STRING, false), + field("tags", field_types::STRING_ARRAY, true), + field("age", field_types::INT32, false), + field("average", field_types::INT32, false) }; + + std::vector sort_fields = { sort_by("age", "DESC"), sort_by("average", "DESC") }; + + Option collection_op = collectionManager.create_collection("sample_collection", fields, "name"); + EXPECT_FALSE(collection_op.ok()); + EXPECT_EQ("Default sorting field `name` must be of type int32 or float.", collection_op.error()); + collectionManager.drop_collection("sample_collection"); +} + TEST_F(CollectionTest, IndexingWithBadData) { // should not crash when document to-be-indexed doesn't match schema Collection *sample_collection; @@ -1588,7 +1617,7 @@ TEST_F(CollectionTest, IndexingWithBadData) { doc_str = "{\"name\": \"foo\", \"age\": \"34\", \"tags\": [], \"average\": 34 }"; const Option & bad_default_sorting_field_op1 = sample_collection->add(doc_str); ASSERT_FALSE(bad_default_sorting_field_op1.ok()); - ASSERT_STREQ("Default sorting field `age` must be a number.", bad_default_sorting_field_op1.error().c_str()); + ASSERT_STREQ("Default sorting field `age` must be of type int32 or float.", bad_default_sorting_field_op1.error().c_str()); doc_str = "{\"name\": \"foo\", \"age\": 343234324234233234, \"tags\": [], \"average\": 34 }"; const Option & bad_default_sorting_field_op2 = sample_collection->add(doc_str);