Speed up wildcard searches further.

This commit is contained in:
kishorenc 2020-08-12 18:09:39 +05:30
parent c70e83cfba
commit 14faa3af4e
4 changed files with 30 additions and 11 deletions

View File

@ -2,6 +2,13 @@
## Pre-alpha
a) ~~Fix memory ratio (decreasing with indexing)~~
b) ~~Speed up wildcard searches further~~
c) Allow int64 in default sorting field
d) Use connection timeout for CURL rather than request timeout
e) Update role to set max memory ratio at 0.80
f) Async import
**Search index**
- ~~Proper JSON as input~~

View File

@ -619,15 +619,6 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
filters.push_back(f);
}
// for a wildcard query, if filter is not specified, use default_sorting_field as a catch-all
if(query == "*" && filters.empty()) {
field f = search_schema.at(default_sorting_field);
std::string max_value = f.is_float() ? std::to_string(std::numeric_limits<float>::max()) :
std::to_string(std::numeric_limits<int32_t>::max());
filter catch_all_filter = {f.name, {max_value}, LESS_THAN_EQUALS};
filters.push_back(catch_all_filter);
}
// validate facet fields
for(const std::string & field_name: facet_fields) {
if(facet_schema.count(field_name) == 0) {

View File

@ -1164,11 +1164,32 @@ void Index::search(Option<uint32_t> & outcome,
const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0);
const std::string & field = search_fields[0];
// if a filter is not specified, use the sorting index to generate the list of all document ids
if(filters.empty()) {
std::string all_records_field;
// get the first non-optional field
for(const auto& kv: sort_schema) {
if(!kv.second.optional && kv.first != sort_field_const::text_match) {
all_records_field = kv.first;
break;
}
}
const spp::sparse_hash_map<uint32_t, int64_t> *kvs = sort_index[all_records_field];
filter_ids_length = kvs->size();
filter_ids = new uint32_t[filter_ids_length];
size_t i = 0;
for(const auto& kv: *kvs) {
filter_ids[i++] = kv.first;
}
}
if(!curated_ids.empty()) {
uint32_t *excluded_result_ids = nullptr;
filter_ids_length = ArrayUtils::exclude_scalar(filter_ids, filter_ids_length, &curated_ids_sorted[0],
curated_ids.size(), &excluded_result_ids);
delete [] filter_ids;
filter_ids = excluded_result_ids;
}

View File

@ -2259,7 +2259,7 @@ TEST_F(CollectionTest, OptionalFields) {
infile.close();
// first must be able to fetch all records (i.e. all must have been index)
// first must be able to fetch all records (i.e. all must have been indexed)
auto res = coll1->search("*", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
ASSERT_EQ(6, res["found"].get<size_t>());