mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 12:12:35 +08:00
Optimize wildcard query sans filter/sort/facets.
This commit is contained in:
parent
0e39736327
commit
832b519633
@ -34,19 +34,23 @@ public:
|
||||
class iterator_t {
|
||||
private:
|
||||
block_t* curr_block;
|
||||
uint32_t curr_index;
|
||||
int64_t curr_index;
|
||||
|
||||
block_t* end_block;
|
||||
std::map<last_id_t, block_t*>* id_block_map;
|
||||
|
||||
bool reverse;
|
||||
|
||||
public:
|
||||
// uncompressed data structure for performance
|
||||
uint32_t* ids = nullptr;
|
||||
|
||||
explicit iterator_t(block_t* start, block_t* end);
|
||||
explicit iterator_t(block_t* start, block_t* end, std::map<last_id_t, block_t*>* id_block_map, bool reverse);
|
||||
iterator_t(iterator_t&& rhs) noexcept;
|
||||
~iterator_t();
|
||||
[[nodiscard]] bool valid() const;
|
||||
void next();
|
||||
void previous();
|
||||
void skip_to(uint32_t id);
|
||||
[[nodiscard]] uint32_t id() const;
|
||||
[[nodiscard]] inline uint32_t index() const;
|
||||
@ -130,6 +134,8 @@ public:
|
||||
|
||||
iterator_t new_iterator(block_t* start_block = nullptr, block_t* end_block = nullptr);
|
||||
|
||||
iterator_t new_rev_iterator();
|
||||
|
||||
static void merge(const std::vector<id_list_t*>& id_lists, std::vector<uint32_t>& result_ids);
|
||||
|
||||
static void intersect(const std::vector<id_list_t*>& id_lists, std::vector<uint32_t>& result_ids);
|
||||
|
@ -743,7 +743,7 @@ Option<bool> Collection::validate_and_standardize_sort_fields(const std::vector<
|
||||
}
|
||||
}
|
||||
|
||||
if(!found_match_score && sort_fields.size() < 3) {
|
||||
if(!found_match_score && !is_wildcard_query && sort_fields.size() < 3) {
|
||||
sort_fields_std.emplace_back(sort_field_const::text_match, sort_field_const::desc);
|
||||
}
|
||||
|
||||
|
@ -30,16 +30,25 @@ uint32_t id_list_t::block_t::erase(const uint32_t id) {
|
||||
|
||||
/* iterator_t operations */
|
||||
|
||||
/**
 * Builds an iterator that walks the block chain beginning at `start`.
 *
 * @param start        block the iterator is positioned on initially
 * @param end          sentinel block; `valid()` reports false once `curr_block` equals it
 * @param id_block_map map consulted by `previous()` to find the block preceding the current one
 * @param reverse      when true, the iterator is positioned on the last id of `start`
 *                     instead of the first one
 */
id_list_t::iterator_t::iterator_t(id_list_t::block_t* start, id_list_t::block_t* end,
                                  std::map<last_id_t, block_t*>* id_block_map, bool reverse):
        curr_block(start), curr_index(0), end_block(end), id_block_map(id_block_map), reverse(reverse) {

    if(curr_block != end_block) {
        // keep an uncompressed copy of the current block's ids for fast random access
        ids = curr_block->ids.uncompress();

        if(reverse) {
            // Widen before subtracting: if getLength() returns an unsigned type, `length - 1`
            // on an empty block would otherwise become a huge positive index rather than -1.
            curr_index = static_cast<int64_t>(curr_block->ids.getLength()) - 1;
        }
    }
}
|
||||
|
||||
bool id_list_t::iterator_t::valid() const {
|
||||
return (curr_block != end_block) && (curr_index < curr_block->size());
|
||||
if(reverse) {
|
||||
return (curr_block != end_block) && (curr_index >= 0);
|
||||
} else {
|
||||
return (curr_block != end_block) && (curr_index < curr_block->size());
|
||||
}
|
||||
}
|
||||
|
||||
void id_list_t::iterator_t::next() {
|
||||
@ -57,6 +66,25 @@ void id_list_t::iterator_t::next() {
|
||||
}
|
||||
}
|
||||
|
||||
void id_list_t::iterator_t::previous() {
|
||||
curr_index--;
|
||||
if(curr_index < 0) {
|
||||
// since block stores only the next pointer, we have to use `id_block_map` for reverse iteration
|
||||
auto last_ele = ids[curr_block->size()-1];
|
||||
auto it = id_block_map->find(last_ele);
|
||||
if(it != id_block_map->end() && it != id_block_map->begin()) {
|
||||
it--;
|
||||
curr_block = it->second;
|
||||
curr_index = curr_block->size()-1;
|
||||
|
||||
delete [] ids;
|
||||
ids = curr_block->ids.uncompress();
|
||||
} else {
|
||||
curr_block = end_block;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t id_list_t::iterator_t::id() const {
|
||||
return ids[curr_index];
|
||||
}
|
||||
@ -103,10 +131,13 @@ id_list_t::iterator_t::iterator_t(iterator_t&& rhs) noexcept {
|
||||
curr_index = rhs.curr_index;
|
||||
end_block = rhs.end_block;
|
||||
ids = rhs.ids;
|
||||
id_block_map = rhs.id_block_map;
|
||||
reverse = rhs.reverse;
|
||||
|
||||
rhs.curr_block = nullptr;
|
||||
rhs.end_block = nullptr;
|
||||
rhs.ids = nullptr;
|
||||
rhs.id_block_map = nullptr;
|
||||
}
|
||||
|
||||
/* id_list_t operations */
|
||||
@ -457,7 +488,17 @@ bool id_list_t::equals2(std::vector<id_list_t::iterator_t>& its) {
|
||||
|
||||
/**
 * Creates a forward iterator over the blocks in [start_block, end_block).
 *
 * @param start_block first block to visit; `nullptr` means start from `root_block`
 * @param end_block   sentinel block at which iteration stops
 * @return a forward iterator that also carries a pointer to this list's `id_block_map`
 */
id_list_t::iterator_t id_list_t::new_iterator(block_t* start_block, block_t* end_block) {
    if(start_block == nullptr) {
        start_block = &root_block;
    }

    return id_list_t::iterator_t(start_block, end_block, &id_block_map, false);
}
|
||||
|
||||
/**
 * Creates a reverse iterator positioned on the block stored under the greatest key of
 * `id_block_map`.
 *
 * When the map is empty, both the start and the end block are `nullptr`, so the returned
 * iterator is immediately invalid.
 */
id_list_t::iterator_t id_list_t::new_rev_iterator() {
    block_t* const last_block = id_block_map.empty() ? nullptr : id_block_map.rbegin()->second;
    return id_list_t::iterator_t(last_block, nullptr, &id_block_map, true);
}
|
||||
|
||||
void id_list_t::advance_all(std::vector<id_list_t::iterator_t>& its) {
|
||||
|
@ -412,7 +412,7 @@ void ids_t::block_intersector_t::split_lists(size_t concurrency,
|
||||
}
|
||||
}
|
||||
|
||||
partial_its.emplace_back(p_start_block, p_end_block);
|
||||
partial_its.emplace_back(p_start_block, p_end_block, nullptr, false);
|
||||
}
|
||||
|
||||
start_block = curr_block->next;
|
||||
|
@ -2487,9 +2487,43 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
if (is_wildcard_query) {
|
||||
const uint8_t field_id = (uint8_t)(FIELD_LIMIT_NUM - 0);
|
||||
const std::string& field = the_fields[0].name;
|
||||
bool no_filters_provided = (filters.empty() && filter_ids_length == 0);
|
||||
|
||||
if(no_filters_provided && facets.empty() && curated_ids.empty() && vector_query.field_name.empty() &&
|
||||
sort_fields_std.size() == 1 && sort_fields_std[0].name == sort_field_const::seq_id &&
|
||||
sort_fields_std[0].order == sort_field_const::desc) {
|
||||
// optimize for this path specifically
|
||||
std::vector<uint32_t> result_ids;
|
||||
auto it = seq_ids->new_rev_iterator();
|
||||
while(it.valid()) {
|
||||
uint32_t seq_id = it.id();
|
||||
uint64_t distinct_id = seq_id;
|
||||
if(group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
groups_processed.emplace(distinct_id);
|
||||
}
|
||||
|
||||
int64_t scores[3] = {0};
|
||||
scores[0] = seq_id;
|
||||
int64_t match_score_index = -1;
|
||||
|
||||
result_ids.push_back(seq_id);
|
||||
KV kv(field_id, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
|
||||
topster->add(&kv);
|
||||
|
||||
if(result_ids.size() == page * per_page) {
|
||||
break;
|
||||
}
|
||||
|
||||
it.previous();
|
||||
}
|
||||
|
||||
all_result_ids_len = seq_ids->num_ids();
|
||||
goto process_search_results;
|
||||
}
|
||||
|
||||
// if filters were not provided, use the seq_ids index to generate the list of all document ids
|
||||
if(filters.empty() && filter_ids_length == 0) {
|
||||
if(no_filters_provided) {
|
||||
filter_ids_length = seq_ids->num_ids();
|
||||
filter_ids = seq_ids->uncompress();
|
||||
}
|
||||
@ -2710,6 +2744,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
|
||||
//LOG(INFO) << "topster size: " << topster->size;
|
||||
|
||||
process_search_results:
|
||||
|
||||
delete [] exclude_token_ids;
|
||||
delete [] excluded_result_ids;
|
||||
|
||||
|
@ -1280,14 +1280,30 @@ TEST_F(CollectionSpecificMoreTest, WildcardSearchWithNoSortingField) {
|
||||
|
||||
Collection* coll1 = collectionManager.create_collection(schema).get();
|
||||
|
||||
nlohmann::json doc;
|
||||
doc["title"] = "Sample Title";
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
|
||||
// search on empty collection
|
||||
auto res_op = coll1->search("*", {}, "", {}, {}, {2}, 10, 1,
|
||||
FREQUENCY, {true});
|
||||
|
||||
ASSERT_TRUE(res_op.ok());
|
||||
auto res = res_op.get();
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
ASSERT_EQ(0, res["found"].get<size_t>());
|
||||
|
||||
nlohmann::json doc;
|
||||
doc["title"] = "Sample Title 1";
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
|
||||
doc["title"] = "Sample Title 2";
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
|
||||
res_op = coll1->search("*", {}, "", {}, {}, {2}, 10, 1,
|
||||
FREQUENCY, {true});
|
||||
|
||||
ASSERT_TRUE(res_op.ok());
|
||||
res = res_op.get();
|
||||
ASSERT_EQ(2, res["hits"].size());
|
||||
ASSERT_EQ(2, res["found"].get<size_t>());
|
||||
|
||||
ASSERT_EQ("1", res["hits"][0]["document"]["id"].get<std::string>());
|
||||
ASSERT_EQ("0", res["hits"][1]["document"]["id"].get<std::string>());
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user