review patch by @kishorenc

This commit is contained in:
krunal1313 2023-04-19 14:58:29 +05:30
parent 1f68572179
commit 574a46d77d
4 changed files with 58 additions and 62 deletions

View File

@@ -51,7 +51,7 @@ public:
size_t get_facet_count(const std::string& field);
int intersect(const std::string& val, const uint32_t* result_ids, int result_id_len,
size_t intersect(const std::string& val, const uint32_t* result_ids, int result_id_len,
int max_facet_count, std::map<std::string, uint32_t>& found, bool is_wildcard_no_filter_query);
std::string get_facet_by_count_index(const std::string& field, uint32_t count_index);

View File

@@ -506,16 +506,16 @@ private:
void insert_doc(const int64_t score, art_tree *t, uint32_t seq_id,
const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const;
static void tokenize_string_with_facets(const std::string& text, bool is_facet, const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets);
static void tokenize_string(const std::string& text, bool is_facet, const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets);
static void tokenize_string_array_with_facets(const std::vector<std::string>& strings, bool is_facet,
const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets);
static void tokenize_string_array(const std::vector<std::string>& strings, bool is_facet,
const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets);
void collate_included_ids(const std::vector<token_t>& q_included_tokens,
const std::map<size_t, std::map<size_t, uint32_t>> & included_ids_map,

View File

@@ -7,6 +7,7 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu
uint32_t index;
const auto sv = value.substr(0, 100);
const auto it = facet_index_map.find(sv);
if(it == facet_index_map.end()) {
index = ++count_index;
@@ -14,7 +15,7 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu
fis.id_list_ptr = SET_COMPACT_IDS(compact_id_list_t::create(1, {id}));
fis.index = index;
facet_index_map.emplace(sv, fis);
}else {
} else {
auto ids = it->id_list_ptr;
if (!ids_t::contains(ids, id)) {
ids_t::upsert(ids, id);
@@ -25,17 +26,16 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu
const auto facet_count = ids_t::num_ids(facet_index_map.at(sv).id_list_ptr);
//LOG(INFO) << "Facet count in facet " << sv << " : " << facet_count;
auto& counter_list = facet_field_map[field].counter_list;
count_list node(sv, facet_count);
if(counter_list.empty()) {
counter_list.emplace_back(count_list(sv, facet_count));
counter_list.emplace_back(sv, facet_count);
} else {
auto it = counter_list.begin();
auto counter_it = counter_list.begin();
//remove node from list
for(it = counter_list.begin(); it != counter_list.end(); ++it) {
if(it->facet_value == sv) {
for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) {
if(counter_it->facet_value == sv) {
//found facet in first node
counter_list.erase(it);
counter_list.erase(counter_it);
break;
}
}
@@ -43,15 +43,15 @@ uint32_t facet_index_t::insert(const std::string& field, const std::string& valu
//find position in list and add node with updated count
count_list node(sv, facet_count);
for(it = counter_list.begin(); it != counter_list.end(); ++it) {
for(counter_it = counter_list.begin(); counter_it != counter_list.end(); ++counter_it) {
// LOG (INFO) << "inserting in middle or front facet " << node.facet_value
// << " with count " << node.count;
if(it->count <= facet_count) {
counter_list.emplace(it, node);
if(counter_it->count <= facet_count) {
counter_list.emplace(counter_it, node);
break;
}
}
if(it == counter_list.end()) {
if(counter_it == counter_list.end()) {
// LOG (INFO) << "inserting at last facet " << node.facet_value
// << " with count " << node.count;
counter_list.emplace_back(node);
@@ -89,7 +89,7 @@ size_t facet_index_t::get_facet_count(const std::string& field) {
}
//returns the count of matching seq_ids from result array
int facet_index_t::intersect(const std::string& field, const uint32_t* result_ids,
size_t facet_index_t::intersect(const std::string& field, const uint32_t* result_ids,
int result_ids_len, int max_facet_count,
std::map<std::string, uint32_t>& found, bool is_wildcard_no_filter_query) {
//LOG (INFO) << "intersecting field " << field;
@@ -109,7 +109,7 @@ int facet_index_t::intersect(const std::string& field, const uint32_t* result_id
for(const auto& counter_list_it : counter_list) {
// LOG (INFO) << "checking ids in facet_value " << counter_list_it.facet_value
// << " having total count " << counter_list_it.count;
int count = 0;
uint32_t count = 0;
if(is_wildcard_no_filter_query) {
count = counter_list_it.count;

View File

@@ -266,7 +266,7 @@ void Index::compute_token_offsets_facets(index_record& record,
continue;
}
offsets_facet_hashes_t offset_facet_hashes;
std::unordered_map<std::string, std::vector<uint32_t>> offsets;
bool is_facet = search_schema.at(field_name).facet;
@@ -293,9 +293,9 @@ void Index::compute_token_offsets_facets(index_record& record,
}
}
tokenize_string_array_with_facets(strings, is_facet, the_field,
local_symbols_to_index, local_token_separators,
offset_facet_hashes.offsets/*, offset_facet_hashes.facet_hashes*/);
tokenize_string_array(strings, is_facet, the_field,
local_symbols_to_index, local_token_separators,
offsets);
} else {
std::string text;
@@ -309,27 +309,27 @@ void Index::compute_token_offsets_facets(index_record& record,
text = std::to_string(document[field_name].get<bool>());
}
tokenize_string_with_facets(text, is_facet, the_field,
local_symbols_to_index, local_token_separators,
offset_facet_hashes.offsets/*, offset_facet_hashes.facet_hashes*/);
tokenize_string(text, is_facet, the_field,
local_symbols_to_index, local_token_separators,
offsets);
}
}
if(the_field.is_string()) {
if(the_field.type == field_types::STRING) {
tokenize_string_with_facets(document[field_name], is_facet, the_field,
local_symbols_to_index, local_token_separators,
offset_facet_hashes.offsets);
tokenize_string(document[field_name], is_facet, the_field,
local_symbols_to_index, local_token_separators,
offsets);
} else {
tokenize_string_array_with_facets(document[field_name], is_facet, the_field,
local_symbols_to_index, local_token_separators,
offset_facet_hashes.offsets);
tokenize_string_array(document[field_name], is_facet, the_field,
local_symbols_to_index, local_token_separators,
offsets);
}
}
if(!offset_facet_hashes.offsets.empty()) {
record.field_index.emplace(field_name, std::move(offset_facet_hashes));
if(!offsets.empty()) {
record.field_index.emplace(field_name, std::move(offsets));
}
}
}
@@ -659,38 +659,34 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
facet_hash_values_t fhashvalues;
if(afield.type == field_types::INT32_ARRAY) {
for(int i = 0; i < document[afield.name].size(); ++i) {
for(size_t i = 0; i < document[afield.name].size(); ++i) {
int32_t raw_val = document[afield.name][i].get<int32_t>();
value = std::to_string(raw_val);
auto index = facet_index_v4->insert(afield.name, value, seq_id);
fhashvalues.hashes.emplace_back(index);
}
}
else if(afield.type == field_types::INT64_ARRAY) {
for(int i = 0; i < document[afield.name].size(); ++i) {
} else if(afield.type == field_types::INT64_ARRAY) {
for(size_t i = 0; i < document[afield.name].size(); ++i) {
int64_t raw_val = document[afield.name][i].get<int64_t>();
value = std::to_string(raw_val);
auto index = facet_index_v4->insert(afield.name, value, seq_id);
fhashvalues.hashes.emplace_back(index);
}
}
else if(afield.type == field_types::STRING_ARRAY) {
for(int i = 0; i < document[afield.name].size(); ++i) {
} else if(afield.type == field_types::STRING_ARRAY) {
for(size_t i = 0; i < document[afield.name].size(); ++i) {
value = document[afield.name][i];
auto index = facet_index_v4->insert(afield.name, value, seq_id);
fhashvalues.hashes.emplace_back(index);
}
}
else if(afield.type == field_types::FLOAT_ARRAY) {
for(int i = 0; i < document[afield.name].size(); ++i) {
} else if(afield.type == field_types::FLOAT_ARRAY) {
for(size_t i = 0; i < document[afield.name].size(); ++i) {
float raw_val = document[afield.name][i].get<float>();
value = StringUtils::float_to_str(raw_val);
auto index = facet_index_v4->insert(afield.name, value, seq_id);
fhashvalues.hashes.emplace_back(index);
}
}
else if(afield.type == field_types::BOOL_ARRAY) {
for(int i = 0; i < document[afield.name].size(); ++i) {
} else if(afield.type == field_types::BOOL_ARRAY) {
for(size_t i = 0; i < document[afield.name].size(); ++i) {
value = std::to_string(document[afield.name][i].get<bool>());
auto index = facet_index_v4->insert(afield.name, value, seq_id);
fhashvalues.hashes.emplace_back(index);
@@ -1028,10 +1024,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
}
}
void Index::tokenize_string_with_facets(const std::string& text, bool is_facet, const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets) {
void Index::tokenize_string(const std::string& text, bool is_facet, const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets) {
Tokenizer tokenizer(text, true, !a_field.is_string(), a_field.locale, symbols_to_index, token_separators);
std::string token;
@@ -1057,11 +1053,11 @@ void Index::tokenize_string_with_facets(const std::string& text, bool is_facet,
}
}
void Index::tokenize_string_array_with_facets(const std::vector<std::string>& strings, bool is_facet,
const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets) {
void Index::tokenize_string_array(const std::vector<std::string>& strings, bool is_facet,
const field& a_field,
const std::vector<char>& symbols_to_index,
const std::vector<char>& token_separators,
std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets) {
for(size_t array_index = 0; array_index < strings.size(); array_index++) {
const std::string& str = strings[array_index];
@@ -1170,8 +1166,8 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
const auto facet_records = facet_index_v4->get_facet_count(a_facet.field_name);
if(results_size && facet_records && (facet_records <= 10 || is_wildcard_query == true)
&& use_facet_query == false && group_limit == 0 && no_filters_provided == true) {
if(results_size && facet_records && (facet_records <= 10 || is_wildcard_query) &&
!use_facet_query && group_limit == 0 && no_filters_provided) {
//LOG(INFO) << "Using intersection to find facets";
a_facet.is_intersected = true;