mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 05:32:30 +08:00
facet index refactor updated changes
This commit is contained in:
parent
8b4e95e421
commit
cd69111a5b
@ -3,8 +3,6 @@
|
||||
#include "ids_t.h"
|
||||
#include "tsl/htrie_map.h"
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
|
||||
class facet_index_t {
|
||||
private:
|
||||
@ -21,17 +19,27 @@ private:
|
||||
struct facet_index_counter {
|
||||
tsl::htrie_map<char, void*> facet_index_map;
|
||||
std::list<count_list> counter_list;
|
||||
|
||||
~facet_index_counter() {
|
||||
for(auto it = facet_index_map.begin(); it != facet_index_map.end(); ++it) {
|
||||
ids_t::destroy_list(it.value());
|
||||
}
|
||||
|
||||
facet_index_map.clear();
|
||||
|
||||
counter_list.clear();
|
||||
}
|
||||
};
|
||||
|
||||
std::map<std::string, facet_index_counter> facet_field_map;
|
||||
public:
|
||||
|
||||
facet_index_t() = default;
|
||||
|
||||
~facet_index_t();
|
||||
|
||||
void insert(const std::string& field, const std::string& value, uint32_t id);
|
||||
|
||||
size_t get(const std::string& field, std::map<std::string,std::vector<uint32_t>>& result_ids);
|
||||
|
||||
void erase(const std::string& field);
|
||||
|
||||
bool contains(const std::string& field);
|
||||
@ -40,4 +48,7 @@ public:
|
||||
|
||||
int intersect(const std::string& val, const uint32_t* result_ids, int result_id_len,
|
||||
int max_facet_count, std::map<std::string, uint32_t>& found);
|
||||
|
||||
int get_facet(const std::string& field, const std::vector<std::string>& searched_tokens,
|
||||
std::vector<std::string>& facets);
|
||||
};
|
@ -635,26 +635,24 @@ struct facet_stats_t {
|
||||
|
||||
struct facet {
|
||||
const std::string field_name;
|
||||
spp::sparse_hash_map<uint64_t, facet_count_t> result_map;
|
||||
|
||||
spp::sparse_hash_map<std::string, facet_count_t> result_map;
|
||||
// used for facet value query
|
||||
spp::sparse_hash_map<uint64_t, std::vector<std::string>> hash_tokens;
|
||||
//spp::sparse_hash_map<uint64_t, std::vector<std::string>> hash_tokens;
|
||||
spp::sparse_hash_map<std::string, std::vector<std::string>> facet_tokens;
|
||||
|
||||
// used for faceting grouped results
|
||||
spp::sparse_hash_map<uint64_t, spp::sparse_hash_set<uint64_t>> hash_groups;
|
||||
//spp::sparse_hash_map<uint64_t, spp::sparse_hash_set<uint64_t>> hash_groups;
|
||||
|
||||
facet_stats_t stats;
|
||||
|
||||
//dictionary of key=>pair(range_id, range_val)
|
||||
std::map<int64_t, std::string> facet_range_map;
|
||||
std::map<std::string, std::string> facet_range_map;
|
||||
|
||||
bool is_range_query;
|
||||
|
||||
bool sampled = false;
|
||||
|
||||
bool is_wildcard_match = false;
|
||||
|
||||
bool get_range(int64_t key, std::pair<int64_t, std::string>& range_pair)
|
||||
bool get_range(std::string key, std::pair<int64_t, std::string>& range_pair)
|
||||
{
|
||||
if(facet_range_map.empty())
|
||||
{
|
||||
@ -673,7 +671,7 @@ struct facet {
|
||||
}
|
||||
|
||||
explicit facet(const std::string& field_name,
|
||||
std::map<int64_t, std::string> facet_range = {}, bool is_range_q = false)
|
||||
std::map<std::string, std::string> facet_range = {}, bool is_range_q = false)
|
||||
:field_name(field_name){
|
||||
facet_range_map = facet_range;
|
||||
is_range_query = is_range_q;
|
||||
@ -684,7 +682,7 @@ struct facet_info_t {
|
||||
// facet hash => resolved tokens
|
||||
//std::unordered_map<uint64_t, std::vector<std::string>> hashes;
|
||||
//facet name => resolved tokens
|
||||
std::unordered_map<uint32_t, std::vector<std::string>> doc_id_tokens;
|
||||
std::unordered_map<std::string, std::vector<std::string>> facet_tokens;
|
||||
bool use_facet_query = false;
|
||||
bool should_compute_stats = false;
|
||||
field facet_field{"", "", false};
|
||||
|
@ -281,8 +281,6 @@ struct hnsw_index_t {
|
||||
}
|
||||
};
|
||||
|
||||
extern std::map<std::string, std::map<std::string, uint32_t>> facet_results;
|
||||
|
||||
class Index {
|
||||
private:
|
||||
mutable std::shared_mutex mutex;
|
||||
@ -509,7 +507,7 @@ private:
|
||||
|
||||
static uint64_t facet_token_hash(const field & a_field, const std::string &token);
|
||||
|
||||
static void compute_facet_stats(facet &a_facet, uint64_t raw_value, const std::string & field_type);
|
||||
static void compute_facet_stats(facet &a_facet, std::string raw_value, const std::string & field_type);
|
||||
|
||||
static void handle_doc_ops(const tsl::htrie_map<char, field>& search_schema,
|
||||
nlohmann::json& update_doc, const nlohmann::json& old_doc, nlohmann::json& new_doc);
|
||||
|
@ -22,8 +22,6 @@
|
||||
const std::string override_t::MATCH_EXACT = "exact";
|
||||
const std::string override_t::MATCH_CONTAINS = "contains";
|
||||
|
||||
std::map<std::string, std::map<std::string, uint32_t>> facet_results;
|
||||
|
||||
struct sort_fields_guard_t {
|
||||
std::vector<sort_by> sort_fields_std;
|
||||
|
||||
@ -1922,15 +1920,22 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
facet_result["counts"] = nlohmann::json::array();
|
||||
|
||||
std::vector<facet_value_t> facet_values;
|
||||
std::vector<std::pair<uint64_t, facet_count_t>> facet_hash_counts;
|
||||
// std::vector<std::pair<uint64_t, facet_count_t>> facet_hash_counts;
|
||||
|
||||
for (const auto & kv : a_facet.result_map) {
|
||||
facet_hash_counts.emplace_back(kv);
|
||||
}
|
||||
|
||||
if(a_facet.is_range_query){
|
||||
for(auto kv : a_facet.result_map){
|
||||
// for (const auto & kv : a_facet.result_map) {
|
||||
// facet_hash_counts.emplace_back(kv);
|
||||
// }
|
||||
|
||||
auto the_field = search_schema.at(a_facet.field_name);
|
||||
// keep only top K facets
|
||||
//auto max_facets = std::min(max_facet_values, facet_hash_counts.size());
|
||||
auto max_facets = std::min(max_facet_values, a_facet.result_map.size());
|
||||
// std::nth_element(facet_hash_counts.begin(), facet_hash_counts.begin() + max_facets,
|
||||
// facet_hash_counts.end(), Collection::facet_count_compare);
|
||||
//LOG (INFO) << "found facet size " << a_facet.result_map.size();
|
||||
for(auto& kv : a_facet.result_map) {
|
||||
|
||||
if(a_facet.is_range_query){
|
||||
auto facet_range_iter = a_facet.facet_range_map.find(kv.first);
|
||||
if(facet_range_iter != a_facet.facet_range_map.end()){
|
||||
auto & facet_count = kv.second;
|
||||
@ -1940,109 +1945,99 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
else{
|
||||
LOG (ERROR) << "range_id not found in result map.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto the_field = search_schema.at(a_facet.field_name);
|
||||
// keep only top K facets
|
||||
//auto max_facets = std::min(max_facet_values, facet_hash_counts.size());
|
||||
auto max_facets = std::min(max_facet_values, facet_results[a_facet.field_name].size());
|
||||
// std::nth_element(facet_hash_counts.begin(), facet_hash_counts.begin() + max_facets,
|
||||
// facet_hash_counts.end(), Collection::facet_count_compare);
|
||||
LOG (INFO) << "found_doc_seq_ids size " << facet_results[a_facet.field_name].size();
|
||||
//for(size_t fi = 0; fi < max_facets; fi++) {
|
||||
for(auto& it : facet_results[a_facet.field_name]) {
|
||||
} else {
|
||||
//facet_value_t facet_value = { kv.first, std::string(), kv.second.count};
|
||||
//facet_values.emplace_back(facet_value);
|
||||
|
||||
|
||||
if(a_facet.is_range_query){
|
||||
break;
|
||||
// remap facet value hash with actual string
|
||||
// auto & kv = facet_hash_counts[fi];
|
||||
// auto & facet_count = kv.second;
|
||||
// // fetch actual facet value from representative doc id
|
||||
// const std::string& seq_id_key = get_seq_id_key((uint32_t) facet_count.doc_id);
|
||||
// nlohmann::json document;
|
||||
// const Option<bool> & document_op = get_document_from_store(seq_id_key, document);
|
||||
// if(!document_op.ok()) {
|
||||
// LOG(ERROR) << "Facet fetch error. " << document_op.error();
|
||||
// continue;
|
||||
// }
|
||||
//std::string value;
|
||||
// bool facet_found = facet_value_to_string(a_facet, facet_count, document, value);
|
||||
// if(!facet_found) {
|
||||
// continue;
|
||||
// }
|
||||
std::string value = kv.first;
|
||||
std::unordered_map<std::string, size_t> ftoken_pos;
|
||||
//std::vector<string>& ftokens = a_facet.hash_tokens[kv.first];
|
||||
std::vector<string>& ftokens = a_facet.facet_tokens[kv.first];
|
||||
for(size_t ti = 0; ti < ftokens.size(); ti++) {
|
||||
// if(the_field.is_bool()) {
|
||||
// if(ftokens[ti] == "1") {
|
||||
// ftokens[ti] = "true";
|
||||
// } else {
|
||||
// ftokens[ti] = "false";
|
||||
// }
|
||||
// }
|
||||
const std::string& resolved_token = ftokens[ti];
|
||||
ftoken_pos[resolved_token] = ti;
|
||||
}
|
||||
const std::string& last_full_q_token = ftokens.empty() ? "" : ftokens.back();
|
||||
// 2 passes: first identify tokens that need to be highlighted and then construct highlighted text
|
||||
bool is_cyrillic = Tokenizer::is_cyrillic(the_field.locale);
|
||||
bool normalise = is_cyrillic ? false : true;
|
||||
Tokenizer tokenizer(value, normalise, !the_field.is_string(), the_field.locale, symbols_to_index, token_separators);
|
||||
// secondary tokenizer used for specific languages that requires transliteration
|
||||
// we use 2 tokenizers so that the original text offsets are available for highlighting
|
||||
Tokenizer word_tokenizer("", true, false, the_field.locale, symbols_to_index, token_separators);
|
||||
std::string raw_token;
|
||||
size_t raw_token_index = 0, tok_start = 0, tok_end = 0;
|
||||
// need an ordered map here to ensure that it is ordered by the key (start offset)
|
||||
std::map<size_t, size_t> token_offsets;
|
||||
size_t prefix_token_start_index = 0;
|
||||
while(tokenizer.next(raw_token, raw_token_index, tok_start, tok_end)) {
|
||||
if(is_cyrillic) {
|
||||
word_tokenizer.tokenize(raw_token);
|
||||
}
|
||||
auto token_pos_it = ftoken_pos.find(raw_token);
|
||||
if(token_pos_it != ftoken_pos.end()) {
|
||||
token_offsets[tok_start] = tok_end;
|
||||
if(raw_token == last_full_q_token) {
|
||||
prefix_token_start_index = tok_start;
|
||||
}
|
||||
}
|
||||
}
|
||||
auto offset_it = token_offsets.begin();
|
||||
size_t i = 0;
|
||||
std::stringstream highlightedss;
|
||||
// loop until end index, accumulate token and complete highlighting
|
||||
while(i < value.size()) {
|
||||
if(offset_it != token_offsets.end()) {
|
||||
if (i == offset_it->first) {
|
||||
highlightedss << highlight_start_tag;
|
||||
// do prefix highlighting for non-dropped last token
|
||||
size_t token_len = (i == prefix_token_start_index && token_offsets.size() == facet_query_num_tokens) ?
|
||||
facet_query_last_token.size() :
|
||||
(offset_it->second - i + 1);
|
||||
if(i == prefix_token_start_index && token_offsets.size() == facet_query_num_tokens) {
|
||||
token_len = std::min((offset_it->second - i + 1), facet_query_last_token.size());
|
||||
} else {
|
||||
token_len = (offset_it->second - i + 1);
|
||||
}
|
||||
for(size_t j = 0; j < token_len; j++) {
|
||||
highlightedss << value[i + j];
|
||||
}
|
||||
highlightedss << highlight_end_tag;
|
||||
offset_it++;
|
||||
i += token_len;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
highlightedss << value[i];
|
||||
i++;
|
||||
}
|
||||
facet_value_t facet_value = {value, highlightedss.str(), kv.second.count};
|
||||
facet_values.emplace_back(facet_value);
|
||||
}
|
||||
|
||||
// remap facet value hash with actual string
|
||||
// auto & kv = facet_hash_counts[fi];
|
||||
// auto & facet_count = kv.second;
|
||||
// // fetch actual facet value from representative doc id
|
||||
// const std::string& seq_id_key = get_seq_id_key((uint32_t) facet_count.doc_id);
|
||||
// nlohmann::json document;
|
||||
// const Option<bool> & document_op = get_document_from_store(seq_id_key, document);
|
||||
// if(!document_op.ok()) {
|
||||
// LOG(ERROR) << "Facet fetch error. " << document_op.error();
|
||||
// continue;
|
||||
// }
|
||||
//std::string value;
|
||||
// bool facet_found = facet_value_to_string(a_facet, facet_count, document, value);
|
||||
// if(!facet_found) {
|
||||
// continue;
|
||||
// }
|
||||
// std::unordered_map<std::string, size_t> ftoken_pos;
|
||||
// std::vector<string>& ftokens = a_facet.hash_tokens[kv.first];
|
||||
// for(size_t ti = 0; ti < ftokens.size(); ti++) {
|
||||
// if(the_field.is_bool()) {
|
||||
// if(ftokens[ti] == "1") {
|
||||
// ftokens[ti] = "true";
|
||||
// } else {
|
||||
// ftokens[ti] = "false";
|
||||
// }
|
||||
// }
|
||||
// const std::string& resolved_token = ftokens[ti];
|
||||
// ftoken_pos[resolved_token] = ti;
|
||||
// }
|
||||
// const std::string& last_full_q_token = ftokens.empty() ? "" : ftokens.back();
|
||||
// // 2 passes: first identify tokens that need to be highlighted and then construct highlighted text
|
||||
// bool is_cyrillic = Tokenizer::is_cyrillic(the_field.locale);
|
||||
// bool normalise = is_cyrillic ? false : true;
|
||||
// Tokenizer tokenizer(value, normalise, !the_field.is_string(), the_field.locale, symbols_to_index, token_separators);
|
||||
// // secondary tokenizer used for specific languages that requires transliteration
|
||||
// // we use 2 tokenizers so that the original text offsets are available for highlighting
|
||||
// Tokenizer word_tokenizer("", true, false, the_field.locale, symbols_to_index, token_separators);
|
||||
// std::string raw_token;
|
||||
// size_t raw_token_index = 0, tok_start = 0, tok_end = 0;
|
||||
// // need an ordered map here to ensure that it is ordered by the key (start offset)
|
||||
// std::map<size_t, size_t> token_offsets;
|
||||
// size_t prefix_token_start_index = 0;
|
||||
// while(tokenizer.next(raw_token, raw_token_index, tok_start, tok_end)) {
|
||||
// if(is_cyrillic) {
|
||||
// word_tokenizer.tokenize(raw_token);
|
||||
// }
|
||||
// auto token_pos_it = ftoken_pos.find(raw_token);
|
||||
// if(token_pos_it != ftoken_pos.end()) {
|
||||
// token_offsets[tok_start] = tok_end;
|
||||
// if(raw_token == last_full_q_token) {
|
||||
// prefix_token_start_index = tok_start;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// auto offset_it = token_offsets.begin();
|
||||
// size_t i = 0;
|
||||
// std::stringstream highlightedss;
|
||||
// // loop until end index, accumulate token and complete highlighting
|
||||
// while(i < value.size()) {
|
||||
// if(offset_it != token_offsets.end()) {
|
||||
// if (i == offset_it->first) {
|
||||
// highlightedss << highlight_start_tag;
|
||||
// // do prefix highlighting for non-dropped last token
|
||||
// size_t token_len = (i == prefix_token_start_index && token_offsets.size() == facet_query_num_tokens) ?
|
||||
// facet_query_last_token.size() :
|
||||
// (offset_it->second - i + 1);
|
||||
// if(i == prefix_token_start_index && token_offsets.size() == facet_query_num_tokens) {
|
||||
// token_len = std::min((offset_it->second - i + 1), facet_query_last_token.size());
|
||||
// } else {
|
||||
// token_len = (offset_it->second - i + 1);
|
||||
// }
|
||||
// for(size_t j = 0; j < token_len; j++) {
|
||||
// highlightedss << value[i + j];
|
||||
// }
|
||||
// highlightedss << highlight_end_tag;
|
||||
// offset_it++;
|
||||
// i += token_len;
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
// highlightedss << value[i];
|
||||
// i++;
|
||||
// }
|
||||
//facet_value_t facet_value = {value, highlightedss.str(), facet_count.count};
|
||||
facet_value_t facet_value = { it.first, std::string(), it.second};
|
||||
facet_values.emplace_back(facet_value);
|
||||
}
|
||||
|
||||
std::stable_sort(facet_values.begin(), facet_values.end(), Collection::facet_count_str_compare);
|
||||
@ -2066,7 +2061,7 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
|
||||
facet_result["stats"]["avg"] = (a_facet.stats.fvsum / a_facet.stats.fvcount);
|
||||
}
|
||||
|
||||
facet_result["stats"]["total_values"] = facet_hash_counts.size();
|
||||
facet_result["stats"]["total_values"] = facet_values.size();
|
||||
result["facet_counts"].push_back(facet_result);
|
||||
}
|
||||
|
||||
@ -4659,9 +4654,9 @@ Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector
|
||||
|
||||
for(const auto& tup : tupVec){
|
||||
|
||||
int64_t lower_range = std::get<0>(tup);
|
||||
int64_t upper_range = std::get<1>(tup);
|
||||
std::string range_val = std::get<2>(tup);
|
||||
const std::string& lower_range = std::to_string(std::get<0>(tup));
|
||||
const std::string& upper_range = std::to_string(std::get<1>(tup));
|
||||
const std::string& range_val = std::get<2>(tup);
|
||||
//check if ranges are continous or not
|
||||
if((!range_map.empty()) && (range_map.find(lower_range)== range_map.end())){
|
||||
std::string error = "Ranges in range facet syntax should be continous.";
|
||||
|
@ -51,25 +51,6 @@ void facet_index_t::insert(const std::string& field, const std::string& value, u
|
||||
}
|
||||
}
|
||||
|
||||
size_t facet_index_t::get(const std::string& field,
|
||||
std::map<std::string,std::vector<uint32_t>>& result_ids) {
|
||||
|
||||
const auto& facet_field_it = facet_field_map.find(field);
|
||||
if(facet_field_it == facet_field_map.end()) {
|
||||
return 0;
|
||||
}
|
||||
auto& facet_index_map = facet_field_it->second.facet_index_map;
|
||||
|
||||
for(auto it = facet_index_map.begin(); it != facet_index_map.end(); ++it) {
|
||||
auto ids = ids_t::uncompress(it.value());
|
||||
for(auto i = 0; i < ids_t::num_ids(ids); ++i) {
|
||||
result_ids[it.key()].emplace_back(ids[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return result_ids.size();
|
||||
}
|
||||
|
||||
bool facet_index_t::contains(const std::string& field) {
|
||||
|
||||
const auto& facet_field_it = facet_field_map.find(field);
|
||||
@ -77,38 +58,14 @@ bool facet_index_t::contains(const std::string& field) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// auto& facet_index_map = facet_field_it->second.facet_index_map;
|
||||
// LOG(INFO) << "Size of facet_field " << field << " " << facet_index_map.size();
|
||||
|
||||
// for(auto it = facet_index_map.begin(); it != facet_index_map.end(); ++it) {
|
||||
// LOG (INFO) << "facet_value " << it.key() << " with ids as follow";
|
||||
|
||||
// auto ids = ids_t::uncompress(it.value());
|
||||
// for(auto i = 0; i < ids_t::num_ids(ids); ++i) {
|
||||
// LOG(INFO) << ids[i];
|
||||
// }
|
||||
// }
|
||||
return true;
|
||||
}
|
||||
|
||||
void facet_index_t::erase(const std::string& field) {
|
||||
|
||||
const auto& facet_field_it = facet_field_map.find(field);
|
||||
if(facet_field_it == facet_field_map.end()) {
|
||||
return;
|
||||
const auto it = facet_field_map.find(field);
|
||||
if(it != facet_field_map.end()) {
|
||||
facet_field_map.erase(field);
|
||||
}
|
||||
|
||||
auto& facet_index_map = facet_field_it->second.facet_index_map;
|
||||
|
||||
for(auto it = facet_index_map.begin(); it != facet_index_map.end(); ++it) {
|
||||
ids_t::destroy_list(it.value());
|
||||
}
|
||||
|
||||
facet_index_map.clear();
|
||||
|
||||
facet_field_it->second.counter_list.clear();
|
||||
|
||||
facet_field_map.erase(field);
|
||||
}
|
||||
|
||||
size_t facet_index_t::size() {
|
||||
@ -132,39 +89,68 @@ int facet_index_t::intersect(const std::string& field, const uint32_t* result_id
|
||||
// LOG (INFO) << "facet_index_map size " << facet_index_map.size()
|
||||
// << " , counter_list size " << counter_list.size();
|
||||
|
||||
auto counter_list_it = counter_list.begin();
|
||||
int facet_count = 0;
|
||||
|
||||
std::vector<uint32_t> id_list;
|
||||
const auto max_facets = std::min((int)counter_list.size(), max_facet_count);
|
||||
while(facet_count < max_facets) {
|
||||
for(const auto& counter_list_it : counter_list) {
|
||||
//LOG (INFO) << "checking ids in facet_value " << counter_list_it->facet_value
|
||||
// << " having total count " << counter_list_it->count;
|
||||
|
||||
auto ids = facet_index_map.at(counter_list_it->facet_value);
|
||||
auto id_list = ids_t::uncompress(ids);
|
||||
auto ids = facet_index_map.at(counter_list_it.facet_value);
|
||||
ids_t::uncompress(ids, id_list);
|
||||
const auto ids_len = id_list.size();
|
||||
int count = 0;
|
||||
|
||||
for(int i = 0; i < result_ids_len; ++i) {
|
||||
if(std::binary_search(id_list, id_list + ids_t::num_ids(id_list), result_ids[i])) {
|
||||
if(std::binary_search(id_list.begin(), id_list.end(), result_ids[i])) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
if(count) {
|
||||
//LOG (INFO) << "fount count " << count << " for facet " << counter_list_it->facet_value;
|
||||
found[counter_list_it->facet_value] += count;
|
||||
found[counter_list_it.facet_value] = count;
|
||||
|
||||
if(found.size() == max_facets) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
++facet_count;
|
||||
++counter_list_it;
|
||||
id_list.clear();
|
||||
}
|
||||
|
||||
|
||||
return found.size();
|
||||
}
|
||||
|
||||
facet_index_t::~facet_index_t() {
|
||||
for(auto it = facet_field_map.begin(); it != facet_field_map.end(); ++it) {
|
||||
erase(it->first);
|
||||
int facet_index_t::get_facet(const std::string& field, const std::vector<std::string>& searched_tokens,
|
||||
std::vector<std::string>& facets) {
|
||||
|
||||
const auto& facet_field_it = facet_field_map.find(field);
|
||||
|
||||
if(facet_field_it == facet_field_map.end()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto facet_index_map = facet_field_it->second.facet_index_map;
|
||||
|
||||
for(const auto& token : searched_tokens) {
|
||||
auto token_string = token;
|
||||
std::transform(token_string.begin(), token_string.end(), token_string.begin(), ::tolower);
|
||||
|
||||
for(auto facet_index_map_it = facet_index_map.begin();
|
||||
facet_index_map_it != facet_index_map.end(); ++facet_index_map_it) {
|
||||
|
||||
auto facet_string = facet_index_map_it.key();
|
||||
std::transform(facet_string.begin(), facet_string.end(), facet_string.begin(), ::tolower);
|
||||
|
||||
if(facet_string.find(token_string) != std::string::npos) {
|
||||
facets.emplace_back(facet_index_map_it.key());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return facets.size();
|
||||
}
|
||||
|
||||
facet_index_t::~facet_index_t() {
|
||||
facet_field_map.clear();
|
||||
}
|
||||
|
||||
|
222
src/index.cpp
222
src/index.cpp
@ -62,6 +62,8 @@ Index::Index(const std::string& name, const uint32_t collection_id, const Store*
|
||||
search_schema(search_schema),
|
||||
seq_ids(new id_list_t(256)), symbols_to_index(symbols_to_index), token_separators(token_separators) {
|
||||
|
||||
facet_index_v4 = new facet_index_t();
|
||||
|
||||
for(const auto& a_field: search_schema) {
|
||||
if(!a_field.index) {
|
||||
continue;
|
||||
@ -102,9 +104,6 @@ Index::Index(const std::string& name, const uint32_t collection_id, const Store*
|
||||
|
||||
if(a_field.facet) {
|
||||
//initialize_facet_indexes(a_field);
|
||||
if(facet_index_v4 == nullptr) {
|
||||
facet_index_v4 = new facet_index_t();
|
||||
}
|
||||
}
|
||||
|
||||
// initialize for non-string facet fields
|
||||
@ -1173,9 +1172,9 @@ void Index::initialize_facet_indexes(const field& facet_field) {
|
||||
// }
|
||||
}
|
||||
|
||||
void Index::compute_facet_stats(facet &a_facet, uint64_t raw_value, const std::string & field_type) {
|
||||
void Index::compute_facet_stats(facet &a_facet, std::string raw_value, const std::string & field_type) {
|
||||
if(field_type == field_types::INT32 || field_type == field_types::INT32_ARRAY) {
|
||||
int32_t val = raw_value;
|
||||
int32_t val = std::stoi(raw_value);
|
||||
if (val < a_facet.stats.fvmin) {
|
||||
a_facet.stats.fvmin = val;
|
||||
}
|
||||
@ -1185,7 +1184,7 @@ void Index::compute_facet_stats(facet &a_facet, uint64_t raw_value, const std::s
|
||||
a_facet.stats.fvsum += val;
|
||||
a_facet.stats.fvcount++;
|
||||
} else if(field_type == field_types::INT64 || field_type == field_types::INT64_ARRAY) {
|
||||
int64_t val = raw_value;
|
||||
int64_t val = std::stol(raw_value);
|
||||
if(val < a_facet.stats.fvmin) {
|
||||
a_facet.stats.fvmin = val;
|
||||
}
|
||||
@ -1195,7 +1194,7 @@ void Index::compute_facet_stats(facet &a_facet, uint64_t raw_value, const std::s
|
||||
a_facet.stats.fvsum += val;
|
||||
a_facet.stats.fvcount++;
|
||||
} else if(field_type == field_types::FLOAT || field_type == field_types::FLOAT_ARRAY) {
|
||||
float val = reinterpret_cast<float&>(raw_value);
|
||||
float val = std::stof(raw_value);
|
||||
if(val < a_facet.stats.fvmin) {
|
||||
a_facet.stats.fvmin = val;
|
||||
}
|
||||
@ -1219,7 +1218,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
const auto& facet_field = facet_infos[findex].facet_field;
|
||||
const bool use_facet_query = facet_infos[findex].use_facet_query;
|
||||
//const auto& fquery_hashes = facet_infos[findex].hashes;
|
||||
const auto& fquery_doc_id_tokens = facet_infos[findex].doc_id_tokens;
|
||||
const auto& fquery_facet_tokens = facet_infos[findex].facet_tokens;
|
||||
const bool should_compute_stats = facet_infos[findex].should_compute_stats;
|
||||
|
||||
auto sort_index_it = sort_index.find(a_facet.field_name);
|
||||
@ -1232,9 +1231,41 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
// size_t facet_hash_count = 1;
|
||||
// const auto& field_facet_mapping_it = facet_index_v3.find(a_facet.field_name);
|
||||
// const auto& field_single_val_facet_mapping_it = single_val_facet_index_v3.find(a_facet.field_name);
|
||||
std::map<std::string, uint32_t> facet_results;
|
||||
facet_index_v4->intersect(a_facet.field_name, result_ids,
|
||||
results_size, max_facet_count, facet_results[a_facet.field_name]);
|
||||
results_size, max_facet_count, facet_results);
|
||||
//LOG(INFO) << "facet_results size " << facet_results.size();
|
||||
|
||||
for(const auto& kv : facet_results) {
|
||||
//range facet processing
|
||||
if(a_facet.is_range_query) {
|
||||
const auto doc_val = kv.first;
|
||||
std::pair<std::string, std::string> range_pair {};
|
||||
if(a_facet.get_range(doc_val, range_pair)) {
|
||||
const auto& range_id = range_pair.first;
|
||||
facet_count_t& facet_count = a_facet.result_map[range_id];
|
||||
facet_count.count = kv.second;
|
||||
}
|
||||
} else if(use_facet_query) {
|
||||
if (fquery_facet_tokens.find(kv.first) != fquery_facet_tokens.end()) {
|
||||
a_facet.facet_tokens[kv.first] = fquery_facet_tokens.at(kv.first);
|
||||
|
||||
facet_count_t& facet_count = a_facet.result_map[kv.first];
|
||||
facet_count.count = kv.second;
|
||||
}
|
||||
} else {
|
||||
facet_count_t& facet_count = a_facet.result_map[kv.first];
|
||||
facet_count.count = kv.second;
|
||||
}
|
||||
|
||||
if(should_compute_stats) {
|
||||
//LOG(INFO) << "Computing facet stas for facet " << a_facet.field_name;
|
||||
for(int i = 0; i < kv.second; ++i) {
|
||||
compute_facet_stats(a_facet, kv.first, facet_field.type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// for(size_t i = 0; i < results_size; i++) {
|
||||
// // if sampling is enabled, we will skip a portion of the results to speed up things
|
||||
@ -1266,7 +1297,6 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
|
||||
// const uint64_t distinct_id = group_limit ? get_distinct_id(group_by_fields, doc_seq_id) : 0;
|
||||
// //for(size_t j = 0; j < facet_hash_count; j++) {
|
||||
// for(size_t j = 0; j < found_doc_seq_ids.size(); j++) {
|
||||
// // if(facet_field.is_array()) {
|
||||
// // fhash = facet_map_it->second.hashes[j];
|
||||
// // }
|
||||
@ -1306,7 +1336,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
}
|
||||
//}
|
||||
}
|
||||
|
||||
void Index::aggregate_topster(Topster* agg_topster, Topster* index_topster) {
|
||||
@ -2399,18 +2429,22 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
while (it.valid()) {
|
||||
uint32_t seq_id = it.id();
|
||||
uint64_t distinct_id = seq_id;
|
||||
if (group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// if (group_limit != 0) {
|
||||
// distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
// if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
|
||||
int64_t scores[3] = {0};
|
||||
scores[0] = seq_id;
|
||||
int64_t match_score_index = -1;
|
||||
|
||||
result_ids.push_back(seq_id);
|
||||
if(group_limit == 0) {
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
|
||||
topster->add(&kv);
|
||||
}
|
||||
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores, nullptr);
|
||||
int ret = topster->add(&kv);
|
||||
@ -2507,12 +2541,12 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
}
|
||||
|
||||
uint64_t distinct_id = seq_id;
|
||||
if (group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// if (group_limit != 0) {
|
||||
// distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
// if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
|
||||
auto vec_dist_score = (field_vector_index->distance_type == cosine) ? std::abs(dist_label.first) :
|
||||
dist_label.first;
|
||||
@ -2530,9 +2564,9 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores, nullptr);
|
||||
int ret = topster->add(&kv);
|
||||
|
||||
if(group_limit != 0 && ret < 2) {
|
||||
groups_processed[distinct_id]++;
|
||||
}
|
||||
// if(group_limit != 0 && ret < 2) {
|
||||
// groups_processed[distinct_id]++;
|
||||
// }
|
||||
nearest_ids.push_back(seq_id);
|
||||
}
|
||||
|
||||
@ -2826,7 +2860,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
bool estimate_facets = (facet_sample_percent < 100 && all_result_ids_len > facet_sample_threshold);
|
||||
|
||||
if(!facets.empty()) {
|
||||
const size_t num_threads = std::min(concurrency, all_result_ids_len);
|
||||
//const size_t num_threads = std::min(concurrency, all_result_ids_len);
|
||||
const size_t num_threads = 1;
|
||||
const size_t window_size = (num_threads == 0) ? 0 :
|
||||
(all_result_ids_len + num_threads - 1) / num_threads; // rounds up
|
||||
size_t num_processed = 0;
|
||||
@ -2897,10 +2932,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
for(auto & facet_kv: this_facet.result_map) {
|
||||
if(group_limit) {
|
||||
// we have to add all group sets
|
||||
acc_facet.hash_groups[facet_kv.first].insert(
|
||||
this_facet.hash_groups[facet_kv.first].begin(),
|
||||
this_facet.hash_groups[facet_kv.first].end()
|
||||
);
|
||||
// acc_facet.hash_groups[facet_kv.first].insert(
|
||||
// this_facet.hash_groups[facet_kv.first].begin(),
|
||||
// this_facet.hash_groups[facet_kv.first].end()
|
||||
// );
|
||||
} else {
|
||||
size_t count = 0;
|
||||
if(acc_facet.result_map.count(facet_kv.first) == 0) {
|
||||
@ -2912,9 +2947,10 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
acc_facet.result_map[facet_kv.first].count = count;
|
||||
}
|
||||
|
||||
acc_facet.result_map[facet_kv.first].doc_id = facet_kv.second.doc_id;
|
||||
acc_facet.result_map[facet_kv.first].array_pos = facet_kv.second.array_pos;
|
||||
acc_facet.hash_tokens[facet_kv.first] = this_facet.hash_tokens[facet_kv.first];
|
||||
//acc_facet.result_map[facet_kv.first].doc_id = facet_kv.second.doc_id;
|
||||
//acc_facet.result_map[facet_kv.first].array_pos = facet_kv.second.array_pos;
|
||||
//acc_facet.hash_tokens[facet_kv.first] = this_facet.hash_tokens[facet_kv.first];
|
||||
acc_facet.facet_tokens[facet_kv.first] = this_facet.facet_tokens[facet_kv.first];
|
||||
}
|
||||
|
||||
if(this_facet.stats.fvcount != 0) {
|
||||
@ -2928,9 +2964,9 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
|
||||
for(auto & acc_facet: facets) {
|
||||
for(auto& facet_kv: acc_facet.result_map) {
|
||||
if(group_limit) {
|
||||
facet_kv.second.count = acc_facet.hash_groups[facet_kv.first].size();
|
||||
}
|
||||
// if(group_limit) {
|
||||
// facet_kv.second.count = acc_facet.hash_groups[facet_kv.first].size();
|
||||
// }
|
||||
|
||||
if(estimate_facets) {
|
||||
facet_kv.second.count = size_t(double(facet_kv.second.count) * (100.0f / facet_sample_percent));
|
||||
@ -2955,8 +2991,6 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
|
||||
facet_infos, group_limit, group_by_fields, &included_ids_vec[0],
|
||||
included_ids_vec.size(), max_facet_values);
|
||||
|
||||
facet_index_v4->contains("tags");
|
||||
|
||||
all_result_ids_len += curated_topster->size;
|
||||
|
||||
delete [] all_result_ids;
|
||||
@ -3665,12 +3699,12 @@ void Index::search_across_fields(const std::vector<token_t>& query_tokens,
|
||||
}
|
||||
|
||||
uint64_t distinct_id = seq_id;
|
||||
if(group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// if(group_limit != 0) {
|
||||
// distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
// if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
// return;
|
||||
// }
|
||||
// }
|
||||
|
||||
int64_t scores[3] = {0};
|
||||
int64_t match_score_index = -1;
|
||||
@ -4267,18 +4301,21 @@ void Index::do_infix_search(const size_t num_search_fields, const std::vector<se
|
||||
100, scores, match_score_index);
|
||||
|
||||
uint64_t distinct_id = seq_id;
|
||||
if(group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
|
||||
int ret = actual_topster->add(&kv);
|
||||
if(group_limit != 0 && ret < 2) {
|
||||
groups_processed[distinct_id]++;
|
||||
// if(group_limit != 0) {
|
||||
// distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
// if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
if(group_limit == 0) {
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
|
||||
int ret = actual_topster->add(&kv);
|
||||
}
|
||||
|
||||
// if(group_limit != 0 && ret < 2) {
|
||||
// groups_processed[distinct_id]++;
|
||||
// }
|
||||
|
||||
|
||||
if(((i + 1) % (1 << 12)) == 0) {
|
||||
BREAK_CIRCUIT_BREAKER
|
||||
@ -4374,10 +4411,6 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
|
||||
// && (field_single_val_facet_mapping_it == single_val_facet_index_v3.end())) {
|
||||
// continue;
|
||||
// }
|
||||
std::map<std::string, std::vector<uint32_t>> found_doc_ids;
|
||||
if(facet_index_v4->get(a_facet.field_name, found_doc_ids) == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
facet_infos[findex].use_facet_query = false;
|
||||
|
||||
@ -4442,6 +4475,16 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
|
||||
for(auto leaf: searched_query) {
|
||||
posting_lists.push_back(leaf->values);
|
||||
std::string tok(reinterpret_cast<char*>(leaf->key), leaf->key_len - 1);
|
||||
|
||||
//convert again to boolean string to help search in facet_index map
|
||||
if (facet_field.is_bool()) {
|
||||
if (tok == "1") {
|
||||
tok = "true";
|
||||
} else if (tok == "0") {
|
||||
tok = "false";
|
||||
}
|
||||
}
|
||||
|
||||
searched_tokens.push_back(tok);
|
||||
//LOG(INFO) << "tok: " << tok;
|
||||
}
|
||||
@ -4468,6 +4511,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
|
||||
if(!id_matched) {
|
||||
continue;
|
||||
}
|
||||
//LOG(INFO) << "seq_id matched : " << seq_id;
|
||||
|
||||
// if(facet_field.is_array()) {
|
||||
// const auto doc_fvalues_it = field_facet_mapping_it->second[seq_id % ARRAY_FACET_DIM]->find(seq_id);
|
||||
@ -4501,19 +4545,18 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
|
||||
// facet_infos[findex].hashes.emplace(hash, searched_tokens);
|
||||
// }
|
||||
// }
|
||||
for(const auto& found_doc_it : found_doc_ids) {
|
||||
const auto& ids = found_doc_it.second;
|
||||
if(std::binary_search(ids.begin(), ids.end(), seq_id)){
|
||||
for(const auto& doc_id : ids) {
|
||||
if(facet_infos[findex].doc_id_tokens.count(doc_id) == 0) {
|
||||
facet_infos[findex].doc_id_tokens.emplace(doc_id, searched_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
std::vector<std::string> matched_facets;
|
||||
if(facet_index_v4->get_facet(a_facet.field_name, searched_tokens, matched_facets)) {
|
||||
for(const auto& facet : matched_facets) {
|
||||
if(facet_infos[findex].facet_tokens.count(facet) == 0) {
|
||||
LOG(INFO) << "adding facet " << facet << " in facet_info";
|
||||
facet_infos[findex].facet_tokens.emplace(facet, searched_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
delete [] field_result_ids;
|
||||
}
|
||||
}
|
||||
@ -4621,20 +4664,20 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
|
||||
100, scores, match_score_index);
|
||||
|
||||
uint64_t distinct_id = seq_id;
|
||||
if(group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
continue;
|
||||
}
|
||||
// if(group_limit != 0) {
|
||||
// distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
// if(excluded_group_ids.count(distinct_id) != 0) {
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
if(group_limit == 0) {
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
|
||||
int ret = topsters[thread_id]->add(&kv);
|
||||
}
|
||||
|
||||
KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
|
||||
int ret = topsters[thread_id]->add(&kv);
|
||||
|
||||
if(group_limit != 0 && ret < 2) {
|
||||
tgroups_processed[thread_id][distinct_id]++;
|
||||
}
|
||||
|
||||
|
||||
// if(group_limit != 0 && ret < 2) {
|
||||
// tgroups_processed[thread_id][distinct_id]++;
|
||||
// }
|
||||
if(check_for_circuit_break && ((i + 1) % (1 << 15)) == 0) {
|
||||
// check only once every 2^15 docs to reduce overhead
|
||||
BREAK_CIRCUIT_BREAKER
|
||||
@ -5220,11 +5263,16 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
|
||||
|
||||
uint64_t distinct_id = seq_id;
|
||||
|
||||
if(group_limit != 0) {
|
||||
distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
}
|
||||
// if(group_limit != 0) {
|
||||
// distinct_id = get_distinct_id(group_by_fields, seq_id);
|
||||
// groups_processed.emplace(distinct_id);
|
||||
// }
|
||||
|
||||
//LOG(INFO) << "Seq id: " << seq_id << ", match_score: " << match_score;
|
||||
if(group_limit == 0) {
|
||||
KV kv(query_index, seq_id, distinct_id, match_score_index, scores);
|
||||
topster->add(&kv);
|
||||
}
|
||||
KV kv(query_index, seq_id, distinct_id, match_score_index, scores);
|
||||
int ret = topster->add(&kv);
|
||||
if(group_limit != 0 && ret < 2) {
|
||||
@ -5239,7 +5287,6 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
|
||||
uint64_t Index::get_distinct_id(const std::vector<std::string>& group_by_fields,
|
||||
const uint32_t seq_id) const {
|
||||
uint64_t distinct_id = 1; // some constant initial value
|
||||
std::hash<std::string> hasher;
|
||||
// calculate hash from group_by_fields
|
||||
for(const auto& field: group_by_fields) {
|
||||
// const auto& field_facet_mapping_it = facet_index_v3.find(field);
|
||||
@ -5277,9 +5324,6 @@ uint64_t Index::get_distinct_id(const std::vector<std::string>& group_by_fields,
|
||||
|
||||
// distinct_id = StringUtils::hash_combine(distinct_id, facet_hash);
|
||||
// }
|
||||
|
||||
const auto& hash = hasher(field);
|
||||
distinct_id = StringUtils::hash_combine(distinct_id, hash);
|
||||
}
|
||||
|
||||
return distinct_id;
|
||||
|
@ -103,11 +103,11 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
|
||||
ASSERT_STREQ("tags", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
|
||||
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
ASSERT_STREQ("gold", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
ASSERT_STREQ("bronze", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
|
||||
ASSERT_STREQ("silver", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
|
||||
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][1]["count"]);
|
||||
ASSERT_STREQ("FINE PLATINUM", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
|
||||
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][1]["count"]);
|
||||
|
||||
// 2 facets, 1 text query with no filters
|
||||
facets.clear();
|
||||
@ -230,12 +230,12 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
|
||||
ASSERT_STREQ("age", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
|
||||
|
||||
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
ASSERT_STREQ("21", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("<mark>2</mark>1", results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("24", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("<mark>2</mark>4", results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
|
||||
|
||||
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][1]["count"]);
|
||||
ASSERT_STREQ("24", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("<mark>2</mark>4", results["facet_counts"][0]["counts"][1]["highlighted"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("21", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("<mark>2</mark>1", results["facet_counts"][0]["counts"][1]["highlighted"].get<std::string>().c_str());
|
||||
|
||||
// facet on a float field without query to check on stats
|
||||
results = coll_array_fields->search("*", query_fields, "", {"rating"}, sort_fields, {0}, 10, 1, FREQUENCY,
|
||||
@ -258,7 +258,7 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "rating: 7").get();
|
||||
|
||||
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_STREQ("rating", results["facet_counts"][0]["field_name"].get<std::string>().c_str());
|
||||
@ -278,7 +278,7 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
|
||||
results = coll_array_fields->search("*", query_fields, "", {"timestamps"}, sort_fields, {0}, 10, 1, FREQUENCY,
|
||||
{false}, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "timestamps: 142189002").get();
|
||||
spp::sparse_hash_set<std::string>(), 10, "timestamps: 142189002").get();
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
@ -607,7 +607,6 @@ TEST_F(CollectionFacetingTest, FacetCountsHighlighting) {
|
||||
ASSERT_STREQ("Cell Phone <mark>Acces</mark>sories", results["facet_counts"][0]["counts"][0]["highlighted"].get<std::string>().c_str());
|
||||
|
||||
// ensure that only the last token is treated as prefix search
|
||||
|
||||
coll1->remove("100");
|
||||
doc["categories"] = {"Cell Phones", "Cell Phone Accessories", "Cellophanes"};
|
||||
coll1->add(doc.dump());
|
||||
@ -616,6 +615,8 @@ TEST_F(CollectionFacetingTest, FacetCountsHighlighting) {
|
||||
token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "categories:cell ph").get();
|
||||
|
||||
LOG(INFO) << results.dump();
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
@ -673,6 +674,7 @@ TEST_F(CollectionFacetingTest, FacetStatOnFloatFields) {
|
||||
1, FREQUENCY, {false});
|
||||
|
||||
auto results = res_op.get();
|
||||
LOG(INFO) << results.dump();
|
||||
|
||||
ASSERT_EQ(7, results["hits"].size());
|
||||
|
||||
@ -746,8 +748,8 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
ASSERT_STREQ("India in England", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("England in India", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("England in India", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("India in England", results["facet_counts"][0]["counts"][1]["value"].get<std::string>().c_str());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
@ -1645,6 +1647,7 @@ TEST_F(CollectionFacetingTest, FacetIndexRefactor) {
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(4, results["facet_counts"][0].size());
|
||||
ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
|
||||
|
||||
ASSERT_STREQ("gold", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
|
@ -69,6 +69,8 @@ TEST_F(CollectionGroupingTest, GroupingBasics) {
|
||||
"", 10,
|
||||
{}, {}, {"size"}, 2).get();
|
||||
|
||||
LOG(INFO) << res.dump();
|
||||
|
||||
ASSERT_EQ(3, res["found"].get<size_t>());
|
||||
ASSERT_EQ(3, res["grouped_hits"].size());
|
||||
ASSERT_EQ(11, res["grouped_hits"][0]["group_key"][0].get<size_t>());
|
||||
|
Loading…
x
Reference in New Issue
Block a user