Mirror of https://github.com/typesense/typesense.git (synced 2025-05-21 22:33:27 +08:00)

Facet estimation for large result sets.

This commit is contained in:
  parent e836af159f
  commit df8f6849fe
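In short: this commit introduces two search parameters, facet_sample_percent (default 100) and facet_sample_threshold (default 0). When the result set is larger than the threshold and the percent is below 100, facet counting runs over a random sample of the result ids (with a fixed seed so counts stay stable across runs of the same query), the sampled counts are extrapolated back to the full result size, and the facet response is flagged with "sampled": true.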
@@ -408,7 +408,9 @@ public:
                                      const size_t facet_query_num_typos = 2,
                                      const size_t filter_curated_hits_option = 2,
                                      const bool prioritize_token_position = false,
-                                     const std::string& vector_query_str = "") const;
+                                     const std::string& vector_query_str = "",
+                                     const size_t facet_sample_percent = 100,
+                                     const size_t facet_sample_threshold = 0) const;
 
     Option<bool> get_filter_ids(const std::string & simple_filter_query,
                                 std::vector<std::pair<size_t, uint32_t*>>& index_ids);
@@ -682,6 +682,8 @@ struct facet {
 
     bool is_range_query;
 
+    bool sampled = false;
+
     bool get_range(int64_t key, std::pair<int64_t, std::string>& range_pair)
     {
         if(facet_range_map.empty())
@@ -132,6 +132,8 @@ struct search_args {
     std::vector<std::vector<KV*>> override_result_kvs;
 
     vector_query_t& vector_query;
+    size_t facet_sample_percent;
+    size_t facet_sample_threshold;
 
     search_args(std::vector<query_tokens_t> field_query_tokens, std::vector<search_field_t> search_fields,
                 filter_node_t* filter_tree_root, std::vector<facet>& facets,
@@ -145,7 +147,8 @@ struct search_args {
                 size_t concurrency, size_t search_cutoff_ms,
                 size_t min_len_1typo, size_t min_len_2typo, size_t max_candidates, const std::vector<enable_t>& infixes,
                 const size_t max_extra_prefix, const size_t max_extra_suffix, const size_t facet_query_num_typos,
-                const bool filter_curated_hits, const enable_t split_join_tokens, vector_query_t& vector_query) :
+                const bool filter_curated_hits, const enable_t split_join_tokens, vector_query_t& vector_query,
+                size_t facet_sample_percent, size_t facet_sample_threshold) :
             field_query_tokens(field_query_tokens),
             search_fields(search_fields), filter_tree_root(filter_tree_root), facets(facets),
             included_ids(included_ids), excluded_ids(excluded_ids), sort_fields_std(sort_fields_std),
@@ -159,7 +162,8 @@ struct search_args {
             min_len_1typo(min_len_1typo), min_len_2typo(min_len_2typo), max_candidates(max_candidates),
             infixes(infixes), max_extra_prefix(max_extra_prefix), max_extra_suffix(max_extra_suffix),
             facet_query_num_typos(facet_query_num_typos), filter_curated_hits(filter_curated_hits),
-            split_join_tokens(split_join_tokens), vector_query(vector_query) {
+            split_join_tokens(split_join_tokens), vector_query(vector_query),
+            facet_sample_percent(facet_sample_percent), facet_sample_threshold(facet_sample_threshold) {
 
         const size_t topster_size = std::max((size_t)1, max_hits); // needs to be atleast 1 since scoring is mandatory
         topster = new Topster(topster_size, group_limit);
@@ -357,6 +361,7 @@ private:
     void log_leaves(int cost, const std::string &token, const std::vector<art_leaf *> &leaves) const;
 
     void do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
+                   bool estimate_facets, size_t facet_sample_percent,
                    const std::vector<facet_info_t>& facet_infos,
                    size_t group_limit, const std::vector<std::string>& group_by_fields,
                    const uint32_t* result_ids, size_t results_size) const;
@@ -645,7 +650,7 @@ public:
                 size_t max_candidates, const std::vector<enable_t>& infixes, const size_t max_extra_prefix,
                 const size_t max_extra_suffix, const size_t facet_query_num_typos,
                 const bool filter_curated_hits, enable_t split_join_tokens,
-                const vector_query_t& vector_query) const;
+                const vector_query_t& vector_query, size_t facet_sample_percent, size_t facet_sample_threshold) const;
 
     void remove_field(uint32_t seq_id, const nlohmann::json& document, const std::string& field_name);
 
@@ -866,7 +866,9 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
                                           const size_t facet_query_num_typos,
                                           const size_t filter_curated_hits_option,
                                           const bool prioritize_token_position,
-                                          const std::string& vector_query_str) const {
+                                          const std::string& vector_query_str,
+                                          const size_t facet_sample_percent,
+                                          const size_t facet_sample_threshold) const {
 
     std::shared_lock lock(mutex);
 
@@ -911,6 +913,10 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
         }
     }
 
+    if(facet_sample_percent > 100) {
+        return Option<nlohmann::json>(400, "Value of `facet_sample_percent` must be less than 100.");
+    }
+
     if(raw_group_by_fields.empty()) {
         group_limit = 0;
     }
@@ -1302,7 +1308,8 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
                                           search_stop_millis,
                                           min_len_1typo, min_len_2typo, max_candidates, infixes,
                                           max_extra_prefix, max_extra_suffix, facet_query_num_typos,
-                                          filter_curated_hits, split_join_tokens, vector_query);
+                                          filter_curated_hits, split_join_tokens, vector_query,
+                                          facet_sample_percent, facet_sample_threshold);
 
     index->run_search(search_params);
 
@@ -1319,12 +1326,6 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
 
     // for grouping we have to aggregate group set sizes to a count value
     if(group_limit) {
-        for(auto& acc_facet: facets) {
-            for(auto& facet_kv: acc_facet.result_map) {
-                facet_kv.second.count = acc_facet.hash_groups[facet_kv.first].size();
-            }
-        }
-
         total_found = search_params->groups_processed.size() + override_result_kvs.size();
     } else {
         total_found = search_params->all_result_ids_len;
@@ -1430,8 +1431,6 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
     // handle which fields have to be highlighted
 
     std::vector<highlight_field_t> highlight_items;
-    bool has_atleast_one_fully_highlighted_field = false;
-
     std::vector<std::string> highlight_field_names;
     StringUtils::split(highlight_fields, highlight_field_names, ",");
 
@@ -1442,12 +1441,6 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
         process_highlight_fields(weighted_search_fields, raw_search_fields, include_fields_full, exclude_fields_full,
                                  highlight_field_names, highlight_full_field_names, infixes, q_tokens,
                                  search_params->qtoken_set, highlight_items);
-
-        for(auto& highlight_item: highlight_items) {
-            if(highlight_item.fully_highlighted) {
-                has_atleast_one_fully_highlighted_field = true;
-            }
-        }
     }
 
     nlohmann::json result = nlohmann::json::object();
@@ -1657,6 +1650,7 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
         for(facet & a_facet: facets) {
             nlohmann::json facet_result = nlohmann::json::object();
             facet_result["field_name"] = a_facet.field_name;
+            facet_result["sampled"] = a_facet.sampled;
             facet_result["counts"] = nlohmann::json::array();
 
             std::vector<facet_value_t> facet_values;
@@ -695,6 +695,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
     const char *EXHAUSTIVE_SEARCH = "exhaustive_search";
     const char *SPLIT_JOIN_TOKENS = "split_join_tokens";
 
+    const char *FACET_SAMPLE_PERCENT = "facet_sample_percent";
+    const char *FACET_SAMPLE_THRESHOLD = "facet_sample_threshold";
+
     // enrich params with values from embedded params
     for(auto& item: embedded_params.items()) {
        if(item.key() == "expires_at") {
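Since do_search reads these constants straight out of the request parameter map, the two sampling knobs become plain query-string parameters on the search endpoint. A hypothetical request (collection name and values are illustrative only):

    GET /collections/coll1/documents/search?q=*&facet_by=color&facet_sample_percent=10&facet_sample_threshold=1000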
@@ -720,7 +723,6 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
 
     // end check for mandatory params
 
-
     const std::string& raw_query = req_params[QUERY];
     std::vector<uint32_t> num_typos = {2};
     size_t min_len_1typo = 4;
@@ -772,6 +774,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
     size_t max_extra_prefix = INT16_MAX;
     size_t max_extra_suffix = INT16_MAX;
 
+    size_t facet_sample_percent = 100;
+    size_t facet_sample_threshold = 0;
+
     std::unordered_map<std::string, size_t*> unsigned_int_values = {
         {MIN_LEN_1TYPO, &min_len_1typo},
         {MIN_LEN_2TYPO, &min_len_2typo},
@@ -790,6 +795,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
         {MAX_CANDIDATES, &max_candidates},
         {FACET_QUERY_NUM_TYPOS, &facet_query_num_typos},
         {FILTER_CURATED_HITS, &filter_curated_hits_option},
+        {FACET_SAMPLE_PERCENT, &facet_sample_percent},
+        {FACET_SAMPLE_THRESHOLD, &facet_sample_threshold},
     };
 
     std::unordered_map<std::string, std::string*> str_values = {
@@ -982,7 +989,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
                                           facet_query_num_typos,
                                           filter_curated_hits_option,
                                           prioritize_token_position,
-                                          vector_query
+                                          vector_query,
+                                          facet_sample_percent,
+                                          facet_sample_threshold
                                           );
 
     uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
@@ -4,6 +4,7 @@
 #include <chrono>
 #include <set>
 #include <unordered_map>
+#include <random>
 #include <array_utils.h>
 #include <match_score.h>
 #include <string_utils.h>
@@ -1228,6 +1229,7 @@ void Index::compute_facet_stats(facet &a_facet, uint64_t raw_value, const std::s
 }
 
 void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
+                      bool estimate_facets, size_t facet_sample_percent,
                       const std::vector<facet_info_t>& facet_infos,
                       const size_t group_limit, const std::vector<std::string>& group_by_fields,
                       const uint32_t* result_ids, size_t results_size) const {
@@ -1247,8 +1249,21 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
 
         const auto& field_facet_mapping = field_facet_mapping_it->second;
 
+        // used for sampling facets (if enabled)
+        std::mt19937 gen(137723); // use constant seed to make sure that counts don't jump around
+        std::uniform_int_distribution<> distr(1, 100); // 1 to 100 inclusive
+
         for(size_t i = 0; i < results_size; i++) {
             uint32_t doc_seq_id = result_ids[i];
 
+            // if sampling is enabled, we will skip a portion of the results to speed up things
+            if(estimate_facets) {
+                size_t num = distr(gen);
+                if(num > facet_sample_percent) {
+                    continue;
+                }
+            }
+
             const auto& facet_hashes_it = field_facet_mapping[doc_seq_id % ARRAY_FACET_DIM]->find(doc_seq_id);
 
             if(facet_hashes_it == field_facet_mapping[doc_seq_id % ARRAY_FACET_DIM]->end()) {
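The loop above does per-document Bernoulli-style sampling: each result id survives with probability facet_sample_percent / 100, and the constant seed keeps the surviving subset (and therefore the counts) identical across repeated runs of the same query. A minimal standalone sketch of the same decision, outside the diff (the helper name is illustrative, not from the codebase):

    #include <cstdint>
    #include <random>
    #include <vector>

    // Keep each result id with probability facet_sample_percent / 100.
    // The fixed seed makes the kept subset deterministic per query.
    std::vector<uint32_t> sample_ids(const std::vector<uint32_t>& result_ids,
                                     size_t facet_sample_percent) {
        std::mt19937 gen(137723);
        std::uniform_int_distribution<> distr(1, 100); // 1 to 100 inclusive
        std::vector<uint32_t> kept;
        for(uint32_t id : result_ids) {
            if(size_t(distr(gen)) <= facet_sample_percent) {
                kept.push_back(id);
            }
        }
        return kept;
    }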
@@ -1265,7 +1280,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
                     compute_facet_stats(a_facet, fhash, facet_field.type);
                 }
 
-                if(a_facet.is_range_query){
+                if(a_facet.is_range_query) {
                     auto sort_index_it = sort_index.find(a_facet.field_name);
 
                     if(sort_index_it != sort_index.end()){
@@ -1285,8 +1300,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
                         }
                     }
                 }
-            }
-            else if(!use_facet_query || fquery_hashes.find(fhash) != fquery_hashes.end()) {
+            } else if(!use_facet_query || fquery_hashes.find(fhash) != fquery_hashes.end()) {
                 facet_count_t& facet_count = a_facet.result_map[fhash];
 
                 //LOG(INFO) << "field: " << a_facet.field_name << ", doc id: " << doc_seq_id << ", hash: " << fhash;
@@ -1980,7 +1994,9 @@ void Index::run_search(search_args* search_params) {
            search_params->facet_query_num_typos,
            search_params->filter_curated_hits,
            search_params->split_join_tokens,
-           search_params->vector_query);
+           search_params->vector_query,
+           search_params->facet_sample_percent,
+           search_params->facet_sample_threshold);
 }
 
 void Index::collate_included_ids(const std::vector<token_t>& q_included_tokens,
@@ -2430,7 +2446,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
                    size_t max_candidates, const std::vector<enable_t>& infixes, const size_t max_extra_prefix,
                    const size_t max_extra_suffix, const size_t facet_query_num_typos,
                    const bool filter_curated_hits, const enable_t split_join_tokens,
-                   const vector_query_t& vector_query) const {
+                   const vector_query_t& vector_query,
+                   size_t facet_sample_percent, size_t facet_sample_threshold) const {
 
     // process the filters
 
@@ -2784,6 +2801,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
     delete [] exclude_token_ids;
     delete [] excluded_result_ids;
 
+    bool estimate_facets = (facet_sample_percent < 100 && all_result_ids_len > facet_sample_threshold);
+
     if(!facets.empty()) {
         const size_t num_threads = std::min(concurrency, all_result_ids_len);
         const size_t window_size = (num_threads == 0) ? 0 :
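Both conditions must hold before any sampling happens, so small result sets always get exact counts. A tiny sketch of the gate (standalone helper name is illustrative, not from the diff):

    #include <cstddef>

    // Sampling kicks in only when a sub-100 percent is requested AND the
    // result set is larger than the configured threshold.
    bool should_estimate_facets(size_t facet_sample_percent,
                                size_t facet_sample_threshold,
                                size_t all_result_ids_len) {
        return facet_sample_percent < 100 && all_result_ids_len > facet_sample_threshold;
    }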
@@ -2820,9 +2839,11 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
 
             thread_pool->enqueue([this, thread_id, &facet_batches, &facet_query, group_limit, group_by_fields,
                                   batch_result_ids, batch_res_len, &facet_infos,
+                                  estimate_facets, facet_sample_percent,
                                   &num_processed, &m_process, &cv_process]() {
                 auto fq = facet_query;
-                do_facets(facet_batches[thread_id], fq, facet_infos, group_limit, group_by_fields,
+                do_facets(facet_batches[thread_id], fq, estimate_facets, facet_sample_percent,
+                          facet_infos, group_limit, group_by_fields,
                           batch_result_ids, batch_res_len);
                 std::unique_lock<std::mutex> lock(m_process);
                 num_processed++;
@@ -2844,8 +2865,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
                     if(group_limit) {
                         // we have to add all group sets
                         acc_facet.hash_groups[facet_kv.first].insert(
-                                this_facet.hash_groups[facet_kv.first].begin(),
-                                this_facet.hash_groups[facet_kv.first].end()
+                            this_facet.hash_groups[facet_kv.first].begin(),
+                            this_facet.hash_groups[facet_kv.first].end()
                         );
                     } else {
                         size_t count = 0;
@@ -2872,6 +2893,22 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
             }
         }
 
+        for(auto& acc_facet: facets) {
+            for(auto& facet_kv: acc_facet.result_map) {
+                if(group_limit) {
+                    facet_kv.second.count = acc_facet.hash_groups[facet_kv.first].size();
+                }
+
+                if(estimate_facets) {
+                    facet_kv.second.count = size_t(double(facet_kv.second.count) * (100.0f / facet_sample_percent));
+                }
+            }
+
+            if(estimate_facets) {
+                acc_facet.sampled = true;
+            }
+        }
+
         /*long long int timeMillisF = std::chrono::duration_cast<std::chrono::milliseconds>(
                 std::chrono::high_resolution_clock::now() - beginF).count();
         LOG(INFO) << "Time for faceting: " << timeMillisF;*/
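The rescaling step simply multiplies each sampled count by the inverse sampling rate: with facet_sample_percent = 10, a value seen 47 times in the sample is reported as 470. A standalone sketch of the arithmetic (helper name is illustrative):

    #include <cstddef>

    // Extrapolate a count measured on a p-percent sample back to the full
    // result set: count * (100 / p), truncated toward zero.
    size_t extrapolate_count(size_t sampled_count, size_t facet_sample_percent) {
        return size_t(double(sampled_count) * (100.0 / facet_sample_percent));
    }
    // extrapolate_count(47, 10) == 470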
@@ -2880,7 +2917,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
         std::vector<facet_info_t> facet_infos(facets.size());
         compute_facet_infos(facets, facet_query, facet_query_num_typos,
                             &included_ids_vec[0], included_ids_vec.size(), group_by_fields, max_candidates, facet_infos);
-        do_facets(facets, facet_query, facet_infos, group_limit, group_by_fields, &included_ids_vec[0], included_ids_vec.size());
+        do_facets(facets, facet_query, estimate_facets, facet_sample_percent,
+                  facet_infos, group_limit, group_by_fields, &included_ids_vec[0], included_ids_vec.size());
 
         all_result_ids_len += curated_topster->size;
 
@@ -73,8 +73,9 @@ TEST_F(CollectionFacetingTest, FacetCounts) {
     ASSERT_EQ(5, results["hits"].size());
 
     ASSERT_EQ(1, results["facet_counts"].size());
-    ASSERT_EQ(3, results["facet_counts"][0].size());
+    ASSERT_EQ(4, results["facet_counts"][0].size());
     ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
+    ASSERT_EQ(false, results["facet_counts"][0]["sampled"].get<bool>());
     ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
     ASSERT_EQ(1, results["facet_counts"][0]["stats"].size());
     ASSERT_EQ(4, results["facet_counts"][0]["stats"]["total_values"].get<size_t>());
@@ -981,7 +982,6 @@ TEST_F(CollectionFacetingTest, FacetByNestedIntField) {
 }
 
 TEST_F(CollectionFacetingTest, FacetParseTest){
-
     std::vector<field> fields = {
         field("score", field_types::INT32, true),
         field("grade", field_types::INT32, true),
@@ -1008,8 +1008,6 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
     ASSERT_TRUE(range_facets[1].is_range_query);
     ASSERT_GT(range_facets[1].facet_range_map.size(), 0);
 
-
-
     std::vector<std::string> normal_facet_fields {
         "score",
         "grade"
@@ -1022,9 +1020,7 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
 
     ASSERT_STREQ("score", normal_facets[0].field_name.c_str());
     ASSERT_STREQ("grade", normal_facets[1].field_name.c_str());
-
-
-
+
     std::vector<std::string> mixed_facet_fields {
         "score",
         "grade(A:[80, 100], B:[60, 80], C:[40, 60])",
@@ -1044,3 +1040,304 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
 
     ASSERT_STREQ("rank", mixed_facets[2].field_name.c_str());
 }
+
+TEST_F(CollectionFacetingTest, RangeFacetTest) {
+    std::vector<field> fields = {field("place", field_types::STRING, false),
+                                 field("state", field_types::STRING, false),
+                                 field("visitors", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "", {}, {}
+    ).get();
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["place"] = "Mysore Palace";
+    doc1["state"] = "Karnataka";
+    doc1["visitors"] = 235486;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["place"] = "Hampi";
+    doc2["state"] = "Karnataka";
+    doc2["visitors"] = 187654;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["place"] = "Mahabalipuram";
+    doc3["state"] = "TamilNadu";
+    doc3["visitors"] = 174684;
+
+    nlohmann::json doc4;
+    doc4["id"] = "3";
+    doc4["place"] = "Meenakshi Amman Temple";
+    doc4["state"] = "TamilNadu";
+    doc4["visitors"] = 246676;
+
+    nlohmann::json doc5;
+    doc5["id"] = "4";
+    doc5["place"] = "Staue of Unity";
+    doc5["state"] = "Gujarat";
+    doc5["visitors"] = 345878;
+
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
+
+    auto results = coll1->search("Karnataka", {"state"},
+                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true).get();
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
+    ASSERT_STREQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+
+    auto results2 = coll1->search("Gujarat", {"state"},
+                                  "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true).get();
+    ASSERT_EQ(1, results2["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(1, results2["facet_counts"][0]["counts"][0]["count"].get<std::size_t>());
+    ASSERT_STREQ("VeryBusy", results2["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
+    ASSERT_TRUE(results2["facet_counts"][0]["counts"][1]["value"] == nullptr);
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionFacetingTest, RangeFacetContinuity) {
+    std::vector<field> fields = {field("place", field_types::STRING, false),
+                                 field("state", field_types::STRING, false),
+                                 field("visitors", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "", {}, {}
+    ).get();
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["place"] = "Mysore Palace";
+    doc1["state"] = "Karnataka";
+    doc1["visitors"] = 235486;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["place"] = "Hampi";
+    doc2["state"] = "Karnataka";
+    doc2["visitors"] = 187654;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["place"] = "Mahabalipuram";
+    doc3["state"] = "TamilNadu";
+    doc3["visitors"] = 174684;
+
+    nlohmann::json doc4;
+    doc4["id"] = "3";
+    doc4["place"] = "Meenakshi Amman Temple";
+    doc4["state"] = "TamilNadu";
+    doc4["visitors"] = 246676;
+
+    nlohmann::json doc5;
+    doc5["id"] = "4";
+    doc5["place"] = "Staue of Unity";
+    doc5["state"] = "Gujarat";
+    doc5["visitors"] = 345878;
+
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
+
+    auto results = coll1->search("TamilNadu", {"state"},
+                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200001, 500000])"},
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true);
+    ASSERT_STREQ("Ranges in range facet syntax should be continous.", results.error().c_str());
+
+    auto results2 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000], VeryBusy:[199999, 500000])"},
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true);
+    ASSERT_STREQ("Ranges in range facet syntax should be continous.", results2.error().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionFacetingTest, RangeFacetTypo) {
+    std::vector<field> fields = {field("place", field_types::STRING, false),
+                                 field("state", field_types::STRING, false),
+                                 field("visitors", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "", {}, {}
+    ).get();
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["place"] = "Mysore Palace";
+    doc1["state"] = "Karnataka";
+    doc1["visitors"] = 235486;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["place"] = "Hampi";
+    doc2["state"] = "Karnataka";
+    doc2["visitors"] = 187654;
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["place"] = "Mahabalipuram";
+    doc3["state"] = "TamilNadu";
+    doc3["visitors"] = 174684;
+
+    nlohmann::json doc4;
+    doc4["id"] = "3";
+    doc4["place"] = "Meenakshi Amman Temple";
+    doc4["state"] = "TamilNadu";
+    doc4["visitors"] = 246676;
+
+    nlohmann::json doc5;
+    doc5["id"] = "4";
+    doc5["place"] = "Staue of Unity";
+    doc5["state"] = "Gujarat";
+    doc5["visitors"] = 345878;
+
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
+
+    auto results = coll1->search("TamilNadu", {"state"},
+                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000)"}, //missing ']' at end
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true);
+    ASSERT_STREQ("Error splitting the range string.", results.error().c_str());
+
+    auto results2 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000], VeryBusy:200000, 500000])"}, //missing '[' in second range
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true);
+    ASSERT_STREQ("Error splitting the range string.", results2.error().c_str());
+
+    auto results3 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000] VeryBusy:[200000, 500000])"}, //missing ',' between ranges
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true);
+    ASSERT_STREQ("Error splitting the range string.", results3.error().c_str());
+
+    auto results4 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0 200000], VeryBusy:[200000, 500000])"}, //missing ',' between first ranges values
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true);
+    ASSERT_STREQ("Range String range pattern not matched.", results4.error().c_str());
+
+    auto results5 = coll1->search("TamilNadu", {"state"},
+                                  "", {"visitors(Busy:[0, 200000 VeryBusy:200000, 500000])"}, //missing '],' and '['
+                                  {}, {2}, 10,
+                                  1, FREQUENCY, {true},
+                                  10, spp::sparse_hash_set<std::string>(),
+                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                  "<mark>", "</mark>", {}, 1000,
+                                  true, false, true, "", true);
+    ASSERT_STREQ("Range String range pattern not matched.", results5.error().c_str());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionFacetingTest, SampleFacetCounts) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "color", "type": "string", "facet": true}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    for(size_t i = 0; i < 1000; i++) {
+        nlohmann::json doc;
+        if(i % 2 == 0) {
+            doc["color"] = "blue";
+        } else {
+            doc["color"] = "red";
+        }
+
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    auto res = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
+                             spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                             "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                             4, {off}, 3, 3, 2, 2, false, "", 10, 0).get();
+
+    ASSERT_EQ(1000, res["found"].get<size_t>());
+    ASSERT_EQ(1, res["facet_counts"].size());
+    ASSERT_EQ(2, res["facet_counts"][0]["counts"].size());
+
+    // verify approximate counts
+    ASSERT_GE(res["facet_counts"][0]["counts"][0]["count"].get<size_t>(), 250);
+    ASSERT_GE(res["facet_counts"][0]["counts"][1]["count"].get<size_t>(), 250);
+    ASSERT_TRUE(res["facet_counts"][0]["sampled"].get<bool>());
+
+    // when sample threshold is high, don't estimate
+    res = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
+                        spp::sparse_hash_set<std::string>(),
+                        spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                        "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                        4, {off}, 3, 3, 2, 2, false, "", 10, 10000).get();
+
+    ASSERT_EQ(1000, res["found"].get<size_t>());
+    ASSERT_EQ(1, res["facet_counts"].size());
+    ASSERT_EQ(2, res["facet_counts"][0]["counts"].size());
+
+    // verify approximate counts
+    ASSERT_EQ(500, res["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+    ASSERT_EQ(500, res["facet_counts"][0]["counts"][1]["count"].get<size_t>());
+    ASSERT_FALSE(res["facet_counts"][0]["sampled"].get<bool>());
+
+    // test for sample percent > 100
+
+    auto res_op = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
+                                spp::sparse_hash_set<std::string>(),
+                                spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
+                                "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                4, {off}, 3, 3, 2, 2, false, "", 200, 0);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Value of `facet_sample_percent` must be less than 100.", res_op.error());
+}
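Why the loose >= 250 bound in the sampled case: the collection holds 500 "blue" and 500 "red" docs, and with facet_sample_percent=10 (threshold 0) only about 100 of the 1000 result ids are counted, so each color's sampled count of roughly 50 extrapolates back to roughly 500; the assertion leaves generous slack for sampling variance. The second run raises the threshold to 10000, which exceeds the 1000 results, so no sampling occurs and the exact 500/500 split is asserted.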
@@ -583,7 +583,7 @@ TEST_F(CollectionSchemaChangeTest, AbilityToDropAndReAddIndexAtTheSameTime) {
     ASSERT_EQ(1, res["found"].get<size_t>());
     ASSERT_EQ("0", res["hits"][0]["document"]["id"].get<std::string>());
     ASSERT_EQ(1, res["facet_counts"].size());
-    ASSERT_EQ(3, res["facet_counts"][0].size());
+    ASSERT_EQ(4, res["facet_counts"][0].size());
     ASSERT_EQ("title", res["facet_counts"][0]["field_name"]);
     ASSERT_EQ(1, res["facet_counts"][0]["counts"].size());
     ASSERT_EQ("123", res["facet_counts"][0]["counts"][0]["value"].get<std::string>());
@@ -2918,239 +2918,3 @@ TEST_F(CollectionSpecificTest, DontHighlightPunctuation) {
 
     collectionManager.drop_collection("coll1");
 }
-
-TEST_F(CollectionSpecificTest, RangeFacetTest) {
-    std::vector<field> fields = {field("place", field_types::STRING, false),
-                                 field("state", field_types::STRING, false),
-                                 field("visitors", field_types::INT32, true),};
-    Collection* coll1 = collectionManager.create_collection(
-            "coll1", 1, fields, "", 0, "", {}, {}
-    ).get();
-    nlohmann::json doc1;
-    doc1["id"] = "0";
-    doc1["place"] = "Mysore Palace";
-    doc1["state"] = "Karnataka";
-    doc1["visitors"] = 235486;
-
-    nlohmann::json doc2;
-    doc2["id"] = "1";
-    doc2["place"] = "Hampi";
-    doc2["state"] = "Karnataka";
-    doc2["visitors"] = 187654;
-
-    nlohmann::json doc3;
-    doc3["id"] = "2";
-    doc3["place"] = "Mahabalipuram";
-    doc3["state"] = "TamilNadu";
-    doc3["visitors"] = 174684;
-
-    nlohmann::json doc4;
-    doc4["id"] = "3";
-    doc4["place"] = "Meenakshi Amman Temple";
-    doc4["state"] = "TamilNadu";
-    doc4["visitors"] = 246676;
-
-    nlohmann::json doc5;
-    doc5["id"] = "4";
-    doc5["place"] = "Staue of Unity";
-    doc5["state"] = "Gujarat";
-    doc5["visitors"] = 345878;
-
-
-    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
-
-    auto results = coll1->search("Karnataka", {"state"},
-                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
-                                 {}, {2}, 10,
-                                 1, FREQUENCY, {true},
-                                 10, spp::sparse_hash_set<std::string>(),
-                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                 "<mark>", "</mark>", {}, 1000,
-                                 true, false, true, "", true).get();
-    ASSERT_EQ(2, results["facet_counts"][0]["counts"].size());
-    ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
-    ASSERT_STREQ("Busy", results["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
-
-    auto results2 = coll1->search("Gujarat", {"state"},
-                                  "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000])"},
-                                  {}, {2}, 10,
-                                  1, FREQUENCY, {true},
-                                  10, spp::sparse_hash_set<std::string>(),
-                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                  "<mark>", "</mark>", {}, 1000,
-                                  true, false, true, "", true).get();
-    ASSERT_EQ(1, results2["facet_counts"][0]["counts"].size());
-    ASSERT_EQ(1, results2["facet_counts"][0]["counts"][0]["count"].get<std::size_t>());
-    ASSERT_STREQ("VeryBusy", results2["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
-    ASSERT_TRUE(results2["facet_counts"][0]["counts"][1]["value"] == nullptr);
-
-    collectionManager.drop_collection("coll1");
-}
-
-TEST_F(CollectionSpecificTest, RangeFacetContinuity) {
-    std::vector<field> fields = {field("place", field_types::STRING, false),
-                                 field("state", field_types::STRING, false),
-                                 field("visitors", field_types::INT32, true),};
-    Collection* coll1 = collectionManager.create_collection(
-            "coll1", 1, fields, "", 0, "", {}, {}
-    ).get();
-    nlohmann::json doc1;
-    doc1["id"] = "0";
-    doc1["place"] = "Mysore Palace";
-    doc1["state"] = "Karnataka";
-    doc1["visitors"] = 235486;
-
-    nlohmann::json doc2;
-    doc2["id"] = "1";
-    doc2["place"] = "Hampi";
-    doc2["state"] = "Karnataka";
-    doc2["visitors"] = 187654;
-
-    nlohmann::json doc3;
-    doc3["id"] = "2";
-    doc3["place"] = "Mahabalipuram";
-    doc3["state"] = "TamilNadu";
-    doc3["visitors"] = 174684;
-
-    nlohmann::json doc4;
-    doc4["id"] = "3";
-    doc4["place"] = "Meenakshi Amman Temple";
-    doc4["state"] = "TamilNadu";
-    doc4["visitors"] = 246676;
-
-    nlohmann::json doc5;
-    doc5["id"] = "4";
-    doc5["place"] = "Staue of Unity";
-    doc5["state"] = "Gujarat";
-    doc5["visitors"] = 345878;
-
-
-    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
-
-    auto results = coll1->search("TamilNadu", {"state"},
-                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200001, 500000])"},
-                                 {}, {2}, 10,
-                                 1, FREQUENCY, {true},
-                                 10, spp::sparse_hash_set<std::string>(),
-                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                 "<mark>", "</mark>", {}, 1000,
-                                 true, false, true, "", true);
-    ASSERT_STREQ("Ranges in range facet syntax should be continous.", results.error().c_str());
-
-    auto results2 = coll1->search("TamilNadu", {"state"},
-                                  "", {"visitors(Busy:[0, 200000], VeryBusy:[199999, 500000])"},
-                                  {}, {2}, 10,
-                                  1, FREQUENCY, {true},
-                                  10, spp::sparse_hash_set<std::string>(),
-                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                  "<mark>", "</mark>", {}, 1000,
-                                  true, false, true, "", true);
-    ASSERT_STREQ("Ranges in range facet syntax should be continous.", results2.error().c_str());
-
-    collectionManager.drop_collection("coll1");
-}
-
-TEST_F(CollectionSpecificTest, RangeFacetTypo) {
-    std::vector<field> fields = {field("place", field_types::STRING, false),
-                                 field("state", field_types::STRING, false),
-                                 field("visitors", field_types::INT32, true),};
-    Collection* coll1 = collectionManager.create_collection(
-            "coll1", 1, fields, "", 0, "", {}, {}
-    ).get();
-    nlohmann::json doc1;
-    doc1["id"] = "0";
-    doc1["place"] = "Mysore Palace";
-    doc1["state"] = "Karnataka";
-    doc1["visitors"] = 235486;
-
-    nlohmann::json doc2;
-    doc2["id"] = "1";
-    doc2["place"] = "Hampi";
-    doc2["state"] = "Karnataka";
-    doc2["visitors"] = 187654;
-
-    nlohmann::json doc3;
-    doc3["id"] = "2";
-    doc3["place"] = "Mahabalipuram";
-    doc3["state"] = "TamilNadu";
-    doc3["visitors"] = 174684;
-
-    nlohmann::json doc4;
-    doc4["id"] = "3";
-    doc4["place"] = "Meenakshi Amman Temple";
-    doc4["state"] = "TamilNadu";
-    doc4["visitors"] = 246676;
-
-    nlohmann::json doc5;
-    doc5["id"] = "4";
-    doc5["place"] = "Staue of Unity";
-    doc5["state"] = "Gujarat";
-    doc5["visitors"] = 345878;
-
-
-    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc4.dump()).ok());
-    ASSERT_TRUE(coll1->add(doc5.dump()).ok());
-
-    auto results = coll1->search("TamilNadu", {"state"},
-                                 "", {"visitors(Busy:[0, 200000], VeryBusy:[200000, 500000)"}, //missing ']' at end
-                                 {}, {2}, 10,
-                                 1, FREQUENCY, {true},
-                                 10, spp::sparse_hash_set<std::string>(),
-                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                 "<mark>", "</mark>", {}, 1000,
-                                 true, false, true, "", true);
-    ASSERT_STREQ("Error splitting the range string.", results.error().c_str());
-
-    auto results2 = coll1->search("TamilNadu", {"state"},
-                                  "", {"visitors(Busy:[0, 200000], VeryBusy:200000, 500000])"}, //missing '[' in second range
-                                  {}, {2}, 10,
-                                  1, FREQUENCY, {true},
-                                  10, spp::sparse_hash_set<std::string>(),
-                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                  "<mark>", "</mark>", {}, 1000,
-                                  true, false, true, "", true);
-    ASSERT_STREQ("Error splitting the range string.", results2.error().c_str());
-
-    auto results3 = coll1->search("TamilNadu", {"state"},
-                                  "", {"visitors(Busy:[0, 200000] VeryBusy:[200000, 500000])"}, //missing ',' between ranges
-                                  {}, {2}, 10,
-                                  1, FREQUENCY, {true},
-                                  10, spp::sparse_hash_set<std::string>(),
-                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                  "<mark>", "</mark>", {}, 1000,
-                                  true, false, true, "", true);
-    ASSERT_STREQ("Error splitting the range string.", results3.error().c_str());
-
-    auto results4 = coll1->search("TamilNadu", {"state"},
-                                  "", {"visitors(Busy:[0 200000], VeryBusy:[200000, 500000])"}, //missing ',' between first ranges values
-                                  {}, {2}, 10,
-                                  1, FREQUENCY, {true},
-                                  10, spp::sparse_hash_set<std::string>(),
-                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                  "<mark>", "</mark>", {}, 1000,
-                                  true, false, true, "", true);
-    ASSERT_STREQ("Range String range pattern not matched.", results4.error().c_str());
-
-    auto results5 = coll1->search("TamilNadu", {"state"},
-                                  "", {"visitors(Busy:[0, 200000 VeryBusy:200000, 500000])"}, //missing '],' and '['
-                                  {}, {2}, 10,
-                                  1, FREQUENCY, {true},
-                                  10, spp::sparse_hash_set<std::string>(),
-                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
-                                  "<mark>", "</mark>", {}, 1000,
-                                  true, false, true, "", true);
-    ASSERT_STREQ("Range String range pattern not matched.", results5.error().c_str());
-
-    collectionManager.drop_collection("coll1");
-}