mirror of
https://github.com/typesense/typesense.git
synced 2025-05-20 05:32:30 +08:00
Merge pull request #1297 from krunal1313/facet_query_value_index
Facet query handling with value index
This commit is contained in:
commit
c5dbfc25bb
@ -133,7 +133,7 @@ public:
|
||||
size_t get_facet_count(const std::string& field_name);
|
||||
|
||||
size_t intersect(facet& a_facet,
|
||||
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
|
||||
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
|
||||
const uint32_t* result_ids, size_t result_id_len,
|
||||
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
|
||||
bool is_wildcard_no_filter_query, const std::string& sort_order = "");
|
||||
|
@ -677,7 +677,7 @@ struct facet {
|
||||
struct facet_info_t {
|
||||
// facet hash => resolved tokens
|
||||
std::unordered_map<uint64_t, std::vector<std::string>> hashes;
|
||||
std::vector<std::string> fvalue_searched_tokens;
|
||||
std::vector<std::vector<std::string>> fvalue_searched_tokens;
|
||||
bool use_facet_query = false;
|
||||
bool should_compute_stats = false;
|
||||
bool use_value_index = false;
|
||||
|
@ -158,7 +158,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
|
||||
|
||||
//returns the count of matching seq_ids from result array
|
||||
size_t facet_index_t::intersect(facet& a_facet,
|
||||
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
|
||||
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
|
||||
const uint32_t* result_ids, size_t result_ids_len,
|
||||
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
|
||||
bool is_wildcard_no_filter_query, const std::string& sort_order) {
|
||||
@ -187,10 +187,18 @@ size_t facet_index_t::intersect(facet& a_facet,
|
||||
auto facet_str = facet_count_it->facet_value;
|
||||
transform(facet_str.begin(), facet_str.end(), facet_str.begin(), ::tolower);
|
||||
|
||||
for(const auto& searched_token: fvalue_searched_tokens) {
|
||||
if(facet_str.find(searched_token) != std::string::npos) {
|
||||
for(const auto& searched_tokens : fvalue_searched_tokens) {
|
||||
bool found_all_tokens = true;
|
||||
for (const auto &searched_token: searched_tokens) {
|
||||
if (facet_str.find(searched_token) == std::string::npos) {
|
||||
found_all_tokens = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_all_tokens) {
|
||||
a_facet.fvalue_tokens[facet_count_it->facet_value] = searched_tokens;
|
||||
found_search_token = true;
|
||||
a_facet.fvalue_tokens[facet_count_it->facet_value] = fvalue_searched_tokens;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -5167,9 +5167,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
|
||||
// need to ensure that document ID actually contains searched_query tokens
|
||||
// since `field_result_ids` contains documents matched across all queries
|
||||
// value based index
|
||||
for(const auto& val : searched_tokens) {
|
||||
facet_infos[findex].fvalue_searched_tokens.emplace_back(val);
|
||||
}
|
||||
facet_infos[findex].fvalue_searched_tokens.emplace_back(searched_tokens);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -776,6 +776,60 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
TEST_F(CollectionFacetingTest, FacetQueryTest) {
|
||||
std::vector<field> fields = {
|
||||
field("color", field_types::STRING, true),
|
||||
};
|
||||
|
||||
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
|
||||
std::vector<std::string> colors = {"apple red", "azure", "amazon green", "apricot orange",
|
||||
"blue", "barrel blue", "banana yellow", "ball green", "baikal"};
|
||||
|
||||
for(size_t i = 0; i < 100; i++) {
|
||||
nlohmann::json doc;
|
||||
doc["color"] = colors[i % colors.size()];
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
}
|
||||
|
||||
// add colors that DON'T start with "b" to push these up the count list
|
||||
for(size_t i = 0; i < 4; i++) {
|
||||
nlohmann::json doc;
|
||||
doc["color"] = colors[i];
|
||||
ASSERT_TRUE(coll1->add(doc.dump()).ok());
|
||||
}
|
||||
|
||||
auto results = coll1->search("*", {},
|
||||
"", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 5, "color:b", 30, 4, "", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
|
||||
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
|
||||
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(4, results["facet_counts"][0]["counts"].size()); // 4 is default candidate size
|
||||
|
||||
// junk string should produce no facets
|
||||
|
||||
results = coll1->search("*", {},
|
||||
"", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 5, "color:xsda", 30, 4, "", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
|
||||
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(0, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
results = coll1->search("*", {},
|
||||
"", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 5, "color:green a", 30, 4, "", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
|
||||
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ("amazon green", results["facet_counts"][0]["counts"][0]["value"]);
|
||||
ASSERT_EQ("<mark>a</mark>mazon <mark>green</mark>", results["facet_counts"][0]["counts"][0]["highlighted"]);
|
||||
}
|
||||
|
||||
TEST_F(CollectionFacetingTest, FacetQueryOnStringWithColon) {
|
||||
std::vector<field> fields = {field("title", field_types::STRING, true),
|
||||
field("points", field_types::INT32, false)};
|
||||
|
@ -1390,6 +1390,17 @@ TEST_F(CollectionOptimizedFacetingTest, FacetQueryTest) {
|
||||
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(0, results["facet_counts"][0]["counts"].size());
|
||||
|
||||
results = coll1->search("*", {},
|
||||
"", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 5, "color:green a", 30, 4, "", 20, {}, {}, {}, 0,
|
||||
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
|
||||
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
|
||||
ASSERT_EQ("amazon green", results["facet_counts"][0]["counts"][0]["value"]);
|
||||
ASSERT_EQ("<mark>a</mark>mazon <mark>green</mark>", results["facet_counts"][0]["counts"][0]["highlighted"]);
|
||||
}
|
||||
|
||||
TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user