Merge pull request #1297 from krunal1313/facet_query_value_index

Facet query handling with value index
This commit is contained in:
Kishore Nallan 2023-10-13 17:17:31 +05:30 committed by GitHub
commit c5dbfc25bb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 80 additions and 9 deletions

View File

@ -133,7 +133,7 @@ public:
size_t get_facet_count(const std::string& field_name);
size_t intersect(facet& a_facet,
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_id_len,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
bool is_wildcard_no_filter_query, const std::string& sort_order = "");

View File

@ -677,7 +677,7 @@ struct facet {
struct facet_info_t {
// facet hash => resolved tokens
std::unordered_map<uint64_t, std::vector<std::string>> hashes;
std::vector<std::string> fvalue_searched_tokens;
std::vector<std::vector<std::string>> fvalue_searched_tokens;
bool use_facet_query = false;
bool should_compute_stats = false;
bool use_value_index = false;

View File

@ -158,7 +158,7 @@ size_t facet_index_t::get_facet_count(const std::string& field_name) {
//returns the count of matching seq_ids from result array
size_t facet_index_t::intersect(facet& a_facet,
bool has_facet_query, const std::vector<std::string>& fvalue_searched_tokens,
bool has_facet_query, const std::vector<std::vector<std::string>>& fvalue_searched_tokens,
const uint32_t* result_ids, size_t result_ids_len,
size_t max_facet_count, std::map<std::string, docid_count_t>& found,
bool is_wildcard_no_filter_query, const std::string& sort_order) {
@ -187,10 +187,18 @@ size_t facet_index_t::intersect(facet& a_facet,
auto facet_str = facet_count_it->facet_value;
transform(facet_str.begin(), facet_str.end(), facet_str.begin(), ::tolower);
for(const auto& searched_token: fvalue_searched_tokens) {
if(facet_str.find(searched_token) != std::string::npos) {
for(const auto& searched_tokens : fvalue_searched_tokens) {
bool found_all_tokens = true;
for (const auto &searched_token: searched_tokens) {
if (facet_str.find(searched_token) == std::string::npos) {
found_all_tokens = false;
break;
}
}
if (found_all_tokens) {
a_facet.fvalue_tokens[facet_count_it->facet_value] = searched_tokens;
found_search_token = true;
a_facet.fvalue_tokens[facet_count_it->facet_value] = fvalue_searched_tokens;
break;
}
}

View File

@ -5167,9 +5167,7 @@ void Index::compute_facet_infos(const std::vector<facet>& facets, facet_query_t&
// need to ensure that document ID actually contains searched_query tokens
// since `field_result_ids` contains documents matched across all queries
// value based index
for(const auto& val : searched_tokens) {
facet_infos[findex].fvalue_searched_tokens.emplace_back(val);
}
facet_infos[findex].fvalue_searched_tokens.emplace_back(searched_tokens);
}
}

View File

@ -776,6 +776,60 @@ TEST_F(CollectionFacetingTest, FacetCountOnSimilarStrings) {
collectionManager.drop_collection("coll1");
}
// Exercises `facet_query` against a string facet field when the search is run
// with the value-based facet index (last search argument is VALUE):
//   * a single-token query ("b"),
//   * a query that matches nothing ("xsda"),
//   * a multi-token query ("green a"), expected to return only "amazon green"
//     with both tokens highlighted.
TEST_F(CollectionFacetingTest, FacetQueryTest) {
    std::vector<field> fields = {
        field("color", field_types::STRING, true),
    };

    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();

    const std::vector<std::string> colors = {"apple red", "azure", "amazon green", "apricot orange",
                                             "blue", "barrel blue", "banana yellow", "ball green", "baikal"};

    // Indexes one document holding the given color; yields whether the add succeeded.
    auto add_color_doc = [coll1](const std::string& color) {
        nlohmann::json doc;
        doc["color"] = color;
        return coll1->add(doc.dump()).ok();
    };

    // Runs the wildcard search used throughout this test; only the facet query varies.
    auto search_with_facet_query = [coll1](const std::string& facet_query) {
        return coll1->search("*", {},
                             "", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
                             spp::sparse_hash_set<std::string>(), 5, facet_query, 30, 4, "", 20, {}, {}, {}, 0,
                             "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
                             4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
    };

    // cycle through the color list to populate 100 documents
    for(size_t doc_num = 0; doc_num < 100; doc_num++) {
        ASSERT_TRUE(add_color_doc(colors[doc_num % colors.size()]));
    }

    // add colors that DON'T start with "b" to push these up the count list
    for(size_t color_idx = 0; color_idx < 4; color_idx++) {
        ASSERT_TRUE(add_color_doc(colors[color_idx]));
    }

    const auto prefix_results = search_with_facet_query("color:b");
    ASSERT_EQ(1, prefix_results["facet_counts"].size());
    ASSERT_EQ(4, prefix_results["facet_counts"][0]["counts"].size()); // 4 is default candidate size

    // junk string should produce no facets
    const auto junk_results = search_with_facet_query("color:xsda");
    ASSERT_EQ(1, junk_results["facet_counts"].size());
    ASSERT_EQ(0, junk_results["facet_counts"][0]["counts"].size());

    // multi-token facet query: only "amazon green" is expected back
    const auto multi_token_results = search_with_facet_query("color:green a");
    ASSERT_EQ(1, multi_token_results["facet_counts"].size());
    ASSERT_EQ(1, multi_token_results["facet_counts"][0]["counts"].size());

    const auto& top_count = multi_token_results["facet_counts"][0]["counts"][0];
    ASSERT_EQ("amazon green", top_count["value"]);
    ASSERT_EQ("<mark>a</mark>mazon <mark>green</mark>", top_count["highlighted"]);
}
TEST_F(CollectionFacetingTest, FacetQueryOnStringWithColon) {
std::vector<field> fields = {field("title", field_types::STRING, true),
field("points", field_types::INT32, false)};

View File

@ -1390,6 +1390,17 @@ TEST_F(CollectionOptimizedFacetingTest, FacetQueryTest) {
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(0, results["facet_counts"][0]["counts"].size());
results = coll1->search("*", {},
"", {"color"}, {}, {2}, 1, 1, FREQUENCY, {true}, 1, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 5, "color:green a", 30, 4, "", 20, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 3, 3, 2, 2, false, "", true, 0, max_score, 100, 0, 4294967295UL, VALUE).get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
ASSERT_EQ("amazon green", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ("<mark>a</mark>mazon <mark>green</mark>", results["facet_counts"][0]["counts"][0]["highlighted"]);
}
TEST_F(CollectionOptimizedFacetingTest, StringLengthTest) {