Fix hash-based faceting for coerced field values.

When an integer field is coerced to string via schema update, the underlying data on disk will still be integer. We have to account for this during hash based faceting.
This commit is contained in:
Kishore Nallan 2024-01-25 11:18:45 +05:30
parent 77ade5ff65
commit 94c560429d
4 changed files with 67 additions and 20 deletions

View File

@ -472,7 +472,7 @@ public:
const Index* _get_index() const;
bool facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count, const nlohmann::json &document,
bool facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count, nlohmann::json &document,
std::string &value) const;
nlohmann::json get_facet_parent(const std::string& facet_field_name, const nlohmann::json& document) const;

View File

@ -3585,7 +3585,7 @@ Option<bool> Collection::get_reference_filter_ids(const std::string & filter_que
}
bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t &facet_count,
const nlohmann::json &document, std::string &value) const {
nlohmann::json &document, std::string &value) const {
if(document.count(a_facet.field_name) == 0) {
// check for field exists
@ -3610,6 +3610,14 @@ bool Collection::facet_value_to_string(const facet &a_facet, const facet_count_t
}
}
auto coerce_op = validator_t::coerce_element(search_schema.at(a_facet.field_name), document,
document[a_facet.field_name], fallback_field_type,
DIRTY_VALUES::COERCE_OR_REJECT);
if(!coerce_op.ok()) {
LOG(ERROR) << "Bad type for field " << a_facet.field_name << ", document: " << document;
return false;
}
if(search_schema.at(a_facet.field_name).type == field_types::STRING) {
value = document[a_facet.field_name];
} else if(search_schema.at(a_facet.field_name).type == field_types::STRING_ARRAY) {

View File

@ -10,10 +10,13 @@ Option<uint32_t> validator_t::coerce_element(const field& a_field, nlohmann::jso
bool array_ele_erased = false;
nlohmann::json::iterator dummy_iter;
if(a_field.type == field_types::STRING && !doc_ele.is_string()) {
Option<uint32_t> coerce_op = coerce_string(dirty_values, fallback_field_type, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
if(a_field.type == field_types::STRING) {
if(!doc_ele.is_string()) {
Option<uint32_t> coerce_op = coerce_string(dirty_values, fallback_field_type, a_field, document,
field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
}
}
} else if(a_field.type == field_types::INT32) {
if(!doc_ele.is_number_integer()) {
@ -22,21 +25,27 @@ Option<uint32_t> validator_t::coerce_element(const field& a_field, nlohmann::jso
return coerce_op;
}
}
} else if(a_field.type == field_types::INT64 && !doc_ele.is_number_integer()) {
Option<uint32_t> coerce_op = coerce_int64_t(dirty_values, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
} else if(a_field.type == field_types::INT64) {
if(!doc_ele.is_number_integer()) {
Option<uint32_t> coerce_op = coerce_int64_t(dirty_values, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
}
}
} else if(a_field.type == field_types::FLOAT && !doc_ele.is_number()) {
// using `is_number` allows integer to be passed to a float field
Option<uint32_t> coerce_op = coerce_float(dirty_values, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
} else if(a_field.type == field_types::FLOAT) {
if(!doc_ele.is_number()) {
// using `is_number` allows integer to be passed to a float field
Option<uint32_t> coerce_op = coerce_float(dirty_values, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
}
}
} else if(a_field.type == field_types::BOOL && !doc_ele.is_boolean()) {
Option<uint32_t> coerce_op = coerce_bool(dirty_values, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
} else if(a_field.type == field_types::BOOL) {
if(!doc_ele.is_boolean()) {
Option<uint32_t> coerce_op = coerce_bool(dirty_values, a_field, document, field_name, dummy_iter, false, array_ele_erased);
if(!coerce_op.ok()) {
return coerce_op;
}
}
} else if(a_field.type == field_types::GEOPOINT) {
if(!doc_ele.is_array() || doc_ele.size() != 2) {

View File

@ -3025,4 +3025,34 @@ TEST_F(CollectionFacetingTest, RangeFacetAlphanumericLabels) {
ASSERT_EQ("20thAD", results["facet_counts"][0]["counts"][1]["value"]);
ASSERT_EQ(1, results["facet_counts"][0]["counts"][2]["count"]);
ASSERT_EQ("10thAD", results["facet_counts"][0]["counts"][2]["value"]);
}
}
TEST_F(CollectionFacetingTest, FacetingWithCoercedString) {
std::vector<field> fields = {field("years", field_types::INT64_ARRAY, true)};
Collection* coll1 = collectionManager.create_collection(
"coll1", 1, fields, "", 0, "",
{},{}).get();
nlohmann::json doc;
doc["id"] = "0";
doc["years"] = {2000, 2010, 2020};
ASSERT_TRUE(coll1->add(doc.dump()).ok());
auto schema_changes = R"({
"fields": [
{"name": "years", "drop": true},
{"name": "years", "type": "string[]", "facet": true}
]
})"_json;
// schema change will not change the data on disk, so we have to account for this during hash based faceting
auto alter_op = coll1->alter(schema_changes);
ASSERT_TRUE(alter_op.ok());
auto results = coll1->search("*", {}, "", {"years"}, {}, {2}, 10,
1, FREQUENCY, {true}).get();
ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"]);
}