Allow fields to be marked as optional in the schema.

Downside: optional fields cannot be used for sorting or marked as the default sorting field.
This commit is contained in:
kishorenc 2020-03-05 21:56:05 +05:30
parent ba17243a36
commit 6c8e62a61b
8 changed files with 110 additions and 17 deletions

View File

@ -28,8 +28,15 @@ struct field {
std::string name;
std::string type;
bool facet;
bool optional;
field(const std::string & name, const std::string & type, const bool & facet): name(name), type(type), facet(facet) {
field(const std::string & name, const std::string & type, const bool facet):
name(name), type(type), facet(facet), optional(false) {
}
field(const std::string & name, const std::string & type, const bool facet, const bool optional):
name(name), type(type), facet(facet), optional(optional) {
}

View File

@ -513,6 +513,11 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
return Option<nlohmann::json>(404, error);
}
if(sort_schema.count(_sort_field.name) != 0 && sort_schema.at(_sort_field.name).optional) {
std::string error = "Cannot sort by `" + _sort_field.name + "` as it is defined as an optional field.";
return Option<nlohmann::json>(400, error);
}
std::string sort_order = _sort_field.order;
StringUtils::toupper(sort_order);

View File

@ -227,6 +227,11 @@ Option<Collection*> CollectionManager::create_collection(const std::string name,
}
if(field.name == default_sorting_field) {
if(field.optional) {
return Option<Collection*>(400, "Default sorting field `" + default_sorting_field +
"` cannot be an optional field.");
}
found_default_sorting_field = true;
}
}

View File

@ -114,8 +114,12 @@ void post_create_collection(http_req & req, http_res & res) {
field_json["facet"] = false;
}
fields.push_back(
field(field_json["name"], field_json["type"], field_json["facet"])
if(field_json.count("optional") == 0) {
field_json["optional"] = false;
}
fields.emplace_back(
field(field_json["name"], field_json["type"], field_json["facet"], field_json["optional"])
);
}

View File

@ -103,6 +103,10 @@ Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t
for(const std::pair<std::string, field> & field_pair: search_schema) {
const std::string & field_name = field_pair.first;
if(field_pair.second.optional && document.count(field_name) == 0) {
continue;
}
int facet_id = -1;
if(facet_schema.count(field_name) != 0) {
facet_id = facet_to_id[field_name];
@ -230,9 +234,13 @@ Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document,
for(const std::pair<std::string, field> & field_pair: search_schema) {
const std::string & field_name = field_pair.first;
if(field_pair.second.optional && document.count(field_name) == 0) {
continue;
}
if(document.count(field_name) == 0) {
return Option<>(400, "Field `" + field_name + "` has been declared in the schema, "
"but is not found in the document.");
"but is not found in the document.");
}
if(field_pair.second.type == field_types::STRING) {
@ -301,16 +309,6 @@ Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document,
}
}
// since every facet field has to be a search field, we don't have to revalidate types here
for(const std::pair<std::string, field> & field_pair: facet_schema) {
const std::string & field_name = field_pair.first;
if(document.count(field_name) == 0) {
return Option<>(400, "Field `" + field_name + "` has been declared as a facet field in the schema, "
"but is not found in the document.");
}
}
return Option<>(200);
}

View File

@ -225,8 +225,6 @@ TEST_F(CollectionSortingTest, ThreeSortFieldsLimit) {
field("max", field_types::INT32, false),
};
std::vector<sort_by> sort_fields = { sort_by("points", "DESC") };
coll1 = collectionManager.get_collection("coll1");
if(coll1 == nullptr) {
coll1 = collectionManager.create_collection("coll1", fields, "points").get();
@ -250,7 +248,7 @@ TEST_F(CollectionSortingTest, ThreeSortFieldsLimit) {
sort_by("min", "DESC"),
};
auto res_op = coll1->search("Jeremy", query_fields, "", {}, sort_fields_desc, 0, 10, 1, FREQUENCY, false);
auto res_op = coll1->search("the", query_fields, "", {}, sort_fields_desc, 0, 10, 1, FREQUENCY, false);
ASSERT_FALSE(res_op.ok());
ASSERT_STREQ("Only upto 3 sort_by fields can be specified.", res_op.error().c_str());

View File

@ -2290,3 +2290,73 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {
collectionManager.drop_collection("coll1");
}
// Exercises fields that are declared optional in the schema:
//  - documents that omit an optional field must still be indexed,
//  - optional fields can still be searched, filtered and faceted on,
//  - sorting on an optional field is rejected,
//  - an optional field cannot be used as the default sorting field.
TEST_F(CollectionTest, OptionalFields) {
    Collection *coll1;

    // The 4th constructor argument marks a field as optional; `title` and `max` stay mandatory.
    std::vector<field> fields = {
        field("title", field_types::STRING, false),
        field("description", field_types::STRING, true, true),
        field("max", field_types::INT32, false),
        field("scores", field_types::INT64_ARRAY, false, true),
        field("average", field_types::FLOAT, false, true),
        field("is_valid", field_types::BOOL, false, true),
    };

    coll1 = collectionManager.get_collection("coll1");
    if(coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", fields, "max").get();
    }

    std::ifstream infile(std::string(ROOT_DIR)+"test/optional_fields.jsonl");

    // Fail right away if the fixture is missing, rather than through a misleading
    // "found" count mismatch further down.
    ASSERT_TRUE(infile.is_open()) << "Unable to open fixture file test/optional_fields.jsonl";

    std::string json_line;

    while (std::getline(infile, json_line)) {
        auto add_op = coll1->add(json_line);
        // attach the insertion error to the assertion so that it shows up in the failure report
        ASSERT_TRUE(add_op.ok()) << add_op.error();
    }

    infile.close();

    // first must be able to fetch all records (i.e. all must have been indexed)
    auto res = coll1->search("*", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(6, res["found"].get<size_t>());

    // search on optional `description` field (one fixture record has no `description`)
    res = coll1->search("book", {"description"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, res["found"].get<size_t>());

    // filter on optional `average` field (one fixture record has no `average`)
    res = coll1->search("the", {"title"}, "average: >0", {}, {}, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, res["found"].get<size_t>());

    // facet on optional `description` field: all records match the query,
    // but only the records carrying a `description` contribute to the facet count
    res = coll1->search("the", {"title"}, "", {"description"}, {}, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(6, res["found"].get<size_t>());
    ASSERT_EQ(5, res["facet_counts"][0]["counts"][0]["count"].get<size_t>());
    ASSERT_STREQ("description", res["facet_counts"][0]["field_name"].get<std::string>().c_str());

    // sort_by optional `average` field should be rejected
    std::vector<sort_by> sort_fields = { sort_by("average", "DESC") };
    auto res_op = coll1->search("*", {"title"}, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false);
    ASSERT_FALSE(res_op.ok());
    ASSERT_STREQ("Cannot sort by `average` as it is defined as an optional field.", res_op.error().c_str());

    // default sorting field should not be declared optional
    fields = {
        field("title", field_types::STRING, false),
        field("score", field_types::INT32, false, true),
    };

    auto create_op = collectionManager.create_collection("coll2", fields, "score");
    ASSERT_FALSE(create_op.ok());
    ASSERT_STREQ("Default sorting field `score` cannot be an optional field.", create_op.error().c_str());

    collectionManager.drop_collection("coll1");
}

View File

@ -0,0 +1,6 @@
{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "average": 16.66, "is_valid": true}
{"title": "The quick brown fox.", "max": 25, "scores": [10, 25, 15], "average": 16.66, "is_valid": true}
{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "average": 16.66, "is_valid": true}
{"title": "The quick brown fox.", "description": "A book.", "max": 25, "average": 16.66, "is_valid": true}
{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "is_valid": true}
{"title": "The quick brown fox.", "description": "A book.", "max": 25, "scores": [10, 25, 15], "average": 16.66}