Add more tests for testing schema detection.
This commit is contained in:
parent f1b70384cc
commit 0a9cf4aee0
@@ -291,9 +291,9 @@ private:

     const std::string name;

-    const uint32_t collection_id;
+    const std::atomic<uint32_t> collection_id;

-    const uint64_t created_at;
+    const std::atomic<uint64_t> created_at;

     std::atomic<size_t> num_documents;
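This hunk turns plain integral members into std::atomic, so they can be read safely off the indexing thread without taking the collection mutex; the matching .load() call sites appear in later hunks. A minimal standalone sketch of the idea (hypothetical Stats struct, not Typesense code):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <thread>
#include <vector>

struct Stats {
    const std::atomic<uint32_t> collection_id{7};  // const atomics can still be load()ed
    std::atomic<std::size_t> num_documents{0};
};

int main() {
    Stats stats;
    std::vector<std::thread> writers;

    // four writers bump the counter concurrently; no mutex is needed
    for(int t = 0; t < 4; t++) {
        writers.emplace_back([&stats]() {
            for(int i = 0; i < 1000; i++) {
                stats.num_documents.fetch_add(1, std::memory_order_relaxed);
            }
        });
    }
    for(auto& w: writers) { w.join(); }

    // readers take a snapshot with load(), mirroring the collection_id.load()
    // and created_at.load() call sites later in this diff
    std::cout << stats.collection_id.load() << " " << stats.num_documents.load() << "\n";  // 7 4000
    return 0;
}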
@@ -486,10 +486,6 @@ public:

     bool is_exceeding_memory_threshold() const;

-    static void get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc,
-                                nlohmann::json &new_doc,
-                                nlohmann::json &del_doc);
-
     void parse_search_query(const std::string &query, std::vector<std::string>& q_include_tokens,
                             std::vector<std::string>& q_exclude_tokens) const;
@@ -225,6 +225,75 @@ struct field {

         return Option<bool>(true);
     }

+    static Option<bool> json_fields_to_fields(nlohmann::json& fields_json,
+                                              std::string& auto_detect_schema,
+                                              std::vector<field>& fields) {
+        size_t num_auto_detect_fields = 0;
+
+        for(nlohmann::json & field_json: fields_json) {
+            if(!field_json.is_object() ||
+               field_json.count(fields::name) == 0 || field_json.count(fields::type) == 0 ||
+               !field_json.at(fields::name).is_string() || !field_json.at(fields::type).is_string()) {
+
+                return Option<bool>(400, "Wrong format for `fields`. It should be an array of objects containing "
+                                         "`name`, `type`, `optional` and `facet` properties.");
+            }
+
+            if(field_json.count(fields::facet) != 0 && !field_json.at(fields::facet).is_boolean()) {
+                return Option<bool>(400, std::string("The `facet` property of the field `") +
+                                         field_json[fields::name].get<std::string>() + std::string("` should be a boolean."));
+            }
+
+            if(field_json.count(fields::optional) != 0 && !field_json.at(fields::optional).is_boolean()) {
+                return Option<bool>(400, std::string("The `optional` property of the field `") +
+                                         field_json[fields::name].get<std::string>() + std::string("` should be a boolean."));
+            }
+
+            if(field_json["name"] == "*") {
+                if(field_json["type"] == schema_detect_types::AUTO || field_json["type"] == schema_detect_types::STRINGIFY) {
+                    auto_detect_schema = field_json["type"];
+                    num_auto_detect_fields++;
+                } else {
+                    return Option<bool>(400, "The `type` of field `*` is invalid.");
+                }
+
+                if(field_json.count("facet") == 0) {
+                    field_json["facet"] = false;
+                }
+
+                if(field_json.count("optional") == 0) {
+                    field_json["optional"] = true;
+                }
+
+                if(field_json["optional"] == false) {
+                    return Option<bool>(400, "Field `*` must be an optional field.");
+                }
+
+                if(field_json["facet"] == true) {
+                    return Option<bool>(400, "Field `*` cannot be a facet field.");
+                }
+            }
+
+            if(field_json.count("facet") == 0) {
+                field_json["facet"] = false;
+            }
+
+            if(field_json.count("optional") == 0) {
+                field_json["optional"] = false;
+            }
+
+            fields.emplace_back(
+                field(field_json["name"], field_json["type"], field_json["facet"], field_json["optional"])
+            );
+        }
+
+        if(num_auto_detect_fields > 1) {
+            return Option<bool>(400, "There can be only one field named `*`.");
+        }
+
+        return Option<bool>(true);
+    }
 };

 struct filter {
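This new helper becomes the single place that validates a `fields` array and recognizes the wildcard `*` field that switches on schema detection; the HTTP handler hunk further down delegates to it. A hedged usage sketch (the function and its outputs follow the diff; the surrounding snippet and include path are illustrative):

#include <iostream>
#include "field.h"  // assumed location of `field`, the `fields::` keys and Option<>

void create_schema_sketch() {
    nlohmann::json fields_json = nlohmann::json::array();
    fields_json.push_back({ {"name", "*"}, {"type", "auto"} });

    std::string auto_detect_schema;  // receives "auto" or "stringify" when a `*` field is present
    std::vector<field> parsed_fields;

    Option<bool> op = field::json_fields_to_fields(fields_json, auto_detect_schema, parsed_fields);
    if(!op.ok()) {
        // e.g. "Field `*` must be an optional field."
        std::cerr << op.code() << ": " << op.error() << std::endl;
    }
}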
@@ -243,6 +243,9 @@ private:

     static void compute_facet_stats(facet &a_facet, uint64_t raw_value, const std::string & field_type);

+    static void get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc,
+                                nlohmann::json &new_doc, nlohmann::json &del_doc);
+
     static Option<uint32_t> coerce_string(const DIRTY_VALUES& dirty_values, const field& a_field, nlohmann::json &document,
                                           const std::string &field_name, const int array_index);
@@ -58,6 +58,7 @@ Collection::~Collection() {
 }

 uint32_t Collection::get_next_seq_id() {
+    std::shared_lock lock(mutex);
     store->increment(get_next_seq_id_key(name), 1);
     return next_seq_id++;
 }
@@ -154,7 +155,7 @@ nlohmann::json Collection::get_summary_json() const {
     json_response["name"] = name;
     json_response["num_memory_shards"] = num_memory_shards.load();
     json_response["num_documents"] = num_documents.load();
-    json_response["created_at"] = created_at;
+    json_response["created_at"] = created_at.load();

     nlohmann::json fields_arr;
@@ -195,27 +196,6 @@ Option<nlohmann::json> Collection::add(const std::string & json_str,
     return Option<nlohmann::json>(document);
 }

-void Collection::get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc,
-                                 nlohmann::json &new_doc, nlohmann::json &del_doc) {
-
-    for(auto it = old_doc.begin(); it != old_doc.end(); ++it) {
-        new_doc[it.key()] = it.value();
-    }
-
-    for(auto it = document.begin(); it != document.end(); ++it) {
-        // adds new key or overrides existing key from `old_doc`
-        new_doc[it.key()] = it.value();
-
-        // if the update document contains a field that exists in old, we record that (for delete + reindex)
-        bool field_exists_in_old_doc = (old_doc.count(it.key()) != 0);
-        if(field_exists_in_old_doc) {
-            // key exists in the stored doc, so it must be reindexed
-            // we need to check for this because a field can be optional
-            del_doc[it.key()] = old_doc[it.key()];
-        }
-    }
-}
-
 nlohmann::json Collection::add_many(std::vector<std::string>& json_lines, nlohmann::json& document,
                                     const index_operation_t& operation, const std::string& id,
                                     const DIRTY_VALUES& dirty_values) {
@@ -251,7 +231,6 @@ nlohmann::json Collection::add_many(std::vector<std::string>& json_lines, nlohma
         record.is_update = !doc_seq_id_op.get().is_new;

         if(record.is_update) {
             get_document_from_store(get_seq_id_key(seq_id), record.old_doc);
-            get_doc_changes(document, record.old_doc, record.new_doc, record.del_doc);
         }

         // if `auto_detect_schema` is enabled, we will have to update schema first before indexing
@@ -313,6 +292,8 @@ void Collection::batch_index(std::vector<std::vector<index_record>> &index_batch

         if(index_record.indexed.ok()) {
             if(index_record.is_update) {
+                //get_doc_changes(index_record.doc, index_record.old_doc, index_record.new_doc, index_record.del_doc);
+
                 const std::string& serialized_json = index_record.new_doc.dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore);
                 bool write_ok = store->insert(get_seq_id_key(index_record.seq_id), serialized_json);
@@ -425,11 +406,9 @@ void Collection::prune_document(nlohmann::json &document, const spp::sparse_hash
                                 const spp::sparse_hash_set<std::string>& exclude_fields) {
     auto it = document.begin();
     for(; it != document.end(); ) {
-        if(document.count(Collection::DOC_META_KEY) != 0) {
-            document.erase(Collection::DOC_META_KEY);
-        }
-
-        if(exclude_fields.count(it.key()) != 0 || (!include_fields.empty() && include_fields.count(it.key()) == 0)) {
+        if (exclude_fields.count(it.key()) != 0 ||
+            (!include_fields.empty() && include_fields.count(it.key()) == 0) ||
+            document.count(Collection::DOC_META_KEY) != 0) {
             it = document.erase(it);
         } else {
             ++it;
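The rewritten loop relies on the standard erase-while-iterating idiom: nlohmann::json's erase(it) returns the iterator following the erased element, so the loop never touches an invalidated iterator. A standalone sketch of the same loop shape (assumed simplifications: std::set in place of spp::sparse_hash_set, and a per-key comparison for the reserved meta key):

#include <nlohmann/json.hpp>
#include <set>
#include <string>

void prune_sketch(nlohmann::json& document,
                  const std::set<std::string>& include_fields,
                  const std::set<std::string>& exclude_fields,
                  const std::string& meta_key) {
    for(auto it = document.begin(); it != document.end(); ) {
        bool excluded = exclude_fields.count(it.key()) != 0;
        bool not_included = !include_fields.empty() && include_fields.count(it.key()) == 0;

        if(excluded || not_included || it.key() == meta_key) {
            it = document.erase(it);   // erase() hands back the next valid iterator
        } else {
            ++it;
        }
    }
}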
@@ -1586,12 +1565,15 @@ Option<nlohmann::json> Collection::get(const std::string & id) const {
 }

 void Collection::remove_document(const nlohmann::json & document, const uint32_t seq_id, bool remove_from_store) {
-    std::unique_lock lock(mutex);
     const std::string& id = document["id"];

-    Index* index = indices[seq_id % num_memory_shards];
-    index->remove(seq_id, document);
-    num_documents -= 1;
+    {
+        std::unique_lock lock(mutex);
+
+        Index* index = indices[seq_id % num_memory_shards];
+        index->remove(seq_id, document);
+        num_documents -= 1;
+    }

     if(remove_from_store) {
         store->remove(get_doc_id_key(id));
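Hoisting the index mutation into its own block scope is the point of this change: the RAII std::unique_lock is released as soon as the in-memory work is done, so the store removal that follows does not run under the collection mutex. A compact sketch of the pattern with hypothetical stand-in members:

#include <mutex>
#include <shared_mutex>

std::shared_mutex coll_mutex;   // stand-in for the collection's `mutex` member
size_t doc_count = 0;           // stand-in for the document counter

void remove_document_sketch(bool remove_from_store) {
    {
        // critical section: only the in-memory index mutation holds the lock
        std::unique_lock lock(coll_mutex);
        doc_count -= 1;
        // index->remove(seq_id, document) would run here
    }   // RAII: lock released at the closing brace

    if(remove_from_store) {
        // the slower on-disk delete proceeds without blocking readers
        // store->remove(get_doc_id_key(id)) would run here
    }
}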
@@ -1687,7 +1669,7 @@ Option<uint32_t> Collection::remove_override(const std::string & id) {
 }

 size_t Collection::get_num_memory_shards() {
-    return num_memory_shards;
+    return num_memory_shards.load();
 }

 uint32_t Collection::get_seq_id_from_key(const std::string & key) {
@@ -1712,11 +1694,12 @@ std::string Collection::get_doc_id_key(const std::string & doc_id) const {
 }

 std::string Collection::get_name() const {
+    std::shared_lock lock(mutex);
     return name;
 }

 uint64_t Collection::get_created_at() const {
-    return created_at;
+    return created_at.load();
 }

 size_t Collection::get_num_documents() const {
@@ -1724,7 +1707,7 @@ size_t Collection::get_num_documents() const {
 }

 uint32_t Collection::get_collection_id() const {
-    return collection_id;
+    return collection_id.load();
 }

 Option<uint32_t> Collection::doc_id_to_seq_id(const std::string & doc_id) const {
@@ -1743,6 +1726,8 @@ Option<uint32_t> Collection::doc_id_to_seq_id(const std::string & doc_id) const
 }

 std::vector<std::string> Collection::get_facet_fields() {
+    std::shared_lock lock(mutex);
+
     std::vector<std::string> facet_fields_copy;
     for(auto it = facet_schema.begin(); it != facet_schema.end(); ++it) {
         facet_fields_copy.push_back(it->first);
@@ -1752,6 +1737,8 @@ std::vector<std::string> Collection::get_facet_fields() {
 }

 std::vector<field> Collection::get_sort_fields() {
+    std::shared_lock lock(mutex);
+
     std::vector<field> sort_fields_copy;
     for(auto it = sort_schema.begin(); it != sort_schema.end(); ++it) {
         sort_fields_copy.push_back(it->second);
@@ -1761,10 +1748,12 @@ std::vector<field> Collection::get_sort_fields() {
 }

 std::vector<field> Collection::get_fields() {
+    std::shared_lock lock(mutex);
     return fields;
 }

 std::unordered_map<std::string, field> Collection::get_schema() {
+    std::shared_lock lock(mutex);
     return search_schema;
 };
@@ -1785,6 +1774,7 @@ std::string Collection::get_seq_id_collection_prefix() const {
 }

 std::string Collection::get_default_sorting_field() {
+    std::shared_lock lock(mutex);
     return default_sorting_field;
 }
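Every getter in this run of hunks now takes a std::shared_lock on the collection mutex, so any number of concurrent readers can copy metadata out while mutating operations hold a std::unique_lock exclusively. A minimal illustration of the reader/writer idiom (hypothetical class, same locking pattern):

#include <shared_mutex>
#include <string>

class NamedThing {
public:
    std::string get_name() const {
        std::shared_lock lock(mutex);   // many readers may hold this at once
        return name;
    }

    void set_name(std::string n) {
        std::unique_lock lock(mutex);   // writers wait for exclusive access
        name = std::move(n);
    }

private:
    mutable std::shared_mutex mutex;    // mutable, so const getters can lock it
    std::string name;
};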
@@ -133,8 +133,6 @@ bool post_create_collection(http_req & req, http_res & res) {

     // field specific validation

-    std::vector<field> fields;
-
     if(!req_json["fields"].is_array() || req_json["fields"].empty()) {
         res.set_400("The `fields` value should be an array of objects containing "
                     "`name`, `type` and optionally, `facet` properties.");
@@ -142,52 +140,11 @@ bool post_create_collection(http_req & req, http_res & res) {
     }

     std::string auto_detect_schema = schema_detect_types::OFF;
-    size_t num_auto_detect_fields = 0;
-
-    for(nlohmann::json & field_json: req_json["fields"]) {
-        if(!field_json.is_object() ||
-           field_json.count(fields::name) == 0 || field_json.count(fields::type) == 0 ||
-           !field_json.at(fields::name).is_string() || !field_json.at(fields::type).is_string()) {
-
-            res.set_400("Wrong format for `fields`. It should be an array of objects containing "
-                        "`name`, `type` and optionally, `facet` properties.");
-            return false;
-        }
-
-        if(field_json.count("facet") != 0 && !field_json.at(fields::facet).is_boolean()) {
-            res.set_400(std::string("The `facet` property of the field `") +
-                        field_json.at(fields::name).get<std::string>() + "` should be a boolean.");
-            return false;
-        }
-
-        if(field_json.count("facet") == 0) {
-            field_json["facet"] = false;
-        }
-
-        if(field_json.count("optional") == 0) {
-            field_json["optional"] = false;
-        }
-
-        if(field_json["name"] == "*") {
-            if(field_json["type"] == schema_detect_types::AUTO || field_json["type"] == schema_detect_types::STRINGIFY) {
-                auto_detect_schema = field_json["type"];
-                num_auto_detect_fields++;
-            } else {
-                res.set_400(std::string("The `type` of field `") +
-                            field_json["name"].get<std::string>() + "` is invalid.");
-                return false;
-            }
-
-            continue;
-        }
-
-        fields.emplace_back(
-            field(field_json["name"], field_json["type"], field_json["facet"], field_json["optional"])
-        );
-    }
-
-    if(num_auto_detect_fields > 1) {
-        res.set_400("There can be only one field with name `*`.");
+    std::vector<field> fields;
+    auto parse_op = field::json_fields_to_fields(req_json["fields"], auto_detect_schema, fields);
+
+    if(!parse_op.ok()) {
+        res.set(parse_op.code(), parse_op.error());
         return false;
     }
@@ -485,6 +485,7 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_

         if(index_rec.is_update) {
             // scrub string fields to reduce delete ops
+            get_doc_changes(index_rec.doc, index_rec.old_doc, index_rec.new_doc, index_rec.del_doc);
             index->scrub_reindex_doc(index_rec.doc, index_rec.del_doc, index_rec.old_doc);
             index->remove(index_rec.seq_id, index_rec.del_doc);
         }
@@ -2627,3 +2628,23 @@ Option<uint32_t> Index::coerce_float(const DIRTY_VALUES& dirty_values, const fie

     return Option<uint32_t>(200);
 }
+
+void Index::get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc, nlohmann::json &new_doc,
+                            nlohmann::json &del_doc) {
+    for(auto it = old_doc.begin(); it != old_doc.end(); ++it) {
+        new_doc[it.key()] = it.value();
+    }
+
+    for(auto it = document.begin(); it != document.end(); ++it) {
+        // adds new key or overrides existing key from `old_doc`
+        new_doc[it.key()] = it.value();
+
+        // if the update document contains a field that exists in old, we record that (for delete + reindex)
+        bool field_exists_in_old_doc = (old_doc.count(it.key()) != 0);
+        if(field_exists_in_old_doc) {
+            // key exists in the stored doc, so it must be reindexed
+            // we need to check for this because a field can be optional
+            del_doc[it.key()] = old_doc[it.key()];
+        }
+    }
+}
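get_doc_changes is the heart of partial updates: new_doc becomes the stored document overlaid with the incoming fields, and del_doc collects only the old values of overwritten keys, so only those fields are un-indexed and reindexed. A self-contained re-implementation with a worked example, to make the contract concrete (illustrative, not the Typesense source):

#include <nlohmann/json.hpp>
#include <iostream>

static void doc_changes(const nlohmann::json& document, const nlohmann::json& old_doc,
                        nlohmann::json& new_doc, nlohmann::json& del_doc) {
    new_doc = old_doc;                              // start from the stored doc
    for(auto it = document.begin(); it != document.end(); ++it) {
        new_doc[it.key()] = it.value();             // apply the partial update
        if(old_doc.count(it.key()) != 0) {
            del_doc[it.key()] = old_doc[it.key()];  // old value must be un-indexed first
        }
    }
}

int main() {
    nlohmann::json old_doc = {{"id", "0"}, {"title", "FIRST"}, {"points", 10}};
    nlohmann::json update = {{"title", "SECOND"}, {"stars", 5}};
    nlohmann::json new_doc, del_doc;

    doc_changes(update, old_doc, new_doc, del_doc);

    std::cout << new_doc.dump() << "\n";  // {"id":"0","points":10,"stars":5,"title":"SECOND"}
    std::cout << del_doc.dump() << "\n";  // {"title":"FIRST"}
    return 0;
}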
@@ -242,6 +242,94 @@ TEST_F(CollectionAllFieldsTest, NonOptionalFieldShouldNotBeDropped) {
     add_op = coll1->add(doc.dump(), CREATE, "0", DIRTY_VALUES::COERCE_OR_DROP);
     ASSERT_FALSE(add_op.ok());
     ASSERT_EQ("Field `points` must be an int32.", add_op.error());

     collectionManager.drop_collection("coll1");
 }

+TEST_F(CollectionAllFieldsTest, ShouldBeAbleToUpdateSchemaDetectedDocs) {
+    Collection *coll1;
+
+    std::vector<field> fields = {
+
+    };
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 4, fields, "", 0, schema_detect_types::AUTO).get();
+    }
+
+    nlohmann::json doc;
+    doc["title"] = "FIRST";
+    doc["scores"] = {100, 200, 300};
+
+    Option<nlohmann::json> add_op = coll1->add(doc.dump(), CREATE, "0", DIRTY_VALUES::REJECT);
+    ASSERT_TRUE(add_op.ok());
+
+    // now update both values and reinsert
+    doc["title"] = "SECOND";
+    doc["scores"] = {100, 250, "300", 400};
+
+    add_op = coll1->add(doc.dump(), UPDATE, "0", DIRTY_VALUES::COERCE_OR_DROP);
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("second", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("SECOND", results["hits"][0]["document"]["title"].get<std::string>());
+    ASSERT_EQ(4, results["hits"][0]["document"]["scores"].size());
+
+    ASSERT_EQ(100, results["hits"][0]["document"]["scores"][0].get<size_t>());
+    ASSERT_EQ(250, results["hits"][0]["document"]["scores"][1].get<size_t>());
+    ASSERT_EQ(300, results["hits"][0]["document"]["scores"][2].get<size_t>());
+    ASSERT_EQ(400, results["hits"][0]["document"]["scores"][3].get<size_t>());
+
+    // insert multiple docs at the same time
+    const size_t NUM_DOCS = 20;
+    std::vector<std::string> json_lines;
+
+    for(size_t i = 0; i < NUM_DOCS; i++) {
+        const std::string &i_str = std::to_string(i);
+        doc["title"] = std::string("upserted ") + std::to_string(StringUtils::hash_wy(i_str.c_str(), i_str.size()));
+        doc["scores"] = {i};
+        doc["max"] = i;
+        doc["id"] = std::to_string(i+10);
+
+        json_lines.push_back(doc.dump());
+    }
+
+    nlohmann::json insert_doc;
+    auto res = coll1->add_many(json_lines, insert_doc, UPSERT);
+    ASSERT_TRUE(res["success"].get<bool>());
+
+    // now we will replace all `max` values with the same value and assert that
+    json_lines.clear();
+    insert_doc.clear();
+
+    for(size_t i = 0; i < NUM_DOCS; i++) {
+        const std::string &i_str = std::to_string(i);
+        doc.clear();
+        doc["title"] = std::string("updated ") + std::to_string(StringUtils::hash_wy(i_str.c_str(), i_str.size()));
+        doc["scores"] = {1000, 2000};
+        doc["max"] = 2000;
+        doc["id"] = std::to_string(i+10);
+
+        json_lines.push_back(doc.dump());
+    }
+
+    res = coll1->add_many(json_lines, insert_doc, UPDATE);
+    ASSERT_TRUE(res["success"].get<bool>());
+
+    results = coll1->search("updated", {"title"}, "", {}, {}, 0, 50, 1, FREQUENCY, false).get();
+    ASSERT_EQ(20, results["hits"].size());
+
+    for(auto& hit: results["hits"]) {
+        ASSERT_EQ(2000, hit["document"]["max"].get<int>());
+        ASSERT_EQ(2, hit["document"]["scores"].size());
+        ASSERT_EQ(1000, hit["document"]["scores"][0].get<int>());
+        ASSERT_EQ(2000, hit["document"]["scores"][1].get<int>());
+    }
+
+    collectionManager.drop_collection("coll1");
+}
+
 TEST_F(CollectionAllFieldsTest, StringifyAllValues) {
@@ -267,4 +355,61 @@ TEST_F(CollectionAllFieldsTest, StringifyAllValues) {
         ASSERT_EQ(2, results["hits"][0]["document"]["int_values"].size());
         ASSERT_EQ("1", results["hits"][0]["document"]["int_values"][0].get<std::string>());
         ASSERT_EQ("2", results["hits"][0]["document"]["int_values"][1].get<std::string>());
     }

     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionAllFieldsTest, JsonFieldsToFieldsConversion) {
+    nlohmann::json fields_json = nlohmann::json::array();
+    nlohmann::json all_field;
+    all_field[fields::name] = "*";
+    all_field[fields::type] = "stringify";
+    fields_json.emplace_back(all_field);
+
+    std::string auto_detect_schema;
+    std::vector<field> fields;
+
+    auto parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+
+    ASSERT_TRUE(parse_op.ok());
+    ASSERT_EQ(1, fields.size());
+    ASSERT_EQ("stringify", auto_detect_schema);
+    ASSERT_EQ(true, fields[0].optional);
+    ASSERT_EQ(false, fields[0].facet);
+    ASSERT_EQ("*", fields[0].name);
+    ASSERT_EQ("stringify", fields[0].type);
+
+    // reject when you try to set optional to false or facet to true
+    fields_json[0][fields::optional] = false;
+    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+
+    ASSERT_FALSE(parse_op.ok());
+    ASSERT_EQ("Field `*` must be an optional field.", parse_op.error());
+
+    fields_json[0][fields::optional] = true;
+    fields_json[0][fields::facet] = true;
+    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+
+    ASSERT_FALSE(parse_op.ok());
+    ASSERT_EQ("Field `*` cannot be a facet field.", parse_op.error());
+
+    fields_json[0][fields::facet] = false;
+
+    // can have only one "*" field
+    fields_json.emplace_back(all_field);
+
+    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+
+    ASSERT_FALSE(parse_op.ok());
+    ASSERT_EQ("There can be only one field named `*`.", parse_op.error());
+
+    // try with the `auto` type
+    fields_json.clear();
+    fields.clear();
+    all_field[fields::type] = "auto";
+    fields_json.emplace_back(all_field);
+
+    parse_op = field::json_fields_to_fields(fields_json, auto_detect_schema, fields);
+    ASSERT_TRUE(parse_op.ok());
+    ASSERT_EQ("auto", fields[0].type);
+}
@@ -169,13 +169,13 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
     override_t::parse(override_json_include, "", override_include);

     nlohmann::json override_json = {
-            {"id", "exclude-rule"},
-            {
-                "rule", {
-                    {"query", "of"},
-                    {"match", override_t::MATCH_EXACT}
-                }
-            }
+        {"id", "exclude-rule"},
+        {
+            "rule", {
+                {"query", "of"},
+                {"match", override_t::MATCH_EXACT}
+            }
+        }
     };
     override_json["excludes"] = nlohmann::json::array();
     override_json["excludes"][0] = nlohmann::json::object();
@@ -304,8 +304,9 @@ TEST_F(CollectionManagerTest, RestoreAutoSchemaDocsOnRestart) {
     ASSERT_EQ(1, coll1->get_collection_id());
     ASSERT_EQ(3, coll1->get_sort_fields().size());

-    // index a document with a bad field value with COERCE_OR_DROP setting
-    auto doc_json = R"({"title": "Unique record.", "max": 25, "scores": [22, "how", 44],
+    // index a document with 2 bad field values with COERCE_OR_DROP setting
+    // `title` is an integer and `average` is a string
+    auto doc_json = R"({"title": 12345, "max": 25, "scores": [22, "how", 44],
                        "average": "bad data", "is_valid": true})";

     Option<nlohmann::json> add_op = coll1->add(doc_json, CREATE, "", DIRTY_VALUES::COERCE_OR_DROP);
@@ -362,11 +363,14 @@ TEST_F(CollectionManagerTest, RestoreAutoSchemaDocsOnRestart) {
     }

     // try searching for record with bad data
-    auto results = restored_coll->search("unique", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();
+    auto results = restored_coll->search("12345", {"title"}, "", {}, {}, 0, 10, 1, FREQUENCY, false).get();

     ASSERT_EQ(1, results["hits"].size());
-    ASSERT_STREQ("Unique record.", results["hits"][0]["document"]["title"].get<std::string>().c_str());
+
+    // int to string conversion should be done for `title` while `average` field must be dropped
+    ASSERT_STREQ("12345", results["hits"][0]["document"]["title"].get<std::string>().c_str());
+    ASSERT_EQ(0, results["hits"][0]["document"].count("average"));

     ASSERT_EQ(2, results["hits"][0]["document"]["scores"].size());
     ASSERT_EQ(22, results["hits"][0]["document"]["scores"][0]);
     ASSERT_EQ(44, results["hits"][0]["document"]["scores"][1]);
@@ -91,7 +91,7 @@ TEST_F(CollectionTest, VerifyCountOfDocuments) {
     ASSERT_EQ(DIRTY_VALUES::REJECT, collection->parse_dirty_values_option(empty_dirty_values));
 }

-TEST_F(CollectionTest, MetaKeyIsNotReturnedAsDocumentField) {
+TEST_F(CollectionTest, MetaKeyChecks) {
     nlohmann::json results = collection->search("the", query_fields, "", {}, sort_fields, 0, 10).get();
     ASSERT_EQ(7, results["hits"].size());
     ASSERT_EQ(7, results["found"].get<int>());
@@ -100,6 +100,16 @@ TEST_F(CollectionTest, MetaKeyIsNotReturnedAsDocumentField) {
         nlohmann::json doc = results["hits"].at(i)["document"];
         ASSERT_EQ(0, doc.count(Collection::DOC_META_KEY));
     }

+    // don't allow a document with meta key to be indexed since it is reserved
+    nlohmann::json doc;
+    doc["title"] = "foo bar";
+    doc["points"] = 100;
+    doc[Collection::DOC_META_KEY] = "override";
+
+    auto op = collection->add(doc.dump());
+    ASSERT_FALSE(op.ok());
+    ASSERT_EQ("Document cannot contain a `$TSM$_` key.", op.error());
 }

 TEST_F(CollectionTest, RetrieveADocumentById) {
@@ -114,6 +124,9 @@ TEST_F(CollectionTest, RetrieveADocumentById) {
     id = doc["id"];
     ASSERT_STREQ("foo", id.c_str());

+    // returned document should not have internal doc meta key
+    ASSERT_EQ(0, doc.count(Collection::DOC_META_KEY));
+
     doc_option = collection->get("baz");
     ASSERT_FALSE(doc_option.ok());
 }
@@ -652,7 +665,9 @@ TEST_F(CollectionTest, MultiOccurrenceString) {
     document["title"] = "The brown fox was the tallest of the lot and the quickest of the trot.";
     document["points"] = 100;

-    coll_multi_string->add(document.dump());
+    auto doc = coll_multi_string->add(document.dump()).get();
+
+    ASSERT_EQ(0, doc.count(Collection::DOC_META_KEY));

     query_fields = {"title"};
     nlohmann::json results = coll_multi_string->search("the", query_fields, "", {}, sort_fields, 0, 10, 1,