mirror of
https://github.com/typesense/typesense.git
synced 2025-05-25 08:17:38 +08:00
1. Move document specific actions under /documents 2. Document creation echoes the full document 3. Collections summary returns full detail on each collection 4. Collections summary endpoint has no nested root attribute 5. When collection or document is deleted, the whole entity is returned in response
1636 lines
66 KiB
C++
1636 lines
66 KiB
C++
#include <gtest/gtest.h>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <fstream>
|
|
#include <algorithm>
|
|
#include <collection_manager.h>
|
|
#include "collection.h"
|
|
#include "person.h"
|
|
#include "number.h"
|
|
|
|
class CollectionTest : public ::testing::Test {
|
|
protected:
|
|
Collection *collection;
|
|
std::vector<std::string> query_fields;
|
|
Store *store;
|
|
CollectionManager & collectionManager = CollectionManager::get_instance();
|
|
std::vector<sort_by> sort_fields;
|
|
|
|
void setupCollection() {
|
|
std::string state_dir_path = "/tmp/typesense_test/collection";
|
|
std::cout << "Truncating and creating: " << state_dir_path << std::endl;
|
|
system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str());
|
|
|
|
store = new Store(state_dir_path);
|
|
collectionManager.init(store, "auth_key", "search_auth_key");
|
|
|
|
std::ifstream infile(std::string(ROOT_DIR)+"test/documents.jsonl");
|
|
std::vector<field> search_fields = {
|
|
field("title", field_types::STRING, false),
|
|
field("points", field_types::INT32, false)
|
|
};
|
|
|
|
query_fields = {"title"};
|
|
sort_fields = { sort_by("points", "DESC") };
|
|
|
|
collection = collectionManager.get_collection("collection");
|
|
if(collection == nullptr) {
|
|
collection = collectionManager.create_collection("collection", search_fields, "points").get();
|
|
}
|
|
|
|
std::string json_line;
|
|
|
|
// dummy record for record id 0: to make the test record IDs to match with line numbers
|
|
json_line = "{\"points\":10,\"title\":\"z\"}";
|
|
collection->add(json_line);
|
|
|
|
while (std::getline(infile, json_line)) {
|
|
collection->add(json_line);
|
|
}
|
|
|
|
infile.close();
|
|
}
|
|
|
|
virtual void SetUp() {
|
|
setupCollection();
|
|
}
|
|
|
|
virtual void TearDown() {
|
|
collectionManager.drop_collection("collection");
|
|
delete store;
|
|
}
|
|
};
|
|
|
|
// The collection must hold every fixture document plus the placeholder record.
TEST_F(CollectionTest, VerifyCountOfDocuments) {
    // The fixture adds 1 placeholder record so that line numbers of the fixtures file
    // match the sequence numbers.
    const int fixture_records = 24;
    const int placeholder_records = 1;
    ASSERT_EQ(fixture_records + placeholder_records, collection->get_num_documents());
}
|
|
|
|
// Documents can be fetched by id; an unknown id yields a failed Option.
TEST_F(CollectionTest, RetrieveADocumentById) {
    Option<nlohmann::json> doc_option = collection->get("1");
    ASSERT_TRUE(doc_option.ok());
    nlohmann::json doc = doc_option.get();
    std::string id = doc["id"];
    // Previously the fetched id was stored but never verified.
    ASSERT_STREQ("1", id.c_str());

    doc_option = collection->get("foo");
    ASSERT_TRUE(doc_option.ok());
    doc = doc_option.get();
    id = doc["id"];
    ASSERT_STREQ("foo", id.c_str());

    // An id that was never indexed must not resolve.
    doc_option = collection->get("baz");
    ASSERT_FALSE(doc_option.ok());
}
|
|
|
|
// Searching for "the" must return the same ordering for both DESC and ASC sorting on points.
TEST_F(CollectionTest, ExactSearchShouldBeStable) {
    std::vector<std::string> facets;

    nlohmann::json desc_results = collection->search("the", query_fields, "", facets, sort_fields, 0, 10).get();
    ASSERT_EQ(7, desc_results["hits"].size());
    ASSERT_EQ(7, desc_results["found"].get<int>());

    // For two documents of the same score, the larger doc_id appears first
    std::vector<std::string> desc_ids = {"1", "6", "foo", "13", "10", "8", "16"};

    for(size_t pos = 0; pos < desc_results["hits"].size(); ++pos) {
        nlohmann::json hit = desc_results["hits"].at(pos);
        const std::string & expected_id = desc_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // check ASC sorting
    std::vector<sort_by> sort_fields_asc = { sort_by("points", "ASC") };

    nlohmann::json asc_results = collection->search("the", query_fields, "", facets, sort_fields_asc, 0, 10).get();
    ASSERT_EQ(7, asc_results["hits"].size());
    ASSERT_EQ(7, asc_results["found"].get<int>());

    std::vector<std::string> asc_ids = {"16", "13", "10", "8", "6", "foo", "1"};

    for(size_t pos = 0; pos < asc_results["hits"].size(); ++pos) {
        nlohmann::json hit = asc_results["hits"].at(pos);
        const std::string & expected_id = asc_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// A two-word query is checked under DESC sort, ASC sort and with a smaller page of results.
TEST_F(CollectionTest, ExactPhraseSearch) {
    std::vector<std::string> facets;

    nlohmann::json desc_results = collection->search("rocket launch", query_fields, "", facets, sort_fields, 0, 10).get();
    ASSERT_EQ(5, desc_results["hits"].size());
    ASSERT_EQ(5, desc_results["found"].get<uint32_t>());

    /*
       Sort by (match, diff, score)
       8:   score: 12, diff: 0
       1:   score: 15, diff: 4
       17:  score: 8,  diff: 4
       16:  score: 10, diff: 5
       13:  score: 12, (single word match)
    */
    std::vector<std::string> desc_ids = {"8", "1", "17", "16", "13"};

    for(size_t pos = 0; pos < desc_results["hits"].size(); ++pos) {
        nlohmann::json hit = desc_results["hits"].at(pos);
        const std::string & expected_id = desc_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // Check ASC sort order
    std::vector<sort_by> sort_fields_asc = { sort_by("points", "ASC") };
    nlohmann::json asc_results = collection->search("rocket launch", query_fields, "", facets, sort_fields_asc, 0, 10).get();
    ASSERT_EQ(5, asc_results["hits"].size());
    ASSERT_EQ(5, asc_results["found"].get<uint32_t>());

    std::vector<std::string> asc_ids = {"8", "17", "1", "16", "13"};

    for(size_t pos = 0; pos < asc_results["hits"].size(); ++pos) {
        nlohmann::json hit = asc_results["hits"].at(pos);
        const std::string & expected_id = asc_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // Check pagination: only 3 hits come back while "found" still reports all 5
    nlohmann::json paged_results = collection->search("rocket launch", query_fields, "", facets, sort_fields, 0, 3).get();
    ASSERT_EQ(3, paged_results["hits"].size());
    ASSERT_EQ(5, paged_results["found"].get<uint32_t>());

    std::vector<std::string> paged_ids = {"8", "1", "17"};

    for(size_t pos = 0; pos < 3; ++pos) {
        nlohmann::json hit = paged_results["hits"].at(pos);
        const std::string & expected_id = paged_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// Tokens that are not found in the index should be skipped
TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
    std::vector<std::string> facets;

    // The same two documents are expected for the first two searches below.
    std::vector<std::string> from_ids = {"2", "17"};

    nlohmann::json zero_cost = collection->search("DoesNotExist from", query_fields, "", facets, sort_fields, 0, 10).get();
    ASSERT_EQ(2, zero_cost["hits"].size());

    for(size_t pos = 0; pos < zero_cost["hits"].size(); ++pos) {
        nlohmann::json hit = zero_cost["hits"].at(pos);
        const std::string & expected_id = from_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // with non-zero cost
    nlohmann::json non_zero_cost = collection->search("DoesNotExist from", query_fields, "", facets, sort_fields, 1, 10).get();
    ASSERT_EQ(2, non_zero_cost["hits"].size());

    for(size_t pos = 0; pos < non_zero_cost["hits"].size(); ++pos) {
        nlohmann::json hit = non_zero_cost["hits"].at(pos);
        const std::string & expected_id = from_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // with 2 indexed words
    nlohmann::json two_indexed = collection->search("from DoesNotExist insTruments", query_fields, "", facets, sort_fields, 1, 10).get();
    ASSERT_EQ(2, two_indexed["hits"].size());
    std::vector<std::string> two_indexed_ids = {"2", "17"};

    for(size_t pos = 0; pos < two_indexed["hits"].size(); ++pos) {
        nlohmann::json hit = two_indexed["hits"].at(pos);
        const std::string & expected_id = two_indexed_ids.at(pos);
        std::string actual_id = hit["document"]["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // A query made up solely of unindexed tokens yields no hits, regardless of the cost argument.
    nlohmann::json none_strict = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, sort_fields, 0, 10).get();
    ASSERT_EQ(0, none_strict["hits"].size());

    nlohmann::json none_relaxed = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, sort_fields, 2, 10).get();
    ASSERT_EQ(0, none_relaxed["hits"].size());
}
|
|
|
|
// Checks the hit order returned for the two-word query "rocket research".
TEST_F(CollectionTest, PartialPhraseSearch) {
    std::vector<std::string> facets;
    nlohmann::json search_results = collection->search("rocket research", query_fields, "", facets, sort_fields, 0, 10).get();
    ASSERT_EQ(6, search_results["hits"].size());

    std::vector<std::string> expected_ids = {"19", "1", "10", "8", "16", "17"};

    for(size_t pos = 0; pos < search_results["hits"].size(); ++pos) {
        nlohmann::json hit = search_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = expected_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// Misspelled queries should still resolve to the expected documents.
TEST_F(CollectionTest, QueryWithTypo) {
    std::vector<std::string> facets;

    nlohmann::json biological_results = collection->search("kind biologcal", query_fields, "", facets, sort_fields, 2, 3).get();
    ASSERT_EQ(3, biological_results["hits"].size());

    std::vector<std::string> biological_ids = {"19", "20", "21"};

    for(size_t pos = 0; pos < biological_results["hits"].size(); ++pos) {
        nlohmann::json hit = biological_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = biological_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json short_token_results = collection->search("fer thx", query_fields, "", facets, sort_fields, 1, 3).get();
    std::vector<std::string> short_token_ids = {"1", "10", "13"};

    ASSERT_EQ(3, short_token_results["hits"].size());

    for(size_t pos = 0; pos < short_token_results["hits"].size(); ++pos) {
        nlohmann::json hit = short_token_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = short_token_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// Runs the misspelled query "loox" with both MAX_SCORE and FREQUENCY token ordering,
// at several result limits, and checks the hit order each time.
TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
    std::vector<std::string> facets;

    nlohmann::json top2_max_score = collection->search("loox", query_fields, "", facets, sort_fields, 1, 2, 1, MAX_SCORE, false).get();
    ASSERT_EQ(2, top2_max_score["hits"].size());
    std::vector<std::string> top2_ids = {"22", "3"};

    for(size_t pos = 0; pos < top2_max_score["hits"].size(); ++pos) {
        nlohmann::json hit = top2_max_score["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = top2_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json top3_frequency = collection->search("loox", query_fields, "", facets, sort_fields, 1, 3, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, top3_frequency["hits"].size());
    std::vector<std::string> top3_ids = {"22", "3", "12"};

    for(size_t pos = 0; pos < top3_frequency["hits"].size(); ++pos) {
        nlohmann::json hit = top3_frequency["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = top3_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // Check pagination
    nlohmann::json single_hit = collection->search("loox", query_fields, "", facets, sort_fields, 1, 1, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, single_hit["found"].get<int>());
    ASSERT_EQ(1, single_hit["hits"].size());
    std::string solo_id = single_hit["hits"].at(0)["document"]["id"];
    ASSERT_STREQ("22", solo_id.c_str());

    nlohmann::json two_hits = collection->search("loox", query_fields, "", facets, sort_fields, 1, 2, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, two_hits["found"].get<int>());
    ASSERT_EQ(2, two_hits["hits"].size());

    // Check total ordering
    std::vector<std::string> all_ids = {"22", "3", "12", "23", "24"};

    nlohmann::json all_frequency = collection->search("loox", query_fields, "", facets, sort_fields, 1, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, all_frequency["hits"].size());

    for(size_t pos = 0; pos < all_frequency["hits"].size(); ++pos) {
        nlohmann::json hit = all_frequency["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = all_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json all_max_score = collection->search("loox", query_fields, "", facets, sort_fields, 1, 10, 1, MAX_SCORE, false).get();
    ASSERT_EQ(5, all_max_score["hits"].size());

    for(size_t pos = 0; pos < all_max_score["hits"].size(); ++pos) {
        nlohmann::json hit = all_max_score["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = all_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// A line contains "ISX" but not "what" - need to ensure that correction to "ISS what" happens
TEST_F(CollectionTest, TextContainingAnActualTypo) {
    std::vector<std::string> facets;

    nlohmann::json phrase_results = collection->search("ISX what", query_fields, "", facets, sort_fields, 1, 4, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, phrase_results["hits"].size());
    ASSERT_EQ(9, phrase_results["found"].get<uint32_t>());

    std::vector<std::string> phrase_ids = {"8", "19", "6", "21"};

    for(size_t pos = 0; pos < phrase_results["hits"].size(); ++pos) {
        nlohmann::json hit = phrase_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = phrase_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // Record containing exact token match should appear first
    nlohmann::json token_results = collection->search("ISX", query_fields, "", facets, sort_fields, 1, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(8, token_results["hits"].size());
    ASSERT_EQ(8, token_results["found"].get<uint32_t>());

    std::vector<std::string> token_ids = {"20", "19", "6", "4", "3", "10", "8", "21"};

    for(size_t pos = 0; pos < token_results["hits"].size(); ++pos) {
        nlohmann::json hit = token_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = token_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// Walks the 7 matches for "the" three at a time: two full pages followed by a final single hit.
TEST_F(CollectionTest, Pagination) {
    // first page
    nlohmann::json first_page = collection->search("the", query_fields, "", {}, sort_fields, 0, 3, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, first_page["hits"].size());
    ASSERT_EQ(7, first_page["found"].get<uint32_t>());

    std::vector<std::string> first_page_ids = {"1", "6", "foo"};

    for(size_t pos = 0; pos < first_page["hits"].size(); ++pos) {
        nlohmann::json hit = first_page["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = first_page_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // second page
    nlohmann::json second_page = collection->search("the", query_fields, "", {}, sort_fields, 0, 3, 2, FREQUENCY, false).get();
    ASSERT_EQ(3, second_page["hits"].size());
    ASSERT_EQ(7, second_page["found"].get<uint32_t>());

    std::vector<std::string> second_page_ids = {"13", "10", "8"};

    for(size_t pos = 0; pos < second_page["hits"].size(); ++pos) {
        nlohmann::json hit = second_page["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = second_page_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // third page holds the single remaining hit
    nlohmann::json third_page = collection->search("the", query_fields, "", {}, sort_fields, 0, 3, 3, FREQUENCY, false).get();
    ASSERT_EQ(1, third_page["hits"].size());
    ASSERT_EQ(7, third_page["found"].get<uint32_t>());

    std::vector<std::string> third_page_ids = {"16"};

    for(size_t pos = 0; pos < third_page["hits"].size(); ++pos) {
        nlohmann::json hit = third_page["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = third_page_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }
}
|
|
|
|
// Exercises searches issued with the trailing boolean argument set to true (prefix mode).
TEST_F(CollectionTest, PrefixSearching) {
    std::vector<std::string> facets;

    std::vector<std::string> ex_ids = {"6", "12"};

    nlohmann::json ex_frequency = collection->search("ex", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, true).get();
    ASSERT_EQ(2, ex_frequency["hits"].size());

    for(size_t pos = 0; pos < ex_frequency["hits"].size(); ++pos) {
        nlohmann::json hit = ex_frequency["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = ex_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json ex_max_score = collection->search("ex", query_fields, "", facets, sort_fields, 0, 10, 1, MAX_SCORE, true).get();
    ASSERT_EQ(2, ex_max_score["hits"].size());

    for(size_t pos = 0; pos < ex_max_score["hits"].size(); ++pos) {
        nlohmann::json hit = ex_max_score["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = ex_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json what_ex = collection->search("what ex", query_fields, "", facets, sort_fields, 0, 10, 1, MAX_SCORE, true).get();
    ASSERT_EQ(9, what_ex["hits"].size());
    std::vector<std::string> what_ex_ids = {"6", "12", "19", "22", "13", "8", "15", "24", "21"};

    for(size_t pos = 0; pos < what_ex["hits"].size(); ++pos) {
        nlohmann::json hit = what_ex["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = what_ex_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // restrict to only 2 results and differentiate between MAX_SCORE and FREQUENCY
    std::vector<std::string> t_ids = {"19", "22"};

    nlohmann::json t_max_score = collection->search("t", query_fields, "", facets, sort_fields, 0, 2, 1, MAX_SCORE, true).get();
    ASSERT_EQ(2, t_max_score["hits"].size());

    for(size_t pos = 0; pos < t_max_score["hits"].size(); ++pos) {
        nlohmann::json hit = t_max_score["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = t_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json t_frequency = collection->search("t", query_fields, "", facets, sort_fields, 0, 2, 1, FREQUENCY, true).get();
    ASSERT_EQ(2, t_frequency["hits"].size());

    for(size_t pos = 0; pos < t_frequency["hits"].size(); ++pos) {
        nlohmann::json hit = t_frequency["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = t_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // only the last token in the query should be used for prefix search - so, "math" should not match "mathematics"
    nlohmann::json math_fx = collection->search("math fx", query_fields, "", facets, sort_fields, 0, 1, 1, FREQUENCY, true).get();
    ASSERT_EQ(0, math_fx["hits"].size());

    // single and double char prefixes should set a ceiling on the num_typos possible
    nlohmann::json one_char = collection->search("x", query_fields, "", facets, sort_fields, 2, 2, 1, FREQUENCY, true).get();
    ASSERT_EQ(0, one_char["hits"].size());

    nlohmann::json two_chars = collection->search("xq", query_fields, "", facets, sort_fields, 2, 2, 1, FREQUENCY, true).get();
    ASSERT_EQ(0, two_chars["hits"].size());

    // prefix with a typo
    nlohmann::json typo_prefix = collection->search("late propx", query_fields, "", facets, sort_fields, 2, 1, 1, FREQUENCY, true).get();
    ASSERT_EQ(1, typo_prefix["hits"].size());
    ASSERT_EQ("16", typo_prefix["hits"].at(0)["document"]["id"]);
}
|
|
|
|
// Searches a dedicated multi-field collection using different query-field lists and one filter.
TEST_F(CollectionTest, MultipleFields) {
    std::vector<field> fields = {
        field("title", field_types::STRING, false),
        field("starring", field_types::STRING, false),
        field("cast", field_types::STRING_ARRAY, false),
        field("points", field_types::INT32, false)
    };

    // Reuse the collection when the manager already knows it, otherwise create it.
    Collection *coll_mul_fields = collectionManager.get_collection("coll_mul_fields");
    if(coll_mul_fields == nullptr) {
        coll_mul_fields = collectionManager.create_collection("coll_mul_fields", fields).get();
    }

    // Index one document per line of the fixture file.
    std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl");
    std::string json_line;

    while (std::getline(infile, json_line)) {
        coll_mul_fields->add(json_line);
    }

    infile.close();

    std::vector<std::string> facets;

    query_fields = {"title", "starring"};
    nlohmann::json will_results = coll_mul_fields->search("Will", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, will_results["hits"].size());

    std::vector<std::string> will_ids = {"3", "2", "1", "0"};

    for(size_t pos = 0; pos < will_results["hits"].size(); ++pos) {
        nlohmann::json hit = will_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = will_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // when "starring" takes higher priority than "title"
    query_fields = {"starring", "title"};
    nlohmann::json thomas_results = coll_mul_fields->search("thomas", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, thomas_results["hits"].size());

    std::vector<std::string> thomas_ids = {"15", "14", "12", "13"};

    for(size_t pos = 0; pos < thomas_results["hits"].size(); ++pos) {
        nlohmann::json hit = thomas_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = thomas_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    query_fields = {"starring", "title", "cast"};
    nlohmann::json affleck_results = coll_mul_fields->search("ben affleck", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, affleck_results["hits"].size());

    query_fields = {"cast"};
    nlohmann::json chris_results = coll_mul_fields->search("chris", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, chris_results["hits"].size());

    std::vector<std::string> chris_ids = {"6", "1", "7"};
    for(size_t pos = 0; pos < chris_results["hits"].size(); ++pos) {
        nlohmann::json hit = chris_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = chris_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    query_fields = {"cast"};
    nlohmann::json chris_pine_results = coll_mul_fields->search("chris pine", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, chris_pine_results["hits"].size());

    std::vector<std::string> chris_pine_ids = {"7", "6", "1"};
    for(size_t pos = 0; pos < chris_pine_results["hits"].size(); ++pos) {
        nlohmann::json hit = chris_pine_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = chris_pine_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // filtering on unfaceted multi-valued string field
    query_fields = {"title"};
    nlohmann::json captain_results = coll_mul_fields->search("captain", query_fields, "cast: chris", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, captain_results["hits"].size());

    std::vector<std::string> captain_ids = {"6"};
    for(size_t pos = 0; pos < captain_results["hits"].size(); ++pos) {
        nlohmann::json hit = captain_results["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = captain_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    collectionManager.drop_collection("coll_mul_fields");
}
|
|
|
|
// A faceted field is rejected as a query field, while filtering on a plain string field works.
TEST_F(CollectionTest, FilterAndQueryFieldRestrictions) {
    // Same schema as the multi-field fixture, except that "cast" is declared with `true`
    // as its third argument (the faceted flag, judging by the error message asserted below).
    std::vector<field> fields = {
        field("title", field_types::STRING, false),
        field("starring", field_types::STRING, false),
        field("cast", field_types::STRING_ARRAY, true),
        field("points", field_types::INT32, false)
    };

    Collection *coll_mul_fields = collectionManager.get_collection("coll_mul_fields");
    if(coll_mul_fields == nullptr) {
        coll_mul_fields = collectionManager.create_collection("coll_mul_fields", fields).get();
    }

    // Index one document per line of the fixture file.
    std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl");
    std::string json_line;

    while (std::getline(infile, json_line)) {
        coll_mul_fields->add(json_line);
    }

    infile.close();

    std::vector<std::string> facets;

    // query should be allowed only on non-faceted text fields
    query_fields = {"cast"};
    Option<nlohmann::json> faceted_query_op =
        coll_mul_fields->search("anton", query_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false);
    ASSERT_FALSE(faceted_query_op.ok());
    ASSERT_EQ(400, faceted_query_op.code());
    ASSERT_EQ("Field `cast` is a faceted field - it cannot be used as a query field.", faceted_query_op.error());

    // filtering on string field should be possible
    query_fields = {"title"};
    Option<nlohmann::json> string_filter_op =
        coll_mul_fields->search("captain", query_fields, "starring: Samuel L. Jackson", facets, sort_fields,
                                0, 10, 1, FREQUENCY, false);
    ASSERT_EQ(true, string_filter_op.ok());
    nlohmann::json filtered_results = string_filter_op.get();
    ASSERT_EQ(1, filtered_results["hits"].size());

    collectionManager.drop_collection("coll_mul_fields");
}
|
|
|
|
// Filters on int32, int32-array and int64-array fields, with all results sorted by age DESC.
TEST_F(CollectionTest, FilterOnNumericFields) {
    std::vector<field> fields = {
        field("name", field_types::STRING, false),
        field("age", field_types::INT32, false),
        field("years", field_types::INT32_ARRAY, false),
        field("timestamps", field_types::INT64_ARRAY, false),
        field("tags", field_types::STRING_ARRAY, true)
    };

    // Test-local sort order, named distinctly from the fixture's `sort_fields` member.
    std::vector<sort_by> age_desc_sort = { sort_by("age", "DESC") };

    Collection *coll_array_fields = collectionManager.get_collection("coll_array_fields");
    if(coll_array_fields == nullptr) {
        coll_array_fields = collectionManager.create_collection("coll_array_fields", fields).get();
    }

    // Index one document per line of the fixture file.
    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    std::string json_line;

    while (std::getline(infile, json_line)) {
        coll_array_fields->add(json_line);
    }

    infile.close();

    // Plain search with no filters - results should be sorted by rank fields
    query_fields = {"name"};
    std::vector<std::string> facets;
    nlohmann::json unfiltered = coll_array_fields->search("Jeremy", query_fields, "", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, unfiltered["hits"].size());

    std::vector<std::string> unfiltered_ids = {"3", "1", "4", "0", "2"};

    for(size_t pos = 0; pos < unfiltered["hits"].size(); ++pos) {
        nlohmann::json hit = unfiltered["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = unfiltered_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // Searching on an int32 field
    nlohmann::json age_gt = coll_array_fields->search("Jeremy", query_fields, "age:>24", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, age_gt["hits"].size());

    std::vector<std::string> age_gt_ids = {"3", "1", "4"};

    for(size_t pos = 0; pos < age_gt["hits"].size(); ++pos) {
        nlohmann::json hit = age_gt["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = age_gt_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json age_gte = coll_array_fields->search("Jeremy", query_fields, "age:>=24", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, age_gte["hits"].size());

    nlohmann::json age_eq = coll_array_fields->search("Jeremy", query_fields, "age:24", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, age_eq["hits"].size());

    // Searching a number against an int32 array field
    nlohmann::json years_gt = coll_array_fields->search("Jeremy", query_fields, "years:>2002", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, years_gt["hits"].size());

    std::vector<std::string> years_gt_ids = {"1", "0", "2"};
    for(size_t pos = 0; pos < years_gt["hits"].size(); ++pos) {
        nlohmann::json hit = years_gt["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = years_gt_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    nlohmann::json years_lt = coll_array_fields->search("Jeremy", query_fields, "years:<1989", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, years_lt["hits"].size());

    std::vector<std::string> years_lt_ids = {"3"};
    for(size_t pos = 0; pos < years_lt["hits"].size(); ++pos) {
        nlohmann::json hit = years_lt["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = years_lt_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // multiple filters
    nlohmann::json years_range = coll_array_fields->search("Jeremy", query_fields, "years:<2005 && years:>1987", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, years_range["hits"].size());

    std::vector<std::string> years_range_ids = {"4"};
    for(size_t pos = 0; pos < years_range["hits"].size(); ++pos) {
        nlohmann::json hit = years_range["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = years_range_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // multiple search values (works like SQL's IN operator) against a single int field
    nlohmann::json age_in = coll_array_fields->search("Jeremy", query_fields, "age:[21, 24, 63]", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, age_in["hits"].size());

    std::vector<std::string> age_in_ids = {"3", "0", "2"};
    for(size_t pos = 0; pos < age_in["hits"].size(); ++pos) {
        nlohmann::json hit = age_in["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = age_in_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // multiple search values against an int32 array field - also use extra padding between symbols
    nlohmann::json years_in = coll_array_fields->search("Jeremy", query_fields, "years : [ 2015, 1985 , 1999]", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, years_in["hits"].size());

    std::vector<std::string> years_in_ids = {"3", "1", "4", "0"};
    for(size_t pos = 0; pos < years_in["hits"].size(); ++pos) {
        nlohmann::json hit = years_in["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = years_in_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // searching on an int64 array field - also ensure that padded space causes no issues
    nlohmann::json timestamps_gt = coll_array_fields->search("Jeremy", query_fields, "timestamps : > 475205222", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, timestamps_gt["hits"].size());

    std::vector<std::string> timestamps_gt_ids = {"1", "4", "0", "2"};

    for(size_t pos = 0; pos < timestamps_gt["hits"].size(); ++pos) {
        nlohmann::json hit = timestamps_gt["hits"].at(pos);
        std::string actual_id = hit["document"]["id"];
        const std::string & expected_id = timestamps_gt_ids.at(pos);
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
    }

    // when filters don't match any record, no results should be returned
    nlohmann::json no_match = coll_array_fields->search("Jeremy", query_fields, "timestamps:<1", facets, age_desc_sort, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, no_match["hits"].size());

    collectionManager.drop_collection("coll_array_fields");
}
|
|
|
|
TEST_F(CollectionTest, FilterOnFloatFields) {
|
|
Collection *coll_array_fields;
|
|
|
|
std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
|
|
std::vector<field> fields = {
|
|
field("name", field_types::STRING, false),
|
|
field("age", field_types::INT32, false),
|
|
field("top_3", field_types::FLOAT_ARRAY, false),
|
|
field("rating", field_types::FLOAT, false)
|
|
};
|
|
std::vector<sort_by> sort_fields_desc = { sort_by("rating", "DESC") };
|
|
std::vector<sort_by> sort_fields_asc = { sort_by("rating", "ASC") };
|
|
|
|
coll_array_fields = collectionManager.get_collection("coll_array_fields");
|
|
if(coll_array_fields == nullptr) {
|
|
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields).get();
|
|
}
|
|
|
|
std::string json_line;
|
|
|
|
while (std::getline(infile, json_line)) {
|
|
coll_array_fields->add(json_line);
|
|
}
|
|
|
|
infile.close();
|
|
|
|
// Plain search with no filters - results should be sorted by rating field DESC
|
|
query_fields = {"name"};
|
|
std::vector<std::string> facets;
|
|
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(5, results["hits"].size());
|
|
|
|
std::vector<std::string> ids = {"1", "2", "4", "0", "3"};
|
|
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
// Plain search with no filters - results should be sorted by rating field ASC
|
|
results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_asc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(5, results["hits"].size());
|
|
|
|
ids = {"3", "0", "4", "2", "1"};
|
|
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str()); //?
|
|
}
|
|
|
|
// Searching on a float field, sorted desc by rating
|
|
results = coll_array_fields->search("Jeremy", query_fields, "rating:>0.0", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(4, results["hits"].size());
|
|
|
|
ids = {"1", "2", "4", "0"};
|
|
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
// Searching a float against an float array field
|
|
results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(2, results["hits"].size());
|
|
|
|
ids = {"1", "2"};
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
// multiple filters
|
|
results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8 && rating:>7.9", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(1, results["hits"].size());
|
|
|
|
ids = {"1"};
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
// multiple search values (works like SQL's IN operator) against a single float field
|
|
results = coll_array_fields->search("Jeremy", query_fields, "rating:[1.09, 7.812]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(2, results["hits"].size());
|
|
|
|
ids = {"2", "0"};
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
// multiple search values against a float array field - also use extra padding between symbols
|
|
results = coll_array_fields->search("Jeremy", query_fields, "top_3 : [ 5.431, 0.001 , 7.812, 11.992]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_EQ(3, results["hits"].size());
|
|
|
|
ids = {"2", "4", "0"};
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
// when filters don't match any record, no results should be returned
|
|
Option<nlohmann::json> results_op = coll_array_fields->search("Jeremy", query_fields, "rating:<-2.78", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
|
|
ASSERT_TRUE(results_op.ok());
|
|
results = results_op.get();
|
|
ASSERT_EQ(0, results["hits"].size());
|
|
|
|
// rank tokens by token ranking field
|
|
results_op = coll_array_fields->search("j", query_fields, "", facets, sort_fields_desc, 0, 10, 1, MAX_SCORE, true).get();
|
|
ASSERT_TRUE(results_op.ok());
|
|
results = results_op.get();
|
|
ASSERT_EQ(5, results["hits"].size());
|
|
|
|
ids = {"1", "2", "4", "0", "3"};
|
|
|
|
for(size_t i = 0; i < results["hits"].size(); i++) {
|
|
nlohmann::json result = results["hits"].at(i);
|
|
std::string result_id = result["document"]["id"];
|
|
std::string id = ids.at(i);
|
|
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
|
}
|
|
|
|
collectionManager.drop_collection("coll_array_fields");
|
|
}
|
|
|
|
// Checks multi-key sorting on float fields: results for the same query are
// requested with (score, average) sorted DESC/DESC, ASC/ASC and ASC/DESC, and
// the returned document ids are compared against the expected order.
TEST_F(CollectionTest, SortOnFloatFields) {
    std::vector<field> schema_fields = {
        field("title", field_types::STRING, false),
        field("score", field_types::FLOAT, false),
        field("average", field_types::FLOAT, false)
    };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *float_coll = collectionManager.get_collection("coll_float_fields");
    if(float_coll == nullptr) {
        float_coll = collectionManager.create_collection("coll_float_fields", schema_fields).get();
    }

    // Every line of the fixture file is indexed as one document.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/float_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        float_coll->add(line);
    }
    docs_file.close();

    query_fields = {"title"};
    std::vector<std::string> facets;

    // both sort fields descending
    std::vector<sort_by> by_score_desc_avg_desc = { sort_by("score", "DESC"), sort_by("average", "DESC") };
    nlohmann::json results = float_coll->search("Jeremy", query_fields, "", facets, by_score_desc_avg_desc,
                                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(7, results["hits"].size());

    std::vector<std::string> expected_ids = {"2", "0", "3", "1", "5", "4", "6"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        EXPECT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // both sort fields ascending
    std::vector<sort_by> by_score_asc_avg_asc = { sort_by("score", "ASC"), sort_by("average", "ASC") };
    results = float_coll->search("Jeremy", query_fields, "", facets, by_score_asc_avg_asc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(7, results["hits"].size());

    expected_ids = {"6", "4", "5", "1", "3", "0", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        EXPECT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // first field ascending, second field descending
    std::vector<sort_by> by_score_asc_avg_desc = { sort_by("score", "ASC"), sort_by("average", "DESC") };
    results = float_coll->search("Jeremy", query_fields, "", facets, by_score_asc_avg_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(7, results["hits"].size());

    expected_ids = {"5", "4", "6", "1", "3", "0", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        EXPECT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    collectionManager.drop_collection("coll_float_fields");
}
|
|
|
|
// Exercises bool and bool-array fields: sorting on a bool field, filtering on
// `popular`, and filtering on `bool_array` with one value versus a value list.
TEST_F(CollectionTest, QueryBoolFields) {
    std::vector<field> schema_fields = {
        field("popular", field_types::BOOL, false),
        field("title", field_types::STRING, false),
        field("rating", field_types::FLOAT, false),
        field("bool_array", field_types::BOOL_ARRAY, false),
    };

    std::vector<sort_by> popular_then_rating = { sort_by("popular", "DESC"), sort_by("rating", "DESC") };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *bool_coll = collectionManager.get_collection("coll_bool");
    if(bool_coll == nullptr) {
        bool_coll = collectionManager.create_collection("coll_bool", schema_fields).get();
    }

    // Every line of the fixture file is indexed as one document.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/bool_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        bool_coll->add(line);
    }
    docs_file.close();

    // No filter: only the sort order is being checked here.
    query_fields = {"title"};
    std::vector<std::string> facets;
    nlohmann::json results = bool_coll->search("the", query_fields, "", facets, popular_then_rating,
                                               0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, results["hits"].size());

    std::vector<std::string> expected_ids = {"1", "3", "4", "9", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // Filter on the bool field: true
    results = bool_coll->search("the", query_fields, "popular:true", facets, popular_then_rating,
                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, results["hits"].size());

    expected_ids = {"1", "3", "4"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // Filter on the bool field: false
    results = bool_coll->search("the", query_fields, "popular:false", facets, popular_then_rating,
                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(2, results["hits"].size());

    expected_ids = {"9", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // Filtering against a bool array field.

    // A bool array can only be filtered with a single boolean value, so a
    // value list is expected to produce an error result.
    Option<nlohmann::json> list_filter_op = bool_coll->search("the", query_fields, "bool_array:[true, false]",
                                                              facets, popular_then_rating,
                                                              0, 10, 1, FREQUENCY, false);
    ASSERT_FALSE(list_filter_op.ok());

    results = bool_coll->search("the", query_fields, "bool_array: true", facets, popular_then_rating,
                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, results["hits"].size());

    expected_ids = {"1", "4", "9", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    collectionManager.drop_collection("coll_bool");
}
|
|
|
|
// Checks filtering on the string-array field `tags`: single value, value list,
// case-insensitive matching, and comparator symbols inside a text filter.
TEST_F(CollectionTest, FilterOnTextFields) {
    std::vector<field> schema_fields = {
        field("name", field_types::STRING, false),
        field("age", field_types::INT32, false),
        field("years", field_types::INT32_ARRAY, false),
        field("tags", field_types::STRING_ARRAY, true)
    };

    std::vector<sort_by> by_age_desc = { sort_by("age", "DESC") };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *array_coll = collectionManager.get_collection("coll_array_fields");
    if(array_coll == nullptr) {
        array_coll = collectionManager.create_collection("coll_array_fields", schema_fields).get();
    }

    // Every line of the fixture file is indexed as one document.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        array_coll->add(line);
    }
    docs_file.close();

    query_fields = {"name"};
    std::vector<std::string> facets;

    // single tag value
    nlohmann::json results = array_coll->search("Jeremy", query_fields, "tags: gold", facets, by_age_desc,
                                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, results["hits"].size());

    std::vector<std::string> expected_ids = {"1", "4", "0", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // single tag value, with a space on both sides of the colon
    results = array_coll->search("Jeremy", query_fields, "tags : bronze", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(2, results["hits"].size());

    expected_ids = {"4", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // search with a list of tags, also testing extra padding of space
    results = array_coll->search("Jeremy", query_fields, "tags: [bronze, silver]", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, results["hits"].size());

    expected_ids = {"3", "4", "0", "2"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // filter values need not match exactly (normalization can happen)
    results = array_coll->search("Jeremy", query_fields, "tags: BrONZe", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(2, results["hits"].size());

    // comparators on a text field are treated as part of the search string,
    // so none of these is expected to match anything
    results = array_coll->search("Jeremy", query_fields, "tags:<bronze", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    results = array_coll->search("Jeremy", query_fields, "tags:<=BRONZE", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    results = array_coll->search("Jeremy", query_fields, "tags:>BRONZE", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    collectionManager.drop_collection("coll_array_fields");
}
|
|
|
|
// A malformed filter query must not crash the search: each bad filter below is
// expected to simply yield zero hits, and a blank filter behaves as no filter.
TEST_F(CollectionTest, HandleBadlyFormedFilterQuery) {
    std::vector<field> schema_fields = {
        field("name", field_types::STRING, false),
        field("age", field_types::INT32, false),
        field("years", field_types::INT32_ARRAY, false),
        field("timestamps", field_types::INT64_ARRAY, false),
        field("tags", field_types::STRING_ARRAY, false)
    };

    std::vector<sort_by> by_age_desc = { sort_by("age", "DESC") };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *array_coll = collectionManager.get_collection("coll_array_fields");
    if(array_coll == nullptr) {
        array_coll = collectionManager.create_collection("coll_array_fields", schema_fields).get();
    }

    // Every line of the fixture file is indexed as one document.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        array_coll->add(line);
    }
    docs_file.close();

    query_fields = {"name"};
    std::vector<std::string> facets;

    // filter field is not part of the schema
    nlohmann::json results = array_coll->search("Jeremy", query_fields, "tagzz: gold", facets, by_age_desc,
                                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    // string value given for a numeric field
    results = array_coll->search("Jeremy", query_fields, "age: abcdef", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    // string value given for a numeric array field
    results = array_coll->search("Jeremy", query_fields, "timestamps: abcdef", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    // malformed k:v syntax (no colon)
    results = array_coll->search("Jeremy", query_fields, "timestamps abcdef", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    // only whitespace - must be treated as an empty filter, so all 5 hits come back
    results = array_coll->search("Jeremy", query_fields, " ", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, results["hits"].size());

    // number wrapped in quotes
    results = array_coll->search("Jeremy", query_fields, "age: '21'", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    collectionManager.drop_collection("coll_array_fields");
}
|
|
|
|
// Checks the `facet_counts` section of search results: a single facet, two
// facets at once, and a facet combined with a numeric filter.
TEST_F(CollectionTest, FacetCounts) {
    std::vector<field> schema_fields = {
        field("name", field_types::STRING, false),
        field("name_facet", field_types::STRING, true),
        field("age", field_types::INT32, false),
        field("years", field_types::INT32_ARRAY, false),
        field("timestamps", field_types::INT64_ARRAY, false),
        field("tags", field_types::STRING_ARRAY, true)
    };

    std::vector<sort_by> by_age_desc = { sort_by("age", "DESC") };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *array_coll = collectionManager.get_collection("coll_array_fields");
    if(array_coll == nullptr) {
        array_coll = collectionManager.create_collection("coll_array_fields", schema_fields).get();
    }

    // The fixture documents have no `name_facet` value, so each one is patched
    // with a copy of its `name` before being indexed.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        nlohmann::json parsed_doc = nlohmann::json::parse(line);
        parsed_doc["name_facet"] = parsed_doc["name"];
        array_coll->add(parsed_doc.dump());
    }
    docs_file.close();

    query_fields = {"name"};
    std::vector<std::string> facets = {"tags"};

    // one facet field, no filter
    nlohmann::json results = array_coll->search("Jeremy", query_fields, "", facets, by_age_desc,
                                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, results["hits"].size());

    ASSERT_EQ(1, results["facet_counts"].size());
    ASSERT_EQ(2, results["facet_counts"][0].size());
    ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);

    ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]);
    ASSERT_EQ(4, results["facet_counts"][0]["counts"][0]["count"].get<int>());

    ASSERT_EQ("silver", results["facet_counts"][0]["counts"][1]["value"]);
    ASSERT_EQ(3, results["facet_counts"][0]["counts"][1]["count"].get<int>());

    ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]);
    ASSERT_EQ(2, results["facet_counts"][0]["counts"][2]["count"].get<int>());

    // two facet fields, still without a filter
    facets = {"tags", "name_facet"};
    results = array_coll->search("Jeremy", query_fields, "", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();

    ASSERT_EQ(5, results["hits"].size());
    ASSERT_EQ(2, results["facet_counts"].size());

    ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
    ASSERT_EQ("name_facet", results["facet_counts"][1]["field_name"]);

    // the facet value must be the stored value, not the indexed one
    // (i.e. no tokenization/standardization)
    ASSERT_EQ("Jeremy Howard", results["facet_counts"][1]["counts"][0]["value"]);
    ASSERT_EQ(5, results["facet_counts"][1]["counts"][0]["count"].get<int>());

    // one facet field combined with a filter
    facets = {"tags"};
    results = array_coll->search("Jeremy", query_fields, "age: >24", facets, by_age_desc,
                                 0, 10, 1, FREQUENCY, false).get();

    ASSERT_EQ(3, results["hits"].size());
    ASSERT_EQ(1, results["facet_counts"].size());

    ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
    ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get<int>());
    ASSERT_EQ(2, results["facet_counts"][0]["counts"][1]["count"].get<int>());
    ASSERT_EQ(1, results["facet_counts"][0]["counts"][2]["count"].get<int>());

    ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]);
    ASSERT_EQ("silver", results["facet_counts"][0]["counts"][1]["value"]);
    ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]);

    collectionManager.drop_collection("coll_array_fields");
}
|
|
|
|
// Checks result ordering on the `points` field: ascending, ascending with a
// smaller page, descending (with mixed-case order keywords), and the order
// obtained when no sort field is given at all.
TEST_F(CollectionTest, SortingOrder) {
    std::vector<field> schema_fields = {
        field("title", field_types::STRING, false),
        field("starring", field_types::STRING, false),
        field("points", field_types::INT32, false),
        field("cast", field_types::STRING_ARRAY, false)
    };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *multi_coll = collectionManager.get_collection("coll_mul_fields");
    if(multi_coll == nullptr) {
        multi_coll = collectionManager.create_collection("coll_mul_fields", schema_fields).get();
    }

    // Every line of the fixture file is indexed as one document.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        multi_coll->add(line);
    }
    docs_file.close();

    query_fields = {"title"};
    std::vector<std::string> facets;

    // ascending on points
    sort_fields = { sort_by("points", "ASC") };
    nlohmann::json results = multi_coll->search("the", query_fields, "", facets, sort_fields,
                                                0, 15, 1, FREQUENCY, false).get();
    ASSERT_EQ(10, results["hits"].size());

    std::vector<std::string> expected_ids = {"17", "13", "10", "4", "0", "1", "8", "6", "16", "11"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // limiting results to just 5, "ASC" keyword must be case insensitive
    sort_fields = { sort_by("points", "asc") };
    results = multi_coll->search("the", query_fields, "", facets, sort_fields,
                                 0, 5, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, results["hits"].size());

    expected_ids = {"17", "13", "10", "4", "0"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // descending on points, with a mixed-case order keyword
    sort_fields = { sort_by("points", "dEsc") };
    results = multi_coll->search("the", query_fields, "", facets, sort_fields,
                                 0, 15, 1, FREQUENCY, false).get();
    ASSERT_EQ(10, results["hits"].size());

    expected_ids = {"11", "16", "6", "8", "1", "0", "10", "4", "13", "17"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    // With an empty list of sort_by fields the results should be ordered desc
    // on the seq_id, since the match score will be the same for all records.
    sort_fields = { };
    results = multi_coll->search("the", query_fields, "", facets, sort_fields,
                                 0, 15, 1, FREQUENCY, false).get();
    ASSERT_EQ(10, results["hits"].size());

    expected_ids = {"17", "16", "13", "11", "10", "8", "6", "4", "1", "0"};
    for(size_t pos = 0; pos < results["hits"].size(); pos++) {
        const std::string actual_id = results["hits"].at(pos)["document"]["id"].get<std::string>();
        ASSERT_STREQ(expected_ids.at(pos).c_str(), actual_id.c_str());
    }

    collectionManager.drop_collection("coll_mul_fields");
}
|
|
|
|
// Searching with query, facet or sort fields that do not fit the schema must
// return an error (code 400 with a descriptive message) instead of crashing.
TEST_F(CollectionTest, SearchingWithMissingFields) {
    std::vector<field> schema_fields = {
        field("name", field_types::STRING, false),
        field("age", field_types::INT32, false),
        field("years", field_types::INT32_ARRAY, false),
        field("timestamps", field_types::INT64_ARRAY, false),
        field("tags", field_types::STRING_ARRAY, true)
    };

    std::vector<sort_by> by_age_desc = { sort_by("age", "DESC") };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *array_coll = collectionManager.get_collection("coll_array_fields");
    if(array_coll == nullptr) {
        array_coll = collectionManager.create_collection("coll_array_fields", schema_fields).get();
    }

    // Every line of the fixture file is indexed as one document.
    std::ifstream docs_file(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    for(std::string line; std::getline(docs_file, line); ) {
        array_coll->add(line);
    }
    docs_file.close();

    std::vector<std::string> facets;

    // query field that does not exist in the schema
    std::vector<std::string> unknown_query_fields = {"titlez"};
    Option<nlohmann::json> search_op = array_coll->search("the", unknown_query_fields, "", facets, by_age_desc, 0, 10);
    ASSERT_FALSE(search_op.ok());
    ASSERT_EQ(400, search_op.code());
    ASSERT_STREQ("Could not find a field named `titlez` in the schema.", search_op.error().c_str());

    // query field that is an integer field
    search_op = array_coll->search("the", {"age"}, "", facets, by_age_desc, 0, 10);
    ASSERT_EQ(400, search_op.code());
    ASSERT_STREQ("Field `age` should be a string or a string array.", search_op.error().c_str());

    // facet on `timestamps`, which the schema above declares but which the
    // search does not accept as a facet field
    // NOTE(review): presumably because its third `field` argument is false - confirm
    search_op = array_coll->search("the", {"name"}, "", {"timestamps"}, by_age_desc, 0, 10);
    ASSERT_EQ(400, search_op.code());
    ASSERT_STREQ("Could not find a facet field named `timestamps` in the schema.", search_op.error().c_str());

    // sort on `timestamps`, which is rejected as a sort field
    search_op = array_coll->search("the", {"name"}, "", {}, { sort_by("timestamps", "ASC") }, 0, 10);
    ASSERT_EQ(400, search_op.code());
    ASSERT_STREQ("Could not find a field named `timestamps` in the schema for sorting.", search_op.error().c_str());

    // `_rank` is likewise rejected as a sort field
    search_op = array_coll->search("the", {"name"}, "", {}, { sort_by("_rank", "ASC") }, 0, 10);
    ASSERT_EQ(400, search_op.code());
    ASSERT_STREQ("Could not find a field named `_rank` in the schema for sorting.", search_op.error().c_str());

    collectionManager.drop_collection("coll_array_fields");
}
|
|
|
|
// Indexing a document that does not match the schema must fail with a
// descriptive error instead of crashing. Covers missing fields, wrongly typed
// fields, problems with the token ranking field (`age`) and unparseable JSON.
TEST_F(CollectionTest, IndexingWithBadData) {
    std::vector<field> fields = {field("name", field_types::STRING, false),
                                 field("tags", field_types::STRING_ARRAY, true),
                                 field("age", field_types::INT32, false),
                                 field("average", field_types::INT32, false) };

    // `age` is passed as the third create_collection argument; the error
    // messages asserted below refer to it as the token ranking field.
    Collection *sample_collection = collectionManager.get_collection("sample_collection");
    if(sample_collection == nullptr) {
        sample_collection = collectionManager.create_collection("sample_collection", fields, "age").get();
    }

    // --- Fields declared in the schema but absent from the document ---

    const Option<nlohmann::json> & search_fields_missing_op1 = sample_collection->add("{\"namezz\": \"foo\", \"age\": 29, \"average\": 78}");
    ASSERT_FALSE(search_fields_missing_op1.ok());
    ASSERT_STREQ("Field `tags` has been declared in the schema, but is not found in the document.",
                 search_fields_missing_op1.error().c_str());

    const Option<nlohmann::json> & search_fields_missing_op2 = sample_collection->add("{\"namez\": \"foo\", \"tags\": [], \"age\": 34, \"average\": 78}");
    ASSERT_FALSE(search_fields_missing_op2.ok());
    ASSERT_STREQ("Field `name` has been declared in the schema, but is not found in the document.",
                 search_fields_missing_op2.error().c_str());

    const Option<nlohmann::json> & facet_fields_missing_op1 = sample_collection->add("{\"name\": \"foo\", \"age\": 34, \"average\": 78}");
    ASSERT_FALSE(facet_fields_missing_op1.ok());
    ASSERT_STREQ("Field `tags` has been declared in the schema, but is not found in the document.",
                 facet_fields_missing_op1.error().c_str());

    const char *doc_str = "{\"name\": \"foo\", \"age\": 34, \"avg\": 78, \"tags\": [\"red\", \"blue\"]}";
    const Option<nlohmann::json> & sort_fields_missing_op1 = sample_collection->add(doc_str);
    ASSERT_FALSE(sort_fields_missing_op1.ok());
    ASSERT_STREQ("Field `average` has been declared in the schema, but is not found in the document.",
                 sort_fields_missing_op1.error().c_str());

    // --- Type errors ---

    // `tags` given as a number instead of a string array
    doc_str = "{\"name\": \"foo\", \"age\": 34, \"tags\": 22, \"average\": 78}";
    const Option<nlohmann::json> & bad_facet_field_op = sample_collection->add(doc_str);
    ASSERT_FALSE(bad_facet_field_op.ok());
    ASSERT_STREQ("Field `tags` must be a string array.", bad_facet_field_op.error().c_str());

    // an empty `tags` array is accepted
    doc_str = "{\"name\": \"foo\", \"age\": 34, \"tags\": [], \"average\": 34}";
    const Option<nlohmann::json> & empty_facet_field_op = sample_collection->add(doc_str);
    ASSERT_TRUE(empty_facet_field_op.ok());

    // token ranking field given as a string
    doc_str = "{\"name\": \"foo\", \"age\": \"34\", \"tags\": [], \"average\": 34 }";
    const Option<nlohmann::json> & bad_token_ranking_field_op1 = sample_collection->add(doc_str);
    ASSERT_FALSE(bad_token_ranking_field_op1.ok());
    ASSERT_STREQ("Token ranking field `age` must be a number.", bad_token_ranking_field_op1.error().c_str());

    // token ranking field value too large for int32
    doc_str = "{\"name\": \"foo\", \"age\": 343234324234233234, \"tags\": [], \"average\": 34 }";
    const Option<nlohmann::json> & bad_token_ranking_field_op2 = sample_collection->add(doc_str);
    ASSERT_FALSE(bad_token_ranking_field_op2.ok());
    ASSERT_STREQ("Token ranking field `age` exceeds maximum value of int32.", bad_token_ranking_field_op2.error().c_str());

    // token ranking field missing altogether
    doc_str = "{\"name\": \"foo\", \"tags\": [], \"average\": 34 }";
    const Option<nlohmann::json> & bad_token_ranking_field_op3 = sample_collection->add(doc_str);
    ASSERT_FALSE(bad_token_ranking_field_op3.ok());
    ASSERT_STREQ("Field `age` has been declared as a token ranking field, but is not found in the document.",
                 bad_token_ranking_field_op3.error().c_str());

    // int32 field given as a string
    doc_str = "{\"name\": \"foo\", \"age\": 34, \"tags\": [], \"average\": \"34\"}";
    const Option<nlohmann::json> & bad_rank_field_op = sample_collection->add(doc_str);
    ASSERT_FALSE(bad_rank_field_op.ok());
    ASSERT_STREQ("Field `average` must be an int32.", bad_rank_field_op.error().c_str());

    // --- Input that is not valid JSON at all ---

    doc_str = "{\"name\": \"foo\", \"age\": asdadasd, \"tags\": [], \"average\": 34 }";
    const Option<nlohmann::json> & bad_token_ranking_field_op4 = sample_collection->add(doc_str);
    ASSERT_FALSE(bad_token_ranking_field_op4.ok());
    ASSERT_STREQ("Bad JSON.", bad_token_ranking_field_op4.error().c_str());

    collectionManager.drop_collection("sample_collection");
}
|
|
|
|
// Searching a collection that holds no documents must return zero hits
// rather than crash.
TEST_F(CollectionTest, EmptyIndexShouldNotCrash) {
    std::vector<field> schema_fields = {
        field("name", field_types::STRING, false),
        field("tags", field_types::STRING_ARRAY, false),
        field("age", field_types::INT32, false),
        field("average", field_types::INT32, false)
    };

    std::vector<sort_by> by_age_then_average = { sort_by("age", "DESC"), sort_by("average", "DESC") };

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *blank_coll = collectionManager.get_collection("empty_coll");
    if(blank_coll == nullptr) {
        blank_coll = collectionManager.create_collection("empty_coll", schema_fields, "age").get();
    }

    // Nothing is indexed before the search on purpose.
    nlohmann::json results = blank_coll->search("a", {"name"}, "", {}, by_age_then_average,
                                                0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    collectionManager.drop_collection("empty_coll");
}
|
|
|
|
// A document whose `id` is a number instead of a string must be rejected with
// a descriptive error.
TEST_F(CollectionTest, IdFieldShouldBeAString) {
    std::vector<field> fields = {field("name", field_types::STRING, false),
                                 field("tags", field_types::STRING_ARRAY, false),
                                 field("age", field_types::INT32, false),
                                 field("average", field_types::INT32, false)};

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *coll1 = collectionManager.get_collection("coll1");
    if(coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", fields, "age").get();
    }

    // Every schema field is present and well-typed; only `id` is wrong (an
    // integer), so the rejection can be attributed to the id check.
    nlohmann::json doc;
    doc["id"] = 101010;
    doc["name"] = "Jane";
    doc["age"] = 25;
    doc["average"] = 98;
    doc["tags"] = nlohmann::json::array();
    doc["tags"].push_back("tag1");

    Option<nlohmann::json> inserted_id_op = coll1->add(doc.dump());
    ASSERT_FALSE(inserted_id_op.ok());
    ASSERT_STREQ("Document's `id` field should be a string.", inserted_id_op.error().c_str());

    collectionManager.drop_collection("coll1");
}
|
|
|
|
// A document may carry an integer value for a field declared as FLOAT;
// indexing it is expected to succeed.
TEST_F(CollectionTest, AnIntegerCanBePassedToAFloatField) {
    std::vector<field> fields = {field("name", field_types::STRING, false),
                                 field("average", field_types::FLOAT, false)};

    // Reuse the collection when it is already registered, otherwise create it.
    Collection *coll1 = collectionManager.get_collection("coll1");
    if(coll1 == nullptr) {
        coll1 = collectionManager.create_collection("coll1", fields).get();
    }

    nlohmann::json doc;
    doc["id"] = "101010";
    doc["name"] = "Jane";
    doc["average"] = 98;  // integer literal for the FLOAT field `average`

    Option<nlohmann::json> inserted_id_op = coll1->add(doc.dump());
    // Non-fatal check, so the collection is still dropped below on failure.
    EXPECT_TRUE(inserted_id_op.ok());

    collectionManager.drop_collection("coll1");
}
|
|
|
|
// Removing documents must take them out of the search results, out of the
// collection's document count, and out of the underlying store.
TEST_F(CollectionTest, DeletionOfADocument) {
    // NOTE(review): presumably the fixture's `collection` is dropped so that the
    // store key counts asserted below only reflect `collection_for_del` - confirm.
    collectionManager.drop_collection("collection");

    std::vector<field> search_fields = {field("title", field_types::STRING, false),
                                        field("points", field_types::INT32, false)};

    std::vector<std::string> query_fields = {"title"};
    std::vector<sort_by> sort_fields = { sort_by("points", "DESC") };

    Collection *collection_for_del = collectionManager.get_collection("collection_for_del");
    if(collection_for_del == nullptr) {
        collection_for_del = collectionManager.create_collection("collection_for_del", search_fields, "points").get();
    }

    // Counts every key currently in the store. The iterator is released inside
    // the helper, before any assertion runs, so a fatal ASSERT_* failure in the
    // test body cannot leak it.
    auto count_store_keys = [this]() {
        rocksdb::Iterator *it = store->get_iterator();
        size_t key_count = 0;
        for (it->SeekToFirst(); it->Valid(); it->Next()) {
            key_count += 1;
        }
        delete it;
        return key_count;
    };

    // dummy record for record id 0: to make the test record IDs to match with line numbers
    std::string json_line = "{\"points\":10,\"title\":\"z\"}";
    collection_for_del->add(json_line);

    std::ifstream infile(std::string(ROOT_DIR)+"test/documents.jsonl");
    while (std::getline(infile, json_line)) {
        collection_for_del->add(json_line);
    }
    infile.close();

    ASSERT_EQ(25, collection_for_del->get_num_documents());

    nlohmann::json results;

    // asserts before removing any record
    results = collection_for_del->search("cryogenic", query_fields, "", {}, sort_fields, 0, 5, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, results["hits"].size());

    size_t num_keys = count_store_keys();
    ASSERT_EQ(25+25+3, num_keys);  // 25 records, 25 id mapping, 3 meta keys

    // actually remove a record now
    collection_for_del->remove("1");

    results = collection_for_del->search("cryogenic", query_fields, "", {}, sort_fields, 0, 5, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    // a different query still matches after the removal
    results = collection_for_del->search("archives", query_fields, "", {}, sort_fields, 0, 5, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, results["hits"].size());

    collection_for_del->remove("foo");   // custom id record
    results = collection_for_del->search("martian", query_fields, "", {}, sort_fields, 0, 5, 1, FREQUENCY, false).get();
    ASSERT_EQ(0, results["hits"].size());

    // delete all records
    for(int id = 0; id <= 25; id++) {
        collection_for_del->remove(std::to_string(id));
    }

    ASSERT_EQ(0, collection_for_del->get_num_documents());

    // the count of 3 matches the "3 meta keys" of the earlier check
    num_keys = count_store_keys();
    ASSERT_EQ(3, num_keys);

    collectionManager.drop_collection("collection_for_del");
}