mirror of
https://github.com/typesense/typesense.git
synced 2025-05-16 19:55:21 +08:00
Collection operations on float fields.
This commit is contained in:
parent
a2f475d7fc
commit
e384b777a1
31
TODO.md
31
TODO.md
@ -48,18 +48,26 @@
|
||||
- ~~Fetch an individual document~~
|
||||
- ~~ID field should be a string: must validate~~
|
||||
- ~~Number of records in collection~~
|
||||
- ~~Test for asc/desc upper/lower casing~~
|
||||
- ~~Test for search without any sort_by given~~
|
||||
- ~~Test for collection creation validation~~
|
||||
- ~~Test for delete document~~
|
||||
- ~~art float search~~
|
||||
- When prefix=true, use token_ranking_field for token ordering only for last word
|
||||
- only last token should be prefix searched
|
||||
- test for token ranking on float field
|
||||
- test for float int field deletion during doc deletion
|
||||
- Prefix-search strings should not be null terminated
|
||||
- `> INT32_MAX` validation for float field
|
||||
- art bool support
|
||||
- Proper logging
|
||||
- Add docs/explanation around ranking calc
|
||||
- Use rocksdb batch put for atomic insertion
|
||||
- When prefix=true, use token_ranking_field for token ordering only for last word
|
||||
- Query token ids should match query token ordering
|
||||
- ID should not have "/"
|
||||
- Group results by field
|
||||
- Handle store-get() not finding a key
|
||||
- Delete using range: https://github.com/facebook/rocksdb/wiki/Delete-A-Range-Of-Keys
|
||||
- ~~Test for asc/desc upper/lower casing~~
|
||||
- ~~Test for search without any sort_by given~~
|
||||
- ~~Test for collection creation validation~~
|
||||
- ~~Test for delete document~~
|
||||
- Test for sorted_array::indexOf when length is 0
|
||||
- Test for snippets
|
||||
- Test for pagination
|
||||
@ -70,18 +78,12 @@
|
||||
- UTF-8 support for fuzzy search
|
||||
- Handle searching for non-existing fields gracefully
|
||||
- test for same match score but different primary, secondary attr
|
||||
- only last token should be prefix searched
|
||||
- Intersection without unpacking
|
||||
- Support nested fields via "."
|
||||
- Support search operators like +, - etc.
|
||||
- Prefix-search strings should not be null terminated
|
||||
- string_utils::tokenize should not have max length
|
||||
- art float search
|
||||
- Benchmark with -ffast-math
|
||||
- Space sensitivity
|
||||
- Use bitmap index instead of compressed array for doc list
|
||||
- Primary_rank_scores and secondary_rank_scores hashmaps should be combined
|
||||
- Proper logging
|
||||
- Use bitmap index instead of compressed array for doc list?
|
||||
- Primary_rank_scores and secondary_rank_scores hashmaps should be combined?
|
||||
- d-ary heap?
|
||||
|
||||
**API**
|
||||
@ -105,4 +107,5 @@
|
||||
|
||||
**Tech debt**
|
||||
|
||||
- ~~Use GLOB file pattern for CMake (better IDE refactoring support)~~
|
||||
- ~~Use GLOB file pattern for CMake (better IDE refactoring support)~~
|
||||
- DRY index_int64_field* methods
|
@ -103,14 +103,18 @@ private:
|
||||
void index_string_array_field(const std::vector<std::string> & strings, const uint32_t score, art_tree *t,
|
||||
uint32_t seq_id, const bool verbatim) const;
|
||||
|
||||
void index_int32_field(const int32_t value, uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
void index_int32_field(const int32_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
|
||||
void index_int64_field(const int64_t value, uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
void index_int64_field(const int64_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
|
||||
void index_float_field(const float value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
|
||||
void index_int32_array_field(const std::vector<int32_t> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
|
||||
void index_int64_array_field(const std::vector<int64_t> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
|
||||
void index_float_array_field(const std::vector<float> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
|
||||
|
||||
void remove_and_shift_offset_index(sorted_array &offset_index, const uint32_t *indices_sorted,
|
||||
const uint32_t indices_length);
|
||||
|
||||
|
@ -9,6 +9,8 @@ namespace field_types {
|
||||
static const std::string STRING = "STRING";
|
||||
static const std::string INT32 = "INT32";
|
||||
static const std::string INT64 = "INT64";
|
||||
static const std::string FLOAT = "FLOAT";
|
||||
static const std::string FLOAT_ARRAY = "FLOAT_ARRAY";
|
||||
static const std::string STRING_ARRAY = "STRING_ARRAY";
|
||||
static const std::string INT32_ARRAY = "INT32_ARRAY";
|
||||
static const std::string INT64_ARRAY = "INT64_ARRAY";
|
||||
@ -27,9 +29,17 @@ struct field {
|
||||
|
||||
}
|
||||
|
||||
bool integer() {
|
||||
return type == field_types::INT32 || type == field_types::INT32_ARRAY ||
|
||||
type == field_types::INT64 || type == field_types::INT64_ARRAY;
|
||||
bool is_integer() {
|
||||
return (type == field_types::INT32 || type == field_types::INT32_ARRAY ||
|
||||
type == field_types::INT64 || type == field_types::INT64_ARRAY);
|
||||
}
|
||||
|
||||
bool is_float() {
|
||||
return (type == field_types::FLOAT || type == field_types::FLOAT_ARRAY);
|
||||
}
|
||||
|
||||
bool is_string() {
|
||||
return (type == field_types::STRING || type == field_types::STRING_ARRAY);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -72,6 +72,28 @@ struct StringUtils {
|
||||
return escaped.str();
|
||||
}
|
||||
|
||||
// See: https://stackoverflow.com/a/19751887/131050
|
||||
// Returns true when `s` is a plain decimal literal:
//   - an optional leading '+' or '-',
//   - digits containing at most one '.',
//   - an optional trailing 'f', accepted only as the last character and only
//     after a decimal point has been seen (e.g. "1.5f").
// At least one digit is required, so "", "+", "." and "-.f" are rejected.
// Integer-looking strings such as "7" are accepted. Exponents, whitespace and
// any other characters are rejected.
static bool is_float(const std::string &s) {
    std::string::const_iterator it = s.begin();
    bool decimalPoint = false;
    bool seenDigit = false;

    // skip a single leading sign
    if(!s.empty() && (s[0] == '-' || s[0] == '+')) {
        ++it;
    }

    while(it != s.end()) {
        if(*it == '.') {
            if(decimalPoint) {
                return false;  // second decimal point
            }
            decimalPoint = true;
        } else if(std::isdigit(static_cast<unsigned char>(*it))) {
            // cast: std::isdigit is undefined for negative char values
            seenDigit = true;
        } else if(*it != 'f' || it + 1 != s.end() || !decimalPoint) {
            // only a trailing 'f' following a decimal point is tolerated
            return false;
        }

        ++it;
    }

    // a sign, a dot or an 'f' suffix alone does not make a number
    return seenDigit;
}
|
||||
|
||||
// Adapted from: http://stackoverflow.com/a/2845275/131050
|
||||
static bool is_integer(const std::string &s) {
|
||||
if(s.empty() || ((!isdigit(s[0])) && (s[0] != '-') && (s[0] != '+'))) {
|
||||
|
@ -85,7 +85,7 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
}
|
||||
|
||||
if(!token_ranking_field.empty() && !document[token_ranking_field].is_number()) {
|
||||
return Option<>(400, "Token ranking field `" + token_ranking_field + "` must be an INT32.");
|
||||
return Option<>(400, "Token ranking field `" + token_ranking_field + "` must be a number.");
|
||||
}
|
||||
|
||||
if(!token_ranking_field.empty() && document[token_ranking_field].get<int64_t>() > INT32_MAX) {
|
||||
@ -114,7 +114,7 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
const std::string & text = document[field_name];
|
||||
index_string_field(text, points, t, seq_id, false);
|
||||
} else if(field_pair.second.type == field_types::INT32) {
|
||||
if(!document[field_name].is_number()) {
|
||||
if(!document[field_name].is_number_integer()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an INT32.");
|
||||
}
|
||||
|
||||
@ -125,12 +125,19 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
uint32_t value = document[field_name];
|
||||
index_int32_field(value, points, t, seq_id);
|
||||
} else if(field_pair.second.type == field_types::INT64) {
|
||||
if(!document[field_name].is_number()) {
|
||||
if(!document[field_name].is_number_integer()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an INT64.");
|
||||
}
|
||||
|
||||
uint64_t value = document[field_name];
|
||||
index_int64_field(value, points, t, seq_id);
|
||||
} else if(field_pair.second.type == field_types::FLOAT) {
|
||||
if(!document[field_name].is_number_float()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be a FLOAT.");
|
||||
}
|
||||
|
||||
float value = document[field_name];
|
||||
index_float_field(value, points, t, seq_id);
|
||||
} else if(field_pair.second.type == field_types::STRING_ARRAY) {
|
||||
if(!document[field_name].is_array()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be a STRING_ARRAY.");
|
||||
@ -147,7 +154,7 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an INT32_ARRAY.");
|
||||
}
|
||||
|
||||
if(document[field_name].size() > 0 && !document[field_name][0].is_number()) {
|
||||
if(document[field_name].size() > 0 && !document[field_name][0].is_number_integer()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an INT32_ARRAY.");
|
||||
}
|
||||
|
||||
@ -158,12 +165,23 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an INT64_ARRAY.");
|
||||
}
|
||||
|
||||
if(document[field_name].size() > 0 && !document[field_name][0].is_number()) {
|
||||
if(document[field_name].size() > 0 && !document[field_name][0].is_number_integer()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an INT64_ARRAY.");
|
||||
}
|
||||
|
||||
std::vector<int64_t> values = document[field_name];
|
||||
index_int64_array_field(values, points, t, seq_id);
|
||||
} else if(field_pair.second.type == field_types::FLOAT_ARRAY) {
|
||||
if(!document[field_name].is_array()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an FLOAT_ARRAY.");
|
||||
}
|
||||
|
||||
if(document[field_name].size() > 0 && !document[field_name][0].is_number_float()) {
|
||||
return Option<>(400, "Search field `" + field_name + "` must be an FLOAT_ARRAY.");
|
||||
}
|
||||
|
||||
std::vector<float> values = document[field_name];
|
||||
index_float_array_field(values, points, t, seq_id);
|
||||
}
|
||||
}
|
||||
|
||||
@ -260,6 +278,30 @@ void Collection::index_int64_field(const int64_t value, uint32_t score, art_tree
|
||||
art_insert(t, key, KEY_LEN, &art_doc, num_hits);
|
||||
}
|
||||
|
||||
void Collection::index_float_field(const float value, uint32_t score, art_tree *t, uint32_t seq_id) const {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
|
||||
encode_float(value, key);
|
||||
|
||||
uint32_t num_hits = 0;
|
||||
art_leaf* leaf = (art_leaf *) art_search(t, key, KEY_LEN);
|
||||
if(leaf != NULL) {
|
||||
num_hits = leaf->values->ids.getLength();
|
||||
}
|
||||
|
||||
num_hits += 1;
|
||||
|
||||
art_document art_doc;
|
||||
art_doc.id = seq_id;
|
||||
art_doc.score = score;
|
||||
art_doc.offsets_len = 0;
|
||||
art_doc.offsets = nullptr;
|
||||
|
||||
art_insert(t, key, KEY_LEN, &art_doc, num_hits);
|
||||
}
|
||||
|
||||
|
||||
void Collection::index_string_field(const std::string & text, const uint32_t score, art_tree *t,
|
||||
uint32_t seq_id, const bool verbatim) const {
|
||||
std::vector<std::string> tokens;
|
||||
@ -327,6 +369,13 @@ void Collection::index_int64_array_field(const std::vector<int64_t> & values, co
|
||||
}
|
||||
}
|
||||
|
||||
void Collection::index_float_array_field(const std::vector<float> & values, const float score, art_tree *t,
|
||||
uint32_t seq_id) const {
|
||||
for(const float value: values) {
|
||||
index_float_field(value, score, t, seq_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Collection::do_facets(std::vector<facet> & facets, uint32_t* result_ids, size_t results_size) {
|
||||
for(auto & a_facet: facets) {
|
||||
// assumed that facet fields have already been validated upstream
|
||||
@ -466,16 +515,20 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
const std::string & raw_value = expression_parts[1];
|
||||
filter f;
|
||||
|
||||
if(_field.integer()) {
|
||||
if(_field.is_integer() || _field.is_float()) {
|
||||
// could be a single value or a list
|
||||
if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
|
||||
std::vector<std::string> filter_values;
|
||||
StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
|
||||
|
||||
for(const std::string & filter_value: filter_values) {
|
||||
if(!StringUtils::is_integer(filter_value)) {
|
||||
if(_field.is_integer() && !StringUtils::is_integer(filter_value)) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
|
||||
}
|
||||
|
||||
if(_field.is_float() && !StringUtils::is_float(filter_value)) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Not a float.");
|
||||
}
|
||||
}
|
||||
|
||||
f = {field_name, filter_values, EQUALS};
|
||||
@ -498,13 +551,17 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
|
||||
filter_value = StringUtils::trim(filter_value);
|
||||
|
||||
if(!StringUtils::is_integer(filter_value)) {
|
||||
if(_field.is_integer() && !StringUtils::is_integer(filter_value)) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
|
||||
}
|
||||
|
||||
if(_field.is_float() && !StringUtils::is_float(filter_value)) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Not a float.");
|
||||
}
|
||||
|
||||
f = {field_name, {filter_value}, op_comparator.get()};
|
||||
}
|
||||
} else {
|
||||
} else if(_field.is_string()) {
|
||||
if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
|
||||
std::vector<std::string> filter_values;
|
||||
StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
|
||||
@ -512,6 +569,8 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
} else {
|
||||
f = {field_name, {raw_value}, EQUALS};
|
||||
}
|
||||
} else {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Unidentified field type.");
|
||||
}
|
||||
|
||||
filters.push_back(f);
|
||||
@ -527,7 +586,7 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
field f = search_schema.at(a_filter.field_name);
|
||||
std::vector<const art_leaf*> leaves;
|
||||
|
||||
if(f.integer()) {
|
||||
if(f.is_integer()) {
|
||||
for(const std::string & filter_value: a_filter.values) {
|
||||
if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY) {
|
||||
int32_t value = (int32_t) std::stoi(filter_value);
|
||||
@ -537,7 +596,12 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
art_int64_search(t, value, a_filter.compare_operator, leaves);
|
||||
}
|
||||
}
|
||||
} else if(f.type == field_types::STRING || f.type == field_types::STRING_ARRAY) {
|
||||
} else if(f.is_float()) {
|
||||
for(const std::string & filter_value: a_filter.values) {
|
||||
float value = (float) std::atof(filter_value.c_str());
|
||||
art_float_search(t, value, a_filter.compare_operator, leaves);
|
||||
}
|
||||
} else if(f.is_string()) {
|
||||
for(const std::string & filter_value: a_filter.values) {
|
||||
art_leaf* leaf = (art_leaf *) art_search(t, (const unsigned char*) filter_value.c_str(), filter_value.length()+1);
|
||||
if(leaf != nullptr) {
|
||||
@ -1153,6 +1217,7 @@ Option<std::string> Collection::remove(const std::string & id) {
|
||||
nlohmann::json document = nlohmann::json::parse(parsed_document);
|
||||
|
||||
for(auto & name_field: search_schema) {
|
||||
// Go through all the field names and find the keys+values so that they can be removed from in-memory index
|
||||
std::vector<std::string> tokens;
|
||||
if(name_field.second.type == field_types::STRING) {
|
||||
StringUtils::split(document[name_field.first], tokens, " ");
|
||||
@ -1186,6 +1251,20 @@ Option<std::string> Collection::remove(const std::string & id) {
|
||||
encode_int64(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
} else if(name_field.second.type == field_types::FLOAT) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
int64_t value = document[name_field.first].get<int64_t>();
|
||||
encode_float(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::FLOAT_ARRAY) {
|
||||
std::vector<float> values = document[name_field.first].get<std::vector<float>>();
|
||||
for(const float value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_float(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
}
|
||||
|
||||
for(auto & token: tokens) {
|
||||
|
@ -22,14 +22,15 @@ int main(int argc, char* argv[]) {
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
collectionManager.init(store, "abcd");
|
||||
|
||||
Collection *collection = collectionManager.get_collection("collection");
|
||||
Collection *collection = collectionManager.get_collection("hnstories_direct");
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("collection", fields_to_index, {}, sort_fields);
|
||||
collection = collectionManager.create_collection("hnstories_direct", fields_to_index, {}, sort_fields);
|
||||
}
|
||||
|
||||
std::ifstream infile("/Users/kishore/Downloads/hnstories_small.jsonl");
|
||||
std::ifstream infile("/Users/kishore/Downloads/hnstories.jsonl");
|
||||
|
||||
std::string json_line;
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
|
||||
while (std::getline(infile, json_line)) {
|
||||
collection->add(json_line);
|
||||
@ -38,7 +39,7 @@ int main(int argc, char* argv[]) {
|
||||
infile.close();
|
||||
cout << "FINISHED INDEXING!" << endl << flush;
|
||||
|
||||
std::vector<std::string> search_fields = {"title"};
|
||||
/*std::vector<std::string> search_fields = {"title"};
|
||||
|
||||
std::vector<string> queries = {"the", "and", "to", "of", "in"};
|
||||
auto counter = 0;
|
||||
@ -51,10 +52,10 @@ int main(int argc, char* argv[]) {
|
||||
auto results = collection->search(queries[i], search_fields, "", { }, {sort_field("points", "DESC")}, 1, 10, 1, MAX_SCORE, 0).get();
|
||||
results_total += results.size();
|
||||
counter++;
|
||||
}
|
||||
}*/
|
||||
|
||||
long long int timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
cout << "Time taken: " << timeMillis << "ms" << endl;
|
||||
cout << "Total: " << results_total << endl;
|
||||
//cout << "Total: " << results_total << endl;
|
||||
return 0;
|
||||
}
|
@ -1138,6 +1138,11 @@ TEST(ArtTest, test_encode_float_positive) {
|
||||
ASSERT_EQ(1, results.size());
|
||||
results.clear();
|
||||
|
||||
res = art_float_search(&t, 0.0, GREATER_THAN, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(5, results.size());
|
||||
results.clear();
|
||||
|
||||
res = art_float_search(&t, 10.5678, LESS_THAN, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(4, results.size());
|
||||
@ -1153,10 +1158,20 @@ TEST(ArtTest, test_encode_float_positive) {
|
||||
ASSERT_EQ(1, results.size());
|
||||
results.clear();
|
||||
|
||||
res = art_float_search(&t, 10.4, GREATER_THAN, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(2, results.size());
|
||||
results.clear();
|
||||
|
||||
res = art_float_search(&t, 10.5678, GREATER_THAN_EQUALS, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(2, results.size());
|
||||
results.clear();
|
||||
|
||||
res = art_float_search(&t, 10, GREATER_THAN_EQUALS, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(2, results.size());
|
||||
results.clear();
|
||||
}
|
||||
|
||||
TEST(ArtTest, test_encode_float_positive_negative) {
|
||||
@ -1204,4 +1219,9 @@ TEST(ArtTest, test_encode_float_positive_negative) {
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(6, results.size());
|
||||
results.clear();
|
||||
|
||||
res = art_float_search(&t, -24, GREATER_THAN_EQUALS, results);
|
||||
ASSERT_TRUE(res == 0);
|
||||
ASSERT_EQ(5, results.size());
|
||||
results.clear();
|
||||
}
|
@ -597,6 +597,143 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
}
|
||||
|
||||
// Exercises sorting and filtering on FLOAT and FLOAT_ARRAY fields using the documents in
// test/numeric_array_documents.jsonl: plain searches sorted by `rating`, comparison
// filters, list ("IN"-style) filters, combined filters, a filter matching nothing and a
// prefix search ranked by the float field.
TEST_F(CollectionTest, FilterOnFloatFields) {
    Collection *coll_array_fields;

    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32),
                                 field("top_3", field_types::FLOAT_ARRAY),
                                 field("rating", field_types::FLOAT)};
    std::vector<field> sort_fields_index = { field("rating", "FLOAT") };
    std::vector<sort_field> sort_fields_desc = { sort_field("rating", "DESC") };
    std::vector<sort_field> sort_fields_asc = { sort_field("rating", "ASC") };

    coll_array_fields = collectionManager.get_collection("coll_array_fields");
    if(coll_array_fields == nullptr) {
        coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
    }

    std::string json_line;

    while (std::getline(infile, json_line)) {
        coll_array_fields->add(json_line);
    }

    infile.close();

    // Plain search with no filters - results should be sorted by rating field DESC
    query_fields = {"name"};
    std::vector<std::string> facets;
    nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, results["hits"].size());

    std::vector<std::string> ids = {"1", "2", "4", "0", "3"};

    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // Plain search with no filters - results should be sorted by rating field ASC
    results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields_asc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(5, results["hits"].size());

    ids = {"3", "0", "4", "2", "1"};

    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // Searching on a float field, sorted desc by rating
    results = coll_array_fields->search("Jeremy", query_fields, "rating:>0.0", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(4, results["hits"].size());

    ids = {"1", "2", "4", "0"};

    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // Searching a float against a float array field
    results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(2, results["hits"].size());

    ids = {"1", "2"};
    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // multiple filters
    results = coll_array_fields->search("Jeremy", query_fields, "top_3:>7.8 && rating:>7.9", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(1, results["hits"].size());

    ids = {"1"};
    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // multiple search values (works like SQL's IN operator) against a single float field
    results = coll_array_fields->search("Jeremy", query_fields, "rating:[1.09, 7.812]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(2, results["hits"].size());

    ids = {"2", "0"};
    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // multiple search values against a float array field - also use extra padding between symbols
    results = coll_array_fields->search("Jeremy", query_fields, "top_3 : [ 5.431, 0.001 , 7.812, 11.992]", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false).get();
    ASSERT_EQ(3, results["hits"].size());

    ids = {"2", "4", "0"};
    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    // when filters don't match any record, no results should be returned
    // Keep the Option returned by search() itself (no .get() here) so that the ok()
    // assertion checks the real outcome of the search rather than a freshly wrapped value.
    Option<nlohmann::json> results_op = coll_array_fields->search("Jeremy", query_fields, "rating:<-2.78", facets, sort_fields_desc, 0, 10, 1, FREQUENCY, false);
    ASSERT_TRUE(results_op.ok());
    results = results_op.get();
    ASSERT_EQ(0, results["hits"].size());

    // rank tokens by token ranking field
    results_op = coll_array_fields->search("j", query_fields, "", facets, sort_fields_desc, 0, 10, 1, MAX_SCORE, true);
    ASSERT_TRUE(results_op.ok());
    results = results_op.get();
    ASSERT_EQ(5, results["hits"].size());

    ids = {"1", "2", "4", "0", "3"};

    for(size_t i = 0; i < results["hits"].size(); i++) {
        nlohmann::json result = results["hits"].at(i);
        std::string result_id = result["id"];
        std::string id = ids.at(i);
        ASSERT_STREQ(id.c_str(), result_id.c_str());
    }

    collectionManager.drop_collection("coll_array_fields");
}
|
||||
|
||||
TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
Collection *coll_array_fields;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
{"name": "Jeremy Howard", "age": 24, "years": [2014, 2015, 2016], "timestamps": [1390354022, 1421890022, 1453426022], "tags": ["gold", "silver"]}
|
||||
{"name": "Jeremy Howard", "age": 44, "years": [2015, 2016], "timestamps": [1421890022, 1453426022], "tags": ["gold"]}
|
||||
{"name": "Jeremy Howard", "age": 21, "years": [2016], "timestamps": [1453426022], "tags": ["bronze", "gold"]}
|
||||
{"name": "Jeremy Howard", "age": 63, "years": [1981, 1985], "timestamps": [348974822, 475205222], "tags": ["silver"]}
|
||||
{"name": "Jeremy Howard", "age": 32, "years": [1999, 2000, 2001, 2002], "timestamps": [916968422, 948504422, 980126822, 1011662822], "tags": ["silver", "gold", "bronze"]}
|
||||
{"name": "Jeremy Howard", "top_3": [1.09, 1.88, 0.001], "rating": 1.09, "age": 24, "years": [2014, 2015, 2016], "timestamps": [1390354022, 1421890022, 1453426022], "tags": ["gold", "silver"]}
|
||||
{"name": "Jeremy Howard", "top_3": [9.999, 8.89, 7.713], "rating": 9.999, "age": 44, "years": [2015, 2016], "timestamps": [1421890022, 1453426022], "tags": ["gold"]}
|
||||
{"name": "Jeremy Howard", "top_3": [7.812, 7.770, 6.66], "rating": 7.812, "age": 21, "years": [2016], "timestamps": [1453426022], "tags": ["bronze", "gold"]}
|
||||
{"name": "Jeremy Howard", "top_3": [0.0, 0.0, 0.0], "rating": 0.0, "age": 63, "years": [1981, 1985], "timestamps": [348974822, 475205222], "tags": ["silver"]}
|
||||
{"name": "Jeremy Howard", "top_3": [5.5, 5.431, 1.001], "rating": 5.5, "age": 32, "years": [1999, 2000, 2001, 2002], "timestamps": [916968422, 948504422, 980126822, 1011662822], "tags": ["silver", "gold", "bronze"]}
|
Loading…
x
Reference in New Issue
Block a user