mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 20:22:32 +08:00
Parse filter query string.
This commit is contained in:
parent
0760e4d01b
commit
96921be016
@ -46,7 +46,7 @@ target_compile_definitions(search PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/")
|
||||
target_compile_definitions(benchmark PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/")
|
||||
target_compile_definitions(typesense_test PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/")
|
||||
|
||||
target_link_libraries(typesense-server for curl h2o-evloop pthread rocksdb ssl crypto)
|
||||
target_link_libraries(typesense-server for h2o-evloop pthread rocksdb ssl crypto)
|
||||
target_link_libraries(search for pthread rocksdb)
|
||||
target_link_libraries(benchmark for pthread rocksdb)
|
||||
target_link_libraries(typesense_test pthread for rocksdb gtest gtest_main)
|
||||
|
4
TODO.md
4
TODO.md
@ -31,8 +31,10 @@
|
||||
- ~~Assumption that all tokens match for scoring is no longer true~~
|
||||
- Handle searching for non-existing fields gracefully
|
||||
- Intersection without unpacking
|
||||
- Facets
|
||||
- Filters
|
||||
- Facets
|
||||
- Iterator
|
||||
- Highlight
|
||||
- Support search operators like +, - etc.
|
||||
- Prefix-search strings should not be null terminated
|
||||
- string_utils::tokenize should not have max length
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <topster.h>
|
||||
#include <json.hpp>
|
||||
#include <field.h>
|
||||
#include <option.h>
|
||||
|
||||
class Collection {
|
||||
private:
|
||||
@ -41,7 +42,7 @@ private:
|
||||
|
||||
size_t union_of_leaf_ids(std::vector<const art_leaf *> &leaves, uint32_t **results_out);
|
||||
|
||||
uint32_t do_filtering(uint32_t** filter_ids_out, const std::vector<filter> & filters);
|
||||
Option<uint32_t> do_filtering(uint32_t** filter_ids_out, const std::string & simple_filter_str);
|
||||
|
||||
void search(uint32_t* filter_ids, size_t filter_ids_length, std::string & query, const std::string & field,
|
||||
const int num_typos, const size_t num_results, Topster<100> & topster, size_t & num_found,
|
||||
@ -92,7 +93,7 @@ public:
|
||||
|
||||
std::string add(std::string json_str);
|
||||
|
||||
nlohmann::json search(std::string query, const std::vector<std::string> fields, const std::vector<filter> filters,
|
||||
nlohmann::json search(std::string query, const std::vector<std::string> fields, const std::string & simple_filter_str,
|
||||
const int num_typos, const size_t num_results, const token_ordering token_order = FREQUENCY,
|
||||
const bool prefix = false);
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
#include <string>
|
||||
#include "art.h"
|
||||
#include "option.h"
|
||||
#include "string_utils.h"
|
||||
|
||||
namespace field_types {
|
||||
static const std::string STRING = "STRING";
|
||||
@ -24,24 +26,40 @@ struct field {
|
||||
field(std::string name, std::string type): name(name), type(type) {
|
||||
|
||||
}
|
||||
|
||||
bool integer() {
|
||||
return type == field_types::INT32 || type == field_types::INT32_ARRAY ||
|
||||
type == field_types::INT64 || type == field_types::INT64_ARRAY;
|
||||
}
|
||||
};
|
||||
|
||||
struct filter {
|
||||
std::string field_name;
|
||||
std::vector<std::string> values;
|
||||
std::string compare_operator;
|
||||
NUM_COMPARATOR compare_operator;
|
||||
|
||||
NUM_COMPARATOR get_comparator() const {
|
||||
if(compare_operator == "LESS_THAN") {
|
||||
return LESS_THAN;
|
||||
} else if(compare_operator == "LESS_THAN_EQUALS") {
|
||||
return LESS_THAN_EQUALS;
|
||||
} else if(compare_operator == "EQUALS") {
|
||||
return EQUALS;
|
||||
} else if(compare_operator == "GREATER_THAN") {
|
||||
return GREATER_THAN;
|
||||
} else {
|
||||
return GREATER_THAN_EQUALS;
|
||||
// Works out which numerical comparator prefixes `comp_and_value`.
//
// A plain integer (no operator at all) means EQUALS. Otherwise the string
// must start with one of `<=`, `>=`, `<` or `>`; anything else yields a 400
// error Option. Only the prefix is inspected here - the caller is responsible
// for validating the number that follows the operator.
static Option<NUM_COMPARATOR> extract_num_comparator(const std::string & comp_and_value) {
    if(StringUtils::is_integer(comp_and_value)) {
        return Option<NUM_COMPARATOR>(EQUALS);
    }

    if(!comp_and_value.empty()) {
        // a trailing '=' upgrades the 1-letter operator to its 2-letter form
        const bool or_equals = comp_and_value.size() > 1 && comp_and_value[1] == '=';

        switch(comp_and_value[0]) {
            case '<':
                return Option<NUM_COMPARATOR>(or_equals ? LESS_THAN_EQUALS : LESS_THAN);
            case '>':
                return Option<NUM_COMPARATOR>(or_equals ? GREATER_THAN_EQUALS : GREATER_THAN);
            default:
                break;
        }
    }

    return Option<NUM_COMPARATOR>(400, "Numerical field has an invalid comparator.");
}
|
||||
};
|
35
include/option.h
Normal file
35
include/option.h
Normal file
@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
#include <string>

// Result wrapper: holds either a value of type T (success) or an error code
// together with an error message (failure).
//
// On failure the stored value is a value-initialized T; on success the error
// message is empty and the error code is 0. Callers should check ok() before
// relying on get().
template <typename T=uint32_t>
class Option {
private:

    T value;
    bool is_ok;

    std::string error_msg;
    uint32_t code;

public:

    // Builds a successful result carrying `value`.
    // Intentionally implicit so that existing call sites keep compiling.
    Option(const T & value): value(value), is_ok(true), error_msg(), code(0) {

    }

    // Builds a failed result. Initializers follow the declaration order of the
    // members, and `value` is value-initialized so that get() never reads an
    // indeterminate object.
    Option(uint32_t code, const std::string & error_msg): value(), is_ok(false), error_msg(error_msg), code(code) {

    }

    // True when this Option carries a value rather than an error.
    bool ok() const {
        return is_ok;
    }

    // The wrapped value (a value-initialized T when ok() is false).
    T get() const {
        return value;
    }

    // The error message (empty when ok() is true).
    std::string error() const {
        return error_msg;
    }

    // The error code given at construction (0 when ok() is true).
    uint32_t error_code() const {
        return code;
    }
};
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cctype>
#include <cerrno>
#include <cstdlib>
#include <sstream>
#include <string>
|
||||
|
||||
struct StringUtils {
|
||||
|
||||
@ -42,4 +43,82 @@ struct StringUtils {
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
// Adapted from: http://stackoverflow.com/a/236180/131050
|
||||
static void split(const std::string& s, std::vector<std::string> & result, const std::string& delim, const bool keep_empty = false) {
|
||||
if (delim.empty()) {
|
||||
result.push_back(s);
|
||||
return ;
|
||||
}
|
||||
std::string::const_iterator substart = s.begin(), subend;
|
||||
while (true) {
|
||||
subend = search(substart, s.end(), delim.begin(), delim.end());
|
||||
std::string temp(substart, subend);
|
||||
temp = trim(temp);
|
||||
|
||||
if (keep_empty || !temp.empty()) {
|
||||
result.push_back(temp);
|
||||
}
|
||||
if (subend == s.end()) {
|
||||
break;
|
||||
}
|
||||
substart = subend + delim.size();
|
||||
}
|
||||
}
|
||||
|
||||
// Adapted from: http://stackoverflow.com/a/36000453/131050
|
||||
// Strips leading and trailing space characters (' ' only - tabs and newlines
// are left alone) from `str` in place and returns a reference to it.
static std::string & trim(std::string & str) {
    // right trim: drop everything after the last non-space character
    const std::string::size_type last_kept = str.find_last_not_of(' ');
    str.erase(last_kept == std::string::npos ? 0 : last_kept + 1);

    // left trim: after the right trim the string is either empty or begins
    // with some spaces followed by a non-space character
    str.erase(0, str.find_first_not_of(' '));

    return str;
}
|
||||
|
||||
// URL decoding - adapted from: http://stackoverflow.com/a/32595923/131050
|
||||
|
||||
// Converts one hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
// value. The result is meaningless for any other character, so callers are
// expected to validate the input first.
static char from_hex(char ch) {
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    const unsigned char uch = static_cast<unsigned char>(ch);
    return std::isdigit(uch) ? static_cast<char>(uch - '0')
                             : static_cast<char>(std::tolower(uch) - 'a' + 10);
}
|
||||
|
||||
static std::string url_decode(std::string text) {
|
||||
char h;
|
||||
std::ostringstream escaped;
|
||||
escaped.fill('0');
|
||||
|
||||
for (auto i = text.begin(), n = text.end(); i != n; ++i) {
|
||||
std::string::value_type c = (*i);
|
||||
|
||||
if (c == '%') {
|
||||
if (i[1] && i[2]) {
|
||||
h = from_hex(i[1]) << 4 | from_hex(i[2]);
|
||||
escaped << h;
|
||||
i += 2;
|
||||
}
|
||||
} else if (c == '+') {
|
||||
escaped << ' ';
|
||||
} else {
|
||||
escaped << c;
|
||||
}
|
||||
}
|
||||
|
||||
return escaped.str();
|
||||
}
|
||||
|
||||
// Adapted from: http://stackoverflow.com/a/2845275/131050
|
||||
// Tells whether `s` is, in its entirety, a base-10 integer that fits into a
// `long long`: an optional leading '+' or '-' followed by at least one digit,
// with no surrounding whitespace or trailing characters.
//
// Out-of-range values are rejected so that a later numeric conversion of the
// same string does not fail on overflow. Note that this call resets `errno`.
static bool is_integer(const std::string &s) {
    if(s.empty()) {
        return false;
    }

    // <cctype> functions must not be given a negative plain char
    const unsigned char first = static_cast<unsigned char>(s[0]);
    if(!std::isdigit(first) && first != '-' && first != '+') {
        return false;
    }

    char * end = nullptr;
    errno = 0;
    std::strtoll(s.c_str(), &end, 10);

    // the whole string must have been consumed (this also rejects a lone sign
    // and embedded NUL characters) and the value must not have overflowed
    return errno != ERANGE && end == s.c_str() + s.size();
}
|
||||
};
|
@ -266,7 +266,81 @@ size_t Collection::union_of_leaf_ids(std::vector<const art_leaf *> &leaves, uint
|
||||
return results_length;
|
||||
}
|
||||
|
||||
uint32_t Collection::do_filtering(uint32_t** filter_ids_out, const std::vector<filter> & filters) {
|
||||
Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::string & simple_filter_str) {
|
||||
// parse the filter string
|
||||
std::vector<std::string> filter_blocks;
|
||||
StringUtils::split(simple_filter_str, filter_blocks, "&&");
|
||||
|
||||
std::vector<filter> filters;
|
||||
|
||||
for(const std::string & filter_block: filter_blocks) {
|
||||
// split into [field_name, value]
|
||||
std::vector<std::string> expression_parts;
|
||||
StringUtils::split(filter_block, expression_parts, ":");
|
||||
if(expression_parts.size() != 2) {
|
||||
return Option<>(400, "Could not parse the filter query.");
|
||||
}
|
||||
|
||||
const std::string & field_name = expression_parts[0];
|
||||
if(schema.count(field_name) == 0) {
|
||||
return Option<>(400, "Could not find a filter field named `" + field_name + "` in the schema.");
|
||||
}
|
||||
|
||||
field _field = schema.at(field_name);
|
||||
const std::string & raw_value = expression_parts[1];
|
||||
filter f;
|
||||
|
||||
if(_field.integer()) {
|
||||
// could be a single value or a list
|
||||
if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
|
||||
std::vector<std::string> filter_values;
|
||||
StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
|
||||
|
||||
for(const std::string & filter_value: filter_values) {
|
||||
if(!StringUtils::is_integer(filter_value)) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
|
||||
}
|
||||
}
|
||||
|
||||
f = {field_name, filter_values, EQUALS};
|
||||
} else {
|
||||
Option<NUM_COMPARATOR> op_comparator = filter::extract_num_comparator(raw_value);
|
||||
if(!op_comparator.ok()) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: " + op_comparator.error());
|
||||
}
|
||||
|
||||
// extract numerical value
|
||||
std::string filter_value;
|
||||
if(op_comparator.get() == LESS_THAN || op_comparator.get() == GREATER_THAN) {
|
||||
filter_value = raw_value.substr(1);
|
||||
} else if(op_comparator.get() == LESS_THAN_EQUALS || op_comparator.get() == GREATER_THAN_EQUALS) {
|
||||
filter_value = raw_value.substr(2);
|
||||
} else {
|
||||
// EQUALS
|
||||
filter_value = raw_value;
|
||||
}
|
||||
|
||||
filter_value = StringUtils::trim(filter_value);
|
||||
|
||||
if(!StringUtils::is_integer(filter_value)) {
|
||||
return Option<>(400, "Error with field `" + _field.name + "`: Not an integer.");
|
||||
}
|
||||
|
||||
f = {field_name, {filter_value}, op_comparator.get()};
|
||||
}
|
||||
} else {
|
||||
if(raw_value[0] == '[' && raw_value[raw_value.size() - 1] == ']') {
|
||||
std::vector<std::string> filter_values;
|
||||
StringUtils::split(raw_value.substr(1, raw_value.size() - 2), filter_values, ",");
|
||||
f = {field_name, filter_values, EQUALS};
|
||||
} else {
|
||||
f = {field_name, {raw_value}, EQUALS};
|
||||
}
|
||||
}
|
||||
|
||||
filters.push_back(f);
|
||||
}
|
||||
|
||||
uint32_t* filter_ids = nullptr;
|
||||
uint32_t filter_ids_length = 0;
|
||||
|
||||
@ -277,17 +351,14 @@ uint32_t Collection::do_filtering(uint32_t** filter_ids_out, const std::vector<f
|
||||
field f = schema.at(a_filter.field_name);
|
||||
std::vector<const art_leaf*> leaves;
|
||||
|
||||
if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY ||
|
||||
f.type == field_types::INT64 || f.type == field_types::INT64_ARRAY) {
|
||||
if(f.integer()) {
|
||||
// Look up every filter value in the numeric index, using the comparator
// that was parsed from the filter expression.
for(const std::string & filter_value: a_filter.values) {
    if(f.type == field_types::INT32 || f.type == field_types::INT32_ARRAY) {
        int32_t value = (int32_t) std::stoi(filter_value);
        art_int32_search(t, value, a_filter.compare_operator, leaves);
    } else {
        // 64-bit fields need a 64-bit parse: std::stoi throws for anything
        // outside the range of int.
        int64_t value = (int64_t) std::stoll(filter_value);
        art_int64_search(t, value, a_filter.compare_operator, leaves);
    }
}
|
||||
} else if(f.type == field_types::STRING || f.type == field_types::STRING_ARRAY) {
|
||||
@ -316,17 +387,25 @@ uint32_t Collection::do_filtering(uint32_t** filter_ids_out, const std::vector<f
|
||||
}
|
||||
|
||||
*filter_ids_out = filter_ids;
|
||||
return filter_ids_length;
|
||||
return Option<>(filter_ids_length);
|
||||
}
|
||||
|
||||
nlohmann::json Collection::search(std::string query, const std::vector<std::string> fields, const std::vector<filter> filters,
|
||||
nlohmann::json Collection::search(std::string query, const std::vector<std::string> fields,
|
||||
const std::string & simple_filter_str,
|
||||
const int num_typos, const size_t num_results,
|
||||
const token_ordering token_order, const bool prefix) {
|
||||
size_t num_found = 0;
|
||||
nlohmann::json result = nlohmann::json::object();
|
||||
|
||||
// process the filters first
|
||||
uint32_t* filter_ids = nullptr;
|
||||
uint32_t filter_ids_length = do_filtering(&filter_ids, filters);
|
||||
Option<uint32_t> op_filter_ids_length = do_filtering(&filter_ids, simple_filter_str);
|
||||
if(!op_filter_ids_length.ok()) {
|
||||
result["error"] = op_filter_ids_length.error();
|
||||
return result;
|
||||
}
|
||||
|
||||
const uint32_t filter_ids_length = op_filter_ids_length.get();
|
||||
|
||||
// Order of `fields` are used to rank results
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
@ -336,7 +415,7 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
Topster<100> topster;
|
||||
const std::string & field = fields[i];
|
||||
// proceed to query search only when no filters are provided or when filtering produces results
|
||||
if(filters.size() == 0 || filter_ids_length > 0) {
|
||||
if(simple_filter_str.size() == 0 || filter_ids_length > 0) {
|
||||
search(filter_ids, filter_ids_length, query, field, num_typos, num_results,
|
||||
topster, num_found, token_order, prefix);
|
||||
topster.sort();
|
||||
@ -358,7 +437,6 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
return a.second.key > b.second.key;
|
||||
});
|
||||
|
||||
nlohmann::json result = nlohmann::json::object();
|
||||
result["hits"] = nlohmann::json::array();
|
||||
|
||||
for(auto field_order_kv: field_order_kvs) {
|
||||
|
@ -28,7 +28,7 @@
|
||||
static h2o_globalconf_t config;
|
||||
static h2o_context_t ctx;
|
||||
static h2o_accept_ctx_t accept_ctx;
|
||||
std::vector<field> search_fields = {field("title", field_types::STRING)};
|
||||
std::vector<field> search_fields = {field("title", field_types::STRING), field("points", field_types::INT32)};
|
||||
std::vector<std::string> rank_fields = {"points"};
|
||||
Store *store = new Store("/tmp/typesense-data");
|
||||
|
||||
@ -52,14 +52,18 @@ std::map<std::string, std::string> parse_query(const std::string& query) {
|
||||
|
||||
for (std::sregex_iterator i = words_begin; i != words_end; i++) {
|
||||
std::string key = (*i)[1].str();
|
||||
std::string value = (*i)[2].str();
|
||||
query_map[key] = StringUtils::replace_all(value, "%20", " ");
|
||||
std::string raw_value = (*i)[2].str();
|
||||
std::string value = StringUtils::url_decode(raw_value);
|
||||
if(query_map.count(value) == 0) {
|
||||
query_map[key] = value;
|
||||
} else {
|
||||
query_map[key] = query_map[key] + "&&" + value;
|
||||
}
|
||||
}
|
||||
|
||||
return query_map;
|
||||
}
|
||||
|
||||
|
||||
static int get_search(h2o_handler_t *self, h2o_req_t *req) {
|
||||
static h2o_generator_t generator = {NULL, NULL};
|
||||
h2o_iovec_t query = req->query_at != SIZE_MAX ?
|
||||
@ -71,6 +75,7 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {
|
||||
const char *NUM_TYPOS = "num_typos";
|
||||
const char *PREFIX = "prefix";
|
||||
const char *TOKEN_ORDERING = "token_ordering";
|
||||
const char *FILTERS = "filters";
|
||||
|
||||
if(query_map.count(NUM_TYPOS) == 0) {
|
||||
query_map[NUM_TYPOS] = "2";
|
||||
@ -84,6 +89,9 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {
|
||||
query_map[TOKEN_ORDERING] = "FREQUENCY";
|
||||
}
|
||||
|
||||
std::string filter_str = query_map.count(FILTERS) != 0 ? query_map[FILTERS] : "";
|
||||
std::cout << "filter_str: " << filter_str << std::endl;
|
||||
|
||||
token_ordering token_order = (query_map[TOKEN_ORDERING] == "MAX_SCORE") ? MAX_SCORE : FREQUENCY;
|
||||
|
||||
//printf("Query: %s\n", query_map["q"].c_str());
|
||||
@ -91,7 +99,7 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {
|
||||
|
||||
std::vector<std::string> search_fields = {"title"};
|
||||
|
||||
nlohmann::json result = collection->search(query_map["q"], search_fields, {}, std::stoi(query_map[NUM_TYPOS]),
|
||||
nlohmann::json result = collection->search(query_map["q"], search_fields, filter_str, std::stoi(query_map[NUM_TYPOS]),
|
||||
100, token_order, false);
|
||||
std::string json_str = result.dump();
|
||||
//std::cout << "JSON:" << json_str << std::endl;
|
||||
|
@ -402,7 +402,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
|
||||
// Plain search with no filters - results should be sorted by rank fields
|
||||
search_fields = {"name"};
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, {}, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, "", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"3", "1", "4", "0", "2"};
|
||||
@ -415,9 +415,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// Searching on an int32 field
|
||||
std::vector<filter> filters = {(filter) {"age", {"24"}, "GREATER_THAN"}};
|
||||
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "age:>24", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"3", "1", "4"};
|
||||
@ -429,17 +427,14 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
filters = {(filter) {"age", {"24"}, "GREATER_THAN_EQUALS"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "age:>=24", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
filters = {(filter) {"age", {"24"}, "EQUALS"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "age:24", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// Searching a number against an int32 array field
|
||||
filters = {(filter) {"years", {"2002"}, "GREATER_THAN"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "years:>2002", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"1", "0", "2"};
|
||||
@ -450,8 +445,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
filters = {(filter) {"years", {"1989"}, "LESS_THAN"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "years:<1989", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ids = {"3"};
|
||||
@ -463,8 +457,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// multiple filters
|
||||
filters = {(filter) {"years", {"2005"}, "LESS_THAN"}, (filter) {"years", {"1987"}, "GREATER_THAN"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "years:<2005 && years:>1987", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ids = {"4"};
|
||||
@ -476,8 +469,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// multiple search values (works like SQL's IN operator) against a single int field
|
||||
filters = {(filter) {"age", {"21", "24", "63"}, "EQUALS"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "age:[21, 24, 63]", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"3", "0", "2"};
|
||||
@ -488,9 +480,8 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// multiple search values against an int32 array field
|
||||
filters = {(filter) {"years", {"2015", "1985", "1999"}, "EQUALS"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
// multiple search values against an int32 array field - also use extra padding between symbols
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "years : [ 2015, 1985 , 1999]", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"3", "1", "4", "0"};
|
||||
@ -501,10 +492,8 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// searching on an int64 array field
|
||||
filters = {(filter) {"timestamps", {"475205222"}, "GREATER_THAN"}};
|
||||
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
// searching on an int64 array field - also ensure that padded space causes no issues
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "timestamps : > 475205222", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"1", "4", "0", "2"};
|
||||
@ -517,8 +506,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// when filters don't match any record, no results should be returned
|
||||
filters = {(filter) {"timestamps", {"1"}, "LESS_THAN"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "timestamps:<1", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
@ -547,9 +535,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
infile.close();
|
||||
|
||||
search_fields = {"name"};
|
||||
std::vector<filter> filters = {(filter) {"tags", {"gold"}, "EQUALS"}};
|
||||
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, "tags: gold", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"1", "4", "0", "2"};
|
||||
@ -561,9 +547,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
filters = {(filter) {"tags", {"bronze"}, "EQUALS"}};
|
||||
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "tags : bronze", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
ids = {"4", "2"};
|
||||
@ -575,10 +559,73 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// search with a list of tags, also testing extra padding of space
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "tags: [bronze, silver]", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"3", "4", "0", "2"};
|
||||
|
||||
for(size_t i = 0; i < results["hits"].size(); i++) {
|
||||
nlohmann::json result = results["hits"].at(i);
|
||||
std::string result_id = result["id"];
|
||||
std::string id = ids.at(i);
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// should be exact matches (no normalization or fuzzy searching should happen)
|
||||
filters = {(filter) {"tags", {"BRONZE"}, "EQUALS"}};
|
||||
results = coll_array_fields->search("Jeremy", search_fields, filters, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", search_fields, "tags: BRONZE", 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
}
|
||||
|
||||
// Malformed filter queries must never crash the search - they should simply
// produce no hits.
TEST_F(CollectionTest, HandleBadlyFormedFilterQuery) {
    // should not crash when filter query is malformed!
    Collection *coll_array_fields;

    std::ifstream infile(std::string(ROOT_DIR)+"test/numeric_array_documents.jsonl");
    std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32),
                                 field("years", field_types::INT32_ARRAY),
                                 field("timestamps", field_types::INT64_ARRAY),
                                 field("tags", field_types::STRING_ARRAY)};
    std::vector<std::string> rank_fields = {"age"};

    coll_array_fields = collectionManager.get_collection("coll_array_fields");
    if(coll_array_fields == nullptr) {
        coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, rank_fields);
    }

    std::string json_line;

    while (std::getline(infile, json_line)) {
        coll_array_fields->add(json_line);
    }

    infile.close();

    search_fields = {"name"};

    // when filter field does not exist in the schema
    nlohmann::json results = coll_array_fields->search("Jeremy", search_fields, "tagzz: gold", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    // searching using a string for a numeric field
    results = coll_array_fields->search("Jeremy", search_fields, "age: abcdef", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    // searching using a string for a numeric array field
    results = coll_array_fields->search("Jeremy", search_fields, "timestamps: abcdef", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    // malformed k:v syntax
    results = coll_array_fields->search("Jeremy", search_fields, "timestamps abcdef", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    // just empty spaces
    results = coll_array_fields->search("Jeremy", search_fields, " ", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    // wrapping number with quotes
    results = coll_array_fields->search("Jeremy", search_fields, "age: '21'", 0, 10, FREQUENCY, false);
    ASSERT_EQ(0, results["hits"].size());

    // clean up like the sibling filter tests do, so that the documents added
    // here do not leak into tests that reuse the same collection name
    collectionManager.drop_collection("coll_array_fields");
}
|
Loading…
x
Reference in New Issue
Block a user