Merge branch 'v0.24-nested' into v0.25

# Conflicts:
#	include/collection.h
#	src/collection.cpp
#	src/collection_manager.cpp
#	test/collection_faceting_test.cpp
Author: Kishore Nallan
Date: 2023-01-03 08:56:48 +05:30
Commit: b8b9fb20b3
29 changed files with 576 additions and 289 deletions

View File

@ -409,6 +409,8 @@ public:
const size_t filter_curated_hits_option = 2,
const bool prioritize_token_position = false,
const std::string& vector_query_str = "",
const bool enable_highlight_v1 = true,
const uint64_t search_time_start_us = 0,
const size_t facet_sample_percent = 100,
const size_t facet_sample_threshold = 0) const;

View File

@ -177,12 +177,11 @@ public:
static Option<bool> do_search(std::map<std::string, std::string>& req_params,
nlohmann::json& embedded_params,
std::string& results_json_str);
std::string& results_json_str,
uint64_t start_ts);
static bool parse_sort_by_str(std::string sort_by_str, std::vector<sort_by>& sort_fields);
static bool parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query);
// symlinks
Option<std::string> resolve_symlink(const std::string & symlink_name) const;

View File

@ -59,6 +59,8 @@ private:
std::atomic<bool> skip_writes;
std::atomic<int> log_slow_searches_time_ms;
protected:
Config() {
@ -80,6 +82,7 @@ protected:
this->disk_used_max_percentage = 100;
this->memory_used_max_percentage = 100;
this->skip_writes = false;
this->log_slow_searches_time_ms = 30 * 1000;
}
Config(Config const&) {
@ -142,6 +145,10 @@ public:
this->log_slow_requests_time_ms = log_slow_requests_time_ms;
}
void set_log_slow_searches_time_ms(int log_slow_searches_time_ms) {
this->log_slow_searches_time_ms = log_slow_searches_time_ms;
}
void set_healthy_read_lag(size_t healthy_read_lag) {
this->healthy_read_lag = healthy_read_lag;
}
@ -245,6 +252,10 @@ public:
return this->log_slow_requests_time_ms;
}
int get_log_slow_searches_time_ms() const {
return this->log_slow_searches_time_ms;
}
size_t get_num_collections_parallel_load() const {
return this->num_collections_parallel_load;
}
@ -364,6 +375,10 @@ public:
this->log_slow_requests_time_ms = std::stoi(get_env("TYPESENSE_LOG_SLOW_REQUESTS_TIME_MS"));
}
if(!get_env("TYPESENSE_LOG_SLOW_SEARCHES_TIME_MS").empty()) {
this->log_slow_searches_time_ms = std::stoi(get_env("TYPESENSE_LOG_SLOW_SEARCHES_TIME_MS"));
}
if(!get_env("TYPESENSE_NUM_COLLECTIONS_PARALLEL_LOAD").empty()) {
this->num_collections_parallel_load = std::stoi(get_env("TYPESENSE_NUM_COLLECTIONS_PARALLEL_LOAD"));
}
@ -513,6 +528,10 @@ public:
this->log_slow_requests_time_ms = (int) reader.GetInteger("server", "log-slow-requests-time-ms", -1);
}
if(reader.Exists("server", "log-slow-searches-time-ms")) {
this->log_slow_searches_time_ms = (int) reader.GetInteger("server", "log-slow-searches-time-ms", 30*1000);
}
if(reader.Exists("server", "num-collections-parallel-load")) {
this->num_collections_parallel_load = (int) reader.GetInteger("server", "num-collections-parallel-load", 0);
}
@ -643,6 +662,10 @@ public:
this->log_slow_requests_time_ms = options.get<int>("log-slow-requests-time-ms");
}
if(options.exist("log-slow-searches-time-ms")) {
this->log_slow_searches_time_ms = options.get<int>("log-slow-searches-time-ms");
}
if(options.exist("num-collections-parallel-load")) {
this->num_collections_parallel_load = options.get<uint32_t>("num-collections-parallel-load");
}
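
Note: the hunks above thread the new slow-search threshold through every configuration surface shown in this diff: the TYPESENSE_LOG_SLOW_SEARCHES_TIME_MS environment variable, the `log-slow-searches-time-ms` key in the `[server]` section of the config file, and the `log-slow-searches-time-ms` command-line option added further below in the server options. A minimal sketch of the new accessor pair, assuming a `Config& config` reference is available (the constructor shown above is protected, so the server presumably hands out a single instance):

    // a negative value never satisfies the `>= 0` check used by the logger,
    // so it effectively disables slow-search logging; the constructor default is 30 * 1000 ms
    config.set_log_slow_searches_time_ms(5 * 1000);
    if(config.get_log_slow_searches_time_ms() >= 0) {
        // searches slower than 5 seconds will now be logged
    }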

View File

@ -24,6 +24,7 @@ struct export_state_t: public req_state_t {
std::vector<size_t> offsets;
std::set<std::string> include_fields;
std::set<std::string> exclude_fields;
size_t export_batch_size = 100;
std::string* res_body;
bool filtered_export = false;

View File

@ -609,20 +609,6 @@ struct sort_by {
}
};
struct vector_query_t {
std::string field_name;
size_t k = 0;
size_t flat_search_cutoff = 0;
std::vector<float> values;
void _reset() {
// used for testing only
field_name.clear();
k = 0;
values.clear();
}
};
class GeoPoint {
constexpr static const double EARTH_RADIUS = 3958.75;
constexpr static const double METER_CONVERT = 1609.00;

View File

@ -261,11 +261,13 @@ struct http_req {
chunk_len(0), body(body), body_index(0), data(nullptr), ready(false),
log_index(0), is_diposed(false), client_ip(client_ip) {
start_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
if(_req != nullptr) {
const auto& tv = _req->processed_at.at;
start_ts = (tv.tv_sec * 1000 * 1000) + tv.tv_usec;
is_http_v1 = (_req->version < 0x200);
} else {
start_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
}
}
@ -279,21 +281,40 @@ struct http_req {
std::chrono::system_clock::now().time_since_epoch()).count();
uint64_t ms_since_start = (now - start_ts) / 1000;
std::string metric_identifier = http_method + " " + path_without_query;
const std::string metric_identifier = http_method + " " + path_without_query;
AppMetrics::get_instance().increment_duration(metric_identifier, ms_since_start);
AppMetrics::get_instance().increment_write_metrics(route_hash, ms_since_start);
if(config.get_log_slow_requests_time_ms() >= 0 && int(ms_since_start) >= config.get_log_slow_requests_time_ms()) {
bool log_slow_searches = config.get_log_slow_searches_time_ms() >= 0 &&
int(ms_since_start) >= config.get_log_slow_searches_time_ms() &&
(path_without_query == "/multi_search" ||
StringUtils::ends_with(path_without_query, "/documents/search"));
bool log_slow_requests = config.get_log_slow_requests_time_ms() >= 0 &&
int(ms_since_start) >= config.get_log_slow_requests_time_ms();
if(log_slow_searches || log_slow_requests) {
// log slow request if logging is enabled
std::string query_string = "?";
for(const auto& kv: params) {
if(kv.first != AUTH_HEADER) {
query_string += kv.first + "=" + kv.second + "&";
bool is_multi_search_query = (path_without_query == "/multi_search");
if(is_multi_search_query) {
StringUtils::erase_char(body, '\n');
} else {
// ignore params map of multi_search since it is mutated for every search object in the POST body
for(const auto& kv: params) {
if(kv.first != AUTH_HEADER) {
query_string += kv.first + "=" + kv.second + "&";
}
}
}
std::string full_url_path = metric_identifier + query_string;
LOG(INFO) << "SLOW REQUEST: " << "(" + std::to_string(ms_since_start) + " ms) "
<< client_ip << " " << full_url_path;
// NOTE: we log the `body` ONLY for multi-search query
LOG(INFO) << "event=slow_request, time=" << ms_since_start << " ms"
<< ", client_ip=" << client_ip << ", endpoint=" << full_url_path
<< ", body=" << (is_multi_search_query ? body : "");
}
}
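
Note: with the branch above, a request that crosses either threshold is logged as a single key=value line assembled from the stream expression in this hunk. For a /multi_search request the query string is left as a bare "?" and the JSON body (newlines stripped via StringUtils::erase_char) is logged instead of the per-search params. Roughly, ignoring the logger's own prefix and with made-up values, the line looks like:

    event=slow_request, time=1520 ms, client_ip=10.0.0.5, endpoint=POST /multi_search?, body={"searches":[{"collection":"products","q":"shoe"}]}

For a plain search endpoint the body field stays empty and the query parameters (minus the auth header) are appended to the endpoint string.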

View File

@ -27,6 +27,7 @@
#include "id_list.h"
#include "synonym_index.h"
#include "override.h"
#include "vector_query_ops.h"
#include "hnswlib/hnswlib.h"
static constexpr size_t ARRAY_FACET_DIM = 4;

View File

@ -68,9 +68,8 @@ bool or_iterator_t::intersect(std::vector<or_iterator_t>& its, result_iter_state
while(its.size() == it_size && its[0].valid()) {
num_processed++;
if (num_processed % 65536 == 0 &&
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
search_cutoff = true;
break;
}
@ -100,9 +99,8 @@ bool or_iterator_t::intersect(std::vector<or_iterator_t>& its, result_iter_state
while(its.size() == it_size && !at_end2(its)) {
num_processed++;
if (num_processed % 65536 == 0 &&
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
search_cutoff = true;
break;
}
@ -138,9 +136,8 @@ bool or_iterator_t::intersect(std::vector<or_iterator_t>& its, result_iter_state
while(its.size() == it_size && !at_end(its)) {
num_processed++;
if (num_processed % 65536 == 0 &&
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
search_cutoff = true;
break;
}

View File

@ -211,9 +211,8 @@ bool posting_list_t::block_intersect(std::vector<posting_list_t::iterator_t>& it
case 1:
while(its[0].valid()) {
num_processed++;
if (num_processed % 65536 == 0 &&
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
search_cutoff = true;
break;
}
@ -228,9 +227,8 @@ bool posting_list_t::block_intersect(std::vector<posting_list_t::iterator_t>& it
case 2:
while(!at_end2(its)) {
num_processed++;
if (num_processed % 65536 == 0 &&
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
search_cutoff = true;
break;
}
@ -249,9 +247,8 @@ bool posting_list_t::block_intersect(std::vector<posting_list_t::iterator_t>& it
default:
while(!at_end(its)) {
num_processed++;
if (num_processed % 65536 == 0 &&
std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) {
if (num_processed % 65536 == 0 && (std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) {
search_cutoff = true;
break;
}

View File

@ -366,6 +366,8 @@ struct StringUtils {
static void replace_all(std::string& subject, const std::string& search,
const std::string& replace);
static void erase_char(std::string& str, const char c);
static std::string trim_curly_spaces(const std::string& str);
static bool ends_with(std::string const &str, std::string const &ending);

View File

@ -4,6 +4,6 @@ extern thread_local int64_t write_log_index;
// These are used for circuit breaking search requests
// NOTE: if you fork off main search thread, care must be taken to initialize these from parent thread values
extern thread_local std::chrono::high_resolution_clock::time_point search_begin;
extern thread_local int64_t search_stop_ms;
extern thread_local uint64_t search_begin_us;
extern thread_local uint64_t search_stop_us;
extern thread_local bool search_cutoff;
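
Note: the circuit breaker now works off two plain integers in microseconds since the epoch instead of a high_resolution_clock time_point plus a millisecond budget, which makes the values trivially copyable into worker threads (see the parent_search_begin / parent_search_stop copies in index.cpp further below). A minimal sketch of the check that the macros and intersect loops perform, using only the names declared in this header:

    #include <chrono>
    #include "thread_local_vars.h"

    static bool search_deadline_exceeded() {
        uint64_t now_us = std::chrono::duration_cast<std::chrono::microseconds>(
                std::chrono::system_clock::now().time_since_epoch()).count();
        // search_begin_us is stamped when the search starts (or copied from the parent thread);
        // search_stop_us is the allowed budget, in microseconds
        return (now_us - search_begin_us) > search_stop_us;
    }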

View File

@ -0,0 +1,32 @@
#pragma once
#include <string>
#include <vector>
#include "option.h"
class Collection;
struct vector_query_t {
std::string field_name;
size_t k = 0;
size_t flat_search_cutoff = 0;
std::vector<float> values;
uint32_t seq_id = 0;
bool query_doc_given = false;
void _reset() {
// used for testing only
field_name.clear();
k = 0;
values.clear();
seq_id = 0;
query_doc_given = false;
}
};
class VectorQueryOps {
public:
static Option<bool> parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query,
const Collection* coll);
};
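
Note: the parser (implementation further below in src/vector_query_ops.cpp) accepts strings of the form field_name([v1, v2, ...], k: 10, flat_search_cutoff: N), or field_name([], id: <doc id>) to reuse the stored vector of an existing document. A minimal usage sketch; passing nullptr for the collection is fine as long as the `id` parameter is not used, since that is the only path that dereferences it:

    vector_query_t vq;
    auto parse_op = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vq, nullptr);
    if(parse_op.ok()) {
        // vq.field_name == "vec", vq.k == 10, vq.values == {0.34, 0.66, 0.12, 0.68}
    } else {
        LOG(ERROR) << parse_op.error();   // e.g. "Malformed vector query string."
    }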

View File

@ -394,7 +394,13 @@ bool AuthManager::add_item_to_params(std::map<std::string, std::string>& req_par
if(req_params.count(item.key()) == 0) {
req_params[item.key()] = str_value;
} else if(item.key() == "filter_by") {
req_params[item.key()] = "(" + req_params[item.key()] + ") && (" + str_value + ")";
if(!req_params[item.key()].empty() && !str_value.empty()) {
req_params[item.key()] = "(" + req_params[item.key()] + ") && (" + str_value + ")";
} else if(req_params[item.key()].empty() && !str_value.empty()) {
req_params[item.key()] = "(" + str_value + ")";
} else if(!req_params[item.key()].empty() && str_value.empty()) {
req_params[item.key()] = "(" + req_params[item.key()] + ")";
}
} else if(overwrite) {
req_params[item.key()] = str_value;
}
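
Note: the added branches avoid producing an empty "()" term when either the request's filter_by or the embedded filter_by is blank. Taken on its own, the merge behaves roughly as follows (see also the multi_search assertions in the CoreAPIUtilsTest changes near the end of this diff):

    request "user_id: 100" + embedded "age: > 100"  ->  "(user_id: 100) && (age: > 100)"
    request ""             + embedded "age: > 100"  ->  "(age: > 100)"
    request "user_id: 100" + embedded ""            ->  "(user_id: 100)"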

View File

@ -15,6 +15,7 @@
#include "topster.h"
#include "logger.h"
#include "thread_local_vars.h"
#include "vector_query_ops.h"
const std::string override_t::MATCH_EXACT = "exact";
const std::string override_t::MATCH_CONTAINS = "contains";
@ -867,14 +868,18 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
const size_t filter_curated_hits_option,
const bool prioritize_token_position,
const std::string& vector_query_str,
const bool enable_highlight_v1,
const uint64_t search_time_start_us,
const size_t facet_sample_percent,
const size_t facet_sample_threshold) const {
std::shared_lock lock(mutex);
// setup thread local vars
search_stop_ms = search_stop_millis;
search_begin = std::chrono::high_resolution_clock::now();
search_stop_us = search_stop_millis * 1000;
search_begin_us = (search_time_start_us != 0) ? search_time_start_us :
std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
search_cutoff = false;
if(raw_query != "*" && raw_search_fields.empty()) {
@ -927,8 +932,9 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
return Option<nlohmann::json>(400, "Vector query is supported only on wildcard (q=*) searches.");
}
if(!CollectionManager::parse_vector_query_str(vector_query_str, vector_query)) {
return Option<nlohmann::json>(400, "The `vector_query` parameter is malformed.");
auto parse_vector_op = VectorQueryOps::parse_vector_query_str(vector_query_str, vector_query, this);
if(!parse_vector_op.ok()) {
return Option<nlohmann::json>(400, parse_vector_op.error());
}
auto vector_field_it = search_schema.find(vector_query.field_name);
@ -1491,7 +1497,11 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
}
nlohmann::json wrapper_doc;
wrapper_doc["highlights"] = nlohmann::json::array();
if(enable_highlight_v1) {
wrapper_doc["highlights"] = nlohmann::json::array();
}
std::vector<highlight_t> highlights;
StringUtils string_utils;
@ -1562,34 +1572,36 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
prune_doc(highlight_res, hfield_names, tsl::htrie_set<char>(), "");
}
std::sort(highlights.begin(), highlights.end());
if(enable_highlight_v1) {
std::sort(highlights.begin(), highlights.end());
for(const auto & highlight: highlights) {
auto field_it = search_schema.find(highlight.field);
if(field_it == search_schema.end() || field_it->nested) {
// nested field highlighting will be available only in the new highlight structure.
continue;
}
nlohmann::json h_json = nlohmann::json::object();
h_json["field"] = highlight.field;
if(!highlight.indices.empty()) {
h_json["matched_tokens"] = highlight.matched_tokens;
h_json["indices"] = highlight.indices;
h_json["snippets"] = highlight.snippets;
if(!highlight.values.empty()) {
h_json["values"] = highlight.values;
for(const auto & highlight: highlights) {
auto field_it = search_schema.find(highlight.field);
if(field_it == search_schema.end() || field_it->nested) {
// nested field highlighting will be available only in the new highlight structure.
continue;
}
} else {
h_json["matched_tokens"] = highlight.matched_tokens[0];
h_json["snippet"] = highlight.snippets[0];
if(!highlight.values.empty() && !highlight.values[0].empty()) {
h_json["value"] = highlight.values[0];
}
}
wrapper_doc["highlights"].push_back(h_json);
nlohmann::json h_json = nlohmann::json::object();
h_json["field"] = highlight.field;
if(!highlight.indices.empty()) {
h_json["matched_tokens"] = highlight.matched_tokens;
h_json["indices"] = highlight.indices;
h_json["snippets"] = highlight.snippets;
if(!highlight.values.empty()) {
h_json["values"] = highlight.values;
}
} else {
h_json["matched_tokens"] = highlight.matched_tokens[0];
h_json["snippet"] = highlight.snippets[0];
if(!highlight.values.empty() && !highlight.values[0].empty()) {
h_json["value"] = highlight.values[0];
}
}
wrapper_doc["highlights"].push_back(h_json);
}
}
//wrapper_doc["seq_id"] = (uint32_t) field_order_kv->key;
@ -1654,8 +1666,8 @@ Option<nlohmann::json> Collection::search(const std::string & raw_query,
facet_result["counts"] = nlohmann::json::array();
std::vector<facet_value_t> facet_values;
std::vector<std::pair<int64_t, facet_count_t>> facet_hash_counts;
std::vector<std::pair<uint64_t, facet_count_t>> facet_hash_counts;
for (const auto & kv : a_facet.result_map) {
facet_hash_counts.emplace_back(kv);
}

View File

@ -630,7 +630,9 @@ Option<bool> add_unsigned_int_list_param(const std::string& param_name, const st
Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& req_params,
nlohmann::json& embedded_params,
std::string& results_json_str) {
std::string& results_json_str,
uint64_t start_ts) {
auto begin = std::chrono::high_resolution_clock::now();
const char *NUM_TYPOS = "num_typos";
@ -695,6 +697,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
const char *EXHAUSTIVE_SEARCH = "exhaustive_search";
const char *SPLIT_JOIN_TOKENS = "split_join_tokens";
const char *ENABLE_HIGHLIGHT_V1 = "enable_highlight_v1";
const char *FACET_SAMPLE_PERCENT = "facet_sample_percent";
const char *FACET_SAMPLE_THRESHOLD = "facet_sample_threshold";
@ -767,12 +771,13 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
size_t filter_curated_hits_option = 2;
std::string highlight_fields;
bool exhaustive_search = false;
size_t search_cutoff_ms = 3600000;
size_t search_cutoff_ms = 30 * 1000;
enable_t split_join_tokens = fallback;
size_t max_candidates = 0;
std::vector<enable_t> infixes;
size_t max_extra_prefix = INT16_MAX;
size_t max_extra_suffix = INT16_MAX;
bool enable_highlight_v1 = true;
size_t facet_sample_percent = 100;
size_t facet_sample_threshold = 0;
@ -817,6 +822,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
{PRE_SEGMENTED_QUERY, &pre_segmented_query},
{EXHAUSTIVE_SEARCH, &exhaustive_search},
{ENABLE_OVERRIDES, &enable_overrides},
{ENABLE_HIGHLIGHT_V1, &enable_highlight_v1},
};
std::unordered_map<std::string, std::vector<std::string>*> str_list_values = {
@ -990,6 +996,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
filter_curated_hits_option,
prioritize_token_position,
vector_query,
enable_highlight_v1,
start_ts,
facet_sample_percent,
facet_sample_threshold
);
@ -1237,6 +1245,7 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection
size_t num_found_docs = 0;
size_t num_valid_docs = 0;
size_t num_indexed_docs = 0;
size_t batch_doc_str_size = 0;
auto begin = std::chrono::high_resolution_clock::now();
@ -1245,14 +1254,17 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection
const uint32_t seq_id = Collection::get_seq_id_from_key(iter->key().ToString());
nlohmann::json document;
const std::string& doc_string = iter->value().ToString();
try {
document = nlohmann::json::parse(iter->value().ToString());
document = nlohmann::json::parse(doc_string);
} catch(const std::exception& e) {
LOG(ERROR) << "JSON error: " << e.what();
return Option<bool>(400, "Bad JSON.");
}
batch_doc_str_size += doc_string.size();
if(collection->get_enable_nested_fields()) {
std::vector<field> flattened_fields;
field::flatten_doc(document, collection->get_nested_fields(), true, flattened_fields);
@ -1269,10 +1281,14 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection
iter->Next();
bool last_record = !(iter->Valid() && iter->key().starts_with(seq_id_prefix));
// if expected memory usage exceeds 250M, we index the accumulated set without caring about batch size
bool exceeds_batch_mem_threshold = ((batch_doc_str_size * 7) > (250 * 1014 * 1024));
// batch must match atleast the number of shards
if((num_valid_docs % batch_size == 0) || last_record) {
if(exceeds_batch_mem_threshold || (num_valid_docs % batch_size == 0) || last_record) {
size_t num_records = index_records.size();
size_t num_indexed = collection->batch_index_in_memory(index_records);
batch_doc_str_size = 0;
if(num_indexed != num_records) {
const Option<std::string> & index_error_op = get_first_index_error(index_records);
@ -1413,112 +1429,3 @@ Option<Collection*> CollectionManager::clone_collection(const string& existing_n
return Option<Collection*>(new_coll);
}
bool CollectionManager::parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query) {
// FORMAT:
// field_name([0.34, 0.66, 0.12, 0.68], exact: false, k: 10)
size_t i = 0;
while(i < vector_query_str.size()) {
if(vector_query_str[i] != ':') {
vector_query.field_name += vector_query_str[i];
i++;
} else {
if(vector_query_str[i] != ':') {
// missing ":"
return false;
}
// field name is done
i++;
StringUtils::trim(vector_query.field_name);
while(i < vector_query_str.size() && vector_query_str[i] != '(') {
i++;
}
if(vector_query_str[i] != '(') {
// missing "("
return false;
}
i++;
while(i < vector_query_str.size() && vector_query_str[i] != '[') {
i++;
}
if(vector_query_str[i] != '[') {
// missing opening "["
return false;
}
i++;
std::string values_str;
while(i < vector_query_str.size() && vector_query_str[i] != ']') {
values_str += vector_query_str[i];
i++;
}
if(vector_query_str[i] != ']') {
// missing closing "]"
return false;
}
i++;
std::vector<std::string> svalues;
StringUtils::split(values_str, svalues, ",");
for(auto& svalue: svalues) {
if(!StringUtils::is_float(svalue)) {
return false;
}
vector_query.values.push_back(std::stof(svalue));
}
if(i == vector_query_str.size()-1) {
// missing params
return true;
}
std::string param_str = vector_query_str.substr(i, (vector_query_str.size() - i));
std::vector<std::string> param_kvs;
StringUtils::split(param_str, param_kvs, ",");
for(auto& param_kv_str: param_kvs) {
if(param_kv_str.back() == ')') {
param_kv_str.pop_back();
}
std::vector<std::string> param_kv;
StringUtils::split(param_kv_str, param_kv, ":");
if(param_kv.size() != 2) {
return false;
}
if(param_kv[0] == "k") {
if(!StringUtils::is_uint32_t(param_kv[1])) {
return false;
}
vector_query.k = std::stoul(param_kv[1]);
}
if(param_kv[0] == "flat_search_cutoff") {
if(!StringUtils::is_uint32_t(param_kv[1])) {
return false;
}
vector_query.flat_search_cutoff = std::stoi(param_kv[1]);
}
}
return true;
}
}
return false;
}

View File

@ -376,7 +376,8 @@ bool get_search(const std::shared_ptr<http_req>& req, const std::shared_ptr<http
}
std::string results_json_str;
Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[0], results_json_str);
Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[0],
results_json_str, req->start_ts);
if(!search_op.ok()) {
res->set(search_op.code(), search_op.error());
@ -523,7 +524,8 @@ bool post_multi_search(const std::shared_ptr<http_req>& req, const std::shared_p
}
std::string results_json_str;
Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[i], results_json_str);
Option<bool> search_op = CollectionManager::do_search(req->params, req->embedded_params_vec[i],
results_json_str, req->start_ts);
if(search_op.ok()) {
response["results"].push_back(nlohmann::json::parse(results_json_str));
@ -588,6 +590,7 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
const char* FILTER_BY = "filter_by";
const char* INCLUDE_FIELDS = "include_fields";
const char* EXCLUDE_FIELDS = "exclude_fields";
const char* BATCH_SIZE = "batch_size";
export_state_t* export_state = nullptr;
@ -617,6 +620,10 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
export_state->exclude_fields = std::set<std::string>(exclude_fields_vec.begin(), exclude_fields_vec.end());
}
if(req->params.count(BATCH_SIZE) != 0 && StringUtils::is_uint32_t(req->params[BATCH_SIZE])) {
export_state->export_batch_size = std::stoul(req->params[BATCH_SIZE]);
}
if(simple_filter_query.empty()) {
export_state->iter_upper_bound_key = collection->get_seq_id_collection_prefix() + "`"; // cannot inline this
export_state->iter_upper_bound = new rocksdb::Slice(export_state->iter_upper_bound_key);
@ -644,10 +651,12 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
if(export_state->it != nullptr) {
rocksdb::Iterator* it = export_state->it;
size_t batch_counter = 0;
res->body.clear();
if(it->Valid() && it->key().ToString().compare(0, seq_id_prefix.size(), seq_id_prefix) == 0) {
while(it->Valid() && it->key().ToString().compare(0, seq_id_prefix.size(), seq_id_prefix) == 0) {
if(export_state->include_fields.empty() && export_state->exclude_fields.empty()) {
res->body = it->value().ToString();
res->body += it->value().ToString();
} else {
nlohmann::json doc = nlohmann::json::parse(it->value().ToString());
nlohmann::json filtered_doc;
@ -663,7 +672,7 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
}
}
res->body = filtered_doc.dump();
res->body += filtered_doc.dump();
}
it->Next();
@ -677,10 +686,15 @@ bool get_export_documents(const std::shared_ptr<http_req>& req, const std::share
req->last_chunk_aggregate = true;
res->final = true;
}
batch_counter++;
if(batch_counter == export_state->export_batch_size) {
break;
}
}
} else {
bool done;
stateful_export_docs(export_state, 100, done);
stateful_export_docs(export_state, export_state->export_batch_size, done);
if(!done) {
req->last_chunk_aggregate = false;
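
Note: export now accumulates documents into res->body (hence the = to += change) and flushes a chunk every export_batch_size documents, 100 by default and overridable through the new batch_size query parameter parsed above. Assuming the usual export endpoint path, a request along the lines of GET /collections/companies/documents/export?batch_size=500 would stream the export in chunks of 500 documents.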

View File

@ -474,8 +474,6 @@ int HttpServer::catch_all_handler(h2o_handler_t *_h2o_handler, h2o_req_t *req) {
}
}
std::shared_ptr<http_req> request = std::make_shared<http_req>(req, rpath->http_method, path_without_query,
route_hash, query_map, embedded_params_vec,
api_auth_key_sent, body, client_ip);

View File

@ -24,17 +24,17 @@
#include <timsort.hpp>
#include "logger.h"
#define RETURN_CIRCUIT_BREAKER if(std::chrono::duration_cast<std::chrono::milliseconds>(\
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) { \
search_cutoff = true; \
return ;\
}
#define RETURN_CIRCUIT_BREAKER if((std::chrono::duration_cast<std::chrono::microseconds>( \
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) { \
search_cutoff = true; \
return ;\
}
#define BREAK_CIRCUIT_BREAKER if(std::chrono::duration_cast<std::chrono::milliseconds>(\
std::chrono::high_resolution_clock::now() - search_begin).count() > search_stop_ms) { \
search_cutoff = true; \
break;\
}
#define BREAK_CIRCUIT_BREAKER if((std::chrono::duration_cast<std::chrono::microseconds>( \
std::chrono::system_clock::now().time_since_epoch()).count() - search_begin_us) > search_stop_us) { \
search_cutoff = true; \
break;\
}
spp::sparse_hash_map<uint32_t, int64_t> Index::text_match_sentinel_value;
spp::sparse_hash_map<uint32_t, int64_t> Index::seq_id_sentinel_value;
@ -1171,16 +1171,14 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& st
}
}
//LOG(INFO) << "Str: " << str << ", last_token: " << last_token;
if(is_facet) {
facet_hashes.push_back(facet_hash);
}
if(token_set.empty()) {
continue;
}
if(is_facet) {
facet_hashes.push_back(facet_hash);
}
for(auto& the_token: token_set) {
// repeat last element to indicate end of offsets for this array index
token_to_offsets[the_token].push_back(token_to_offsets[the_token].back());
@ -2362,8 +2360,8 @@ void Index::search_infix(const std::string& query, const std::string& field_name
auto search_tree = search_index.at(field_name);
const auto parent_search_begin = search_begin;
const auto parent_search_stop_ms = search_stop_ms;
const auto parent_search_begin = search_begin_us;
const auto parent_search_stop_ms = search_stop_us;
auto parent_search_cutoff = search_cutoff;
for(auto infix_set: infix_sets) {
@ -2371,7 +2369,7 @@ void Index::search_infix(const std::string& query, const std::string& field_name
&num_processed, &m_process, &cv_process,
&parent_search_begin, &parent_search_stop_ms, &parent_search_cutoff]() {
search_begin = parent_search_begin;
search_begin_us = parent_search_begin;
search_cutoff = parent_search_cutoff;
auto op_search_stop_ms = parent_search_stop_ms/2;
@ -2396,8 +2394,8 @@ void Index::search_infix(const std::string& query, const std::string& field_name
// check for search cutoff but only once every 2^10 docs to reduce overhead
if(((num_iterated + 1) % (1 << 12)) == 0) {
if (std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - search_begin).count() > op_search_stop_ms) {
if ((std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().
time_since_epoch()).count() - search_begin_us) > op_search_stop_ms) {
search_cutoff = true;
break;
}
@ -2596,6 +2594,11 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
for (const auto& dist_label : dist_labels) {
uint32 seq_id = dist_label.second;
if(vector_query.query_doc_given && vector_query.seq_id == seq_id) {
continue;
}
uint64_t distinct_id = seq_id;
if (group_limit != 0) {
distinct_id = get_distinct_id(group_by_fields, seq_id);
@ -4386,8 +4389,8 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
size_t num_queued = 0;
size_t filter_index = 0;
const auto parent_search_begin = search_begin;
const auto parent_search_stop_ms = search_stop_ms;
const auto parent_search_begin = search_begin_us;
const auto parent_search_stop_ms = search_stop_us;
auto parent_search_cutoff = search_cutoff;
for(size_t thread_id = 0; thread_id < num_threads && filter_index < filter_ids_length; thread_id++) {
@ -4412,8 +4415,8 @@ void Index::search_wildcard(filter_node_t const* const& filter_tree_root,
batch_result_ids, batch_res_len,
&num_processed, &m_process, &cv_process]() {
search_begin = parent_search_begin;
search_stop_ms = parent_search_stop_ms;
search_begin_us = parent_search_begin;
search_stop_us = parent_search_stop_ms;
search_cutoff = parent_search_cutoff;
size_t filter_index = 0;

View File

@ -217,6 +217,10 @@ void StringUtils::replace_all(std::string& subject, const std::string& search, c
}
}
void StringUtils::erase_char(std::string& str, const char c) {
str.erase(std::remove(str.begin(), str.end(), c), str.cend());
}
std::string StringUtils::trim_curly_spaces(const std::string& str) {
std::string left_trimmed;
int i = 0;
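
Note: erase_char is a thin erase-remove wrapper; the slow-request logger above uses it to strip newlines from a multi_search body so the request can be logged on a single line. A minimal sketch:

    std::string body = "{\"searches\": [\n  {\"q\": \"shoe\"}\n]}";
    StringUtils::erase_char(body, '\n');
    // body is now: {"searches": [  {"q": "shoe"}]}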

View File

@ -262,9 +262,9 @@ nlohmann::json synonym_t::to_view_json() const {
}
if(!symbols.empty()) {
obj["symbols"] = nlohmann::json::array();
obj["symbols_to_index"] = nlohmann::json::array();
for(char c: symbols) {
obj["symbols"].push_back(std::string(1, c));
obj["symbols_to_index"].push_back(std::string(1, c));
}
}

View File

@ -2,6 +2,6 @@
#include "thread_local_vars.h"
thread_local int64_t write_log_index = 0;
thread_local std::chrono::high_resolution_clock::time_point search_begin;
thread_local int64_t search_stop_ms;
thread_local uint64_t search_begin_us;
thread_local uint64_t search_stop_us;
thread_local bool search_cutoff = false;

View File

@ -105,6 +105,8 @@ void init_cmdline_options(cmdline::parser & options, int argc, char **argv) {
options.add<int>("memory-used-max-percentage", '\0', "Reject writes when memory usage exceeds this percentage. Default: 100 (never reject).", false, 100);
options.add<bool>("skip-writes", '\0', "Skip all writes except config changes. Default: false.", false, false);
options.add<int>("log-slow-searches-time-ms", '\0', "When >= 0, searches that take longer than this duration are logged.", false, 30*1000);
// DEPRECATED
options.add<std::string>("listen-address", 'h', "[DEPRECATED: use `api-address`] Address to which Typesense API service binds.", false, "0.0.0.0");
options.add<uint32_t>("listen-port", 'p', "[DEPRECATED: use `api-port`] Port on which Typesense API service listens.", false, 8108);

src/vector_query_ops.cpp (new file, 159 lines)
View File

@ -0,0 +1,159 @@
#include "vector_query_ops.h"
#include "string_utils.h"
#include "collection.h"
Option<bool> VectorQueryOps::parse_vector_query_str(std::string vector_query_str, vector_query_t& vector_query,
const Collection* coll) {
// FORMAT:
// field_name([0.34, 0.66, 0.12, 0.68], exact: false, k: 10)
size_t i = 0;
while(i < vector_query_str.size()) {
if(vector_query_str[i] != ':') {
vector_query.field_name += vector_query_str[i];
i++;
} else {
if(vector_query_str[i] != ':') {
// missing ":"
return Option<bool>(400, "Malformed vector query string: `:` is missing.");
}
// field name is done
i++;
StringUtils::trim(vector_query.field_name);
while(i < vector_query_str.size() && vector_query_str[i] != '(') {
i++;
}
if(vector_query_str[i] != '(') {
// missing "("
return Option<bool>(400, "Malformed vector query string.");
}
i++;
while(i < vector_query_str.size() && vector_query_str[i] != '[') {
i++;
}
if(vector_query_str[i] != '[') {
// missing opening "["
return Option<bool>(400, "Malformed vector query string.");
}
i++;
std::string values_str;
while(i < vector_query_str.size() && vector_query_str[i] != ']') {
values_str += vector_query_str[i];
i++;
}
if(vector_query_str[i] != ']') {
// missing closing "]"
return Option<bool>(400, "Malformed vector query string.");
}
i++;
std::vector<std::string> svalues;
StringUtils::split(values_str, svalues, ",");
for(auto& svalue: svalues) {
if(!StringUtils::is_float(svalue)) {
return Option<bool>(400, "Malformed vector query string: one of the vector values is not a float.");
}
vector_query.values.push_back(std::stof(svalue));
}
if(i == vector_query_str.size()-1) {
// missing params
if(vector_query.values.empty()) {
// when query values are missing, atleast the `id` parameter must be present
return Option<bool>(400, "When a vector query value is empty, an `id` parameter must be present.");
}
return Option<bool>(true);
}
std::string param_str = vector_query_str.substr(i, (vector_query_str.size() - i));
std::vector<std::string> param_kvs;
StringUtils::split(param_str, param_kvs, ",");
for(auto& param_kv_str: param_kvs) {
if(param_kv_str.back() == ')') {
param_kv_str.pop_back();
}
std::vector<std::string> param_kv;
StringUtils::split(param_kv_str, param_kv, ":");
if(param_kv.size() != 2) {
return Option<bool>(400, "Malformed vector query string.");
}
if(param_kv[0] == "id") {
if(!vector_query.values.empty()) {
// cannot pass both vector values and id
return Option<bool>(400, "Malformed vector query string: cannot pass both vector query "
"and `id` parameter.");
}
Option<uint32_t> id_op = coll->doc_id_to_seq_id(param_kv[1]);
if(!id_op.ok()) {
return Option<bool>(400, "Document id referenced in vector query is not found.");
}
nlohmann::json document;
auto doc_op = coll->get_document_from_store(id_op.get(), document);
if(!doc_op.ok()) {
return Option<bool>(400, "Document id referenced in vector query is not found.");
}
if(!document.contains(vector_query.field_name) || !document[vector_query.field_name].is_array()) {
return Option<bool>(400, "Document referenced in vector query does not contain a valid "
"vector field.");
}
for(auto& fvalue: document[vector_query.field_name]) {
if(!fvalue.is_number_float()) {
return Option<bool>(400, "Document referenced in vector query does not contain a valid "
"vector field.");
}
vector_query.values.push_back(fvalue.get<float>());
}
vector_query.query_doc_given = true;
vector_query.seq_id = id_op.get();
}
if(param_kv[0] == "k") {
if(!StringUtils::is_uint32_t(param_kv[1])) {
return Option<bool>(400, "Malformed vector query string: `k` parameter must be an integer.");
}
vector_query.k = std::stoul(param_kv[1]);
}
if(param_kv[0] == "flat_search_cutoff") {
if(!StringUtils::is_uint32_t(param_kv[1])) {
return Option<bool>(400, "Malformed vector query string: "
"`flat_search_cutoff` parameter must be an integer.");
}
vector_query.flat_search_cutoff = std::stoi(param_kv[1]);
}
}
if(!vector_query.query_doc_given && vector_query.values.empty()) {
return Option<bool>(400, "When a vector query value is empty, an `id` parameter must be present.");
}
return Option<bool>(true);
}
}
return Option<bool>(400, "Malformed vector query string.");
}

View File

@ -983,23 +983,23 @@ TEST_F(CollectionFacetingTest, FacetByNestedIntField) {
TEST_F(CollectionFacetingTest, FacetParseTest){
std::vector<field> fields = {
field("score", field_types::INT32, true),
field("grade", field_types::INT32, true),
field("rank", field_types::INT32, true),
field("score", field_types::INT32, true),
field("grade", field_types::INT32, true),
field("rank", field_types::INT32, true),
};
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
std::vector<std::string> range_facet_fields {
"score(fail:[0, 40], pass:[40, 100])",
"grade(A:[80, 100], B:[60, 80], C:[40, 60])"
"score(fail:[0, 40], pass:[40, 100])",
"grade(A:[80, 100], B:[60, 80], C:[40, 60])"
};
std::vector<facet> range_facets;
for(const std::string & facet_field: range_facet_fields) {
coll1->parse_facet(facet_field, range_facets);
}
ASSERT_EQ(2, range_facets.size());
ASSERT_STREQ("score", range_facets[0].field_name.c_str());
ASSERT_TRUE(range_facets[0].is_range_query);
ASSERT_GT(range_facets[0].facet_range_map.size(), 0);
@ -1009,8 +1009,8 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
ASSERT_GT(range_facets[1].facet_range_map.size(), 0);
std::vector<std::string> normal_facet_fields {
"score",
"grade"
"score",
"grade"
};
std::vector<facet> normal_facets;
for(const std::string & facet_field: normal_facet_fields) {
@ -1022,18 +1022,18 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
ASSERT_STREQ("grade", normal_facets[1].field_name.c_str());
std::vector<std::string> mixed_facet_fields {
"score",
"grade(A:[80, 100], B:[60, 80], C:[40, 60])",
"rank"
"score",
"grade(A:[80, 100], B:[60, 80], C:[40, 60])",
"rank"
};
std::vector<facet> mixed_facets;
for(const std::string & facet_field: mixed_facet_fields) {
coll1->parse_facet(facet_field, mixed_facets);
}
ASSERT_EQ(3, mixed_facets.size());
ASSERT_STREQ("score", mixed_facets[0].field_name.c_str());
ASSERT_STREQ("grade", mixed_facets[1].field_name.c_str());
ASSERT_TRUE(mixed_facets[1].is_range_query);
ASSERT_GT(mixed_facets[1].facet_range_map.size(), 0);
@ -1041,7 +1041,6 @@ TEST_F(CollectionFacetingTest, FacetParseTest){
ASSERT_STREQ("rank", mixed_facets[2].field_name.c_str());
}
TEST_F(CollectionFacetingTest, RangeFacetTest) {
std::vector<field> fields = {field("place", field_types::STRING, false),
field("state", field_types::STRING, false),
@ -1345,11 +1344,44 @@ TEST_F(CollectionFacetingTest, SampleFacetCounts) {
// test for sample percent > 100
auto res_op = coll1->search("*", {}, "", {"color"}, {}, {0}, 3, 1, FREQUENCY, {true}, 5,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 3, 3, 2, 2, false, "", 200, 0);
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 20, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 3, 3, 2, 2, false, "", 200, 0);
ASSERT_FALSE(res_op.ok());
ASSERT_EQ("Value of `facet_sample_percent` must be less than 100.", res_op.error());
}
TEST_F(CollectionFacetingTest, FacetOnArrayFieldWithSpecialChars) {
std::vector<field> fields = {
field("tags", field_types::STRING_ARRAY, true),
field("points", field_types::INT32, true),
};
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
nlohmann::json doc;
doc["tags"] = {"gamma"};
doc["points"] = 10;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["tags"] = {"alpha", "| . |", "beta", "gamma"};
doc["points"] = 10;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
auto results = coll1->search("*", {},
"", {"tags"}, {}, {2}, 10, 1, FREQUENCY, {true}, 1).get();
ASSERT_EQ(1, results["facet_counts"].size());
ASSERT_EQ(4, results["facet_counts"][0]["counts"].size());
for(size_t i = 0; i < results["facet_counts"][0]["counts"].size(); i++) {
auto fvalue = results["facet_counts"][0]["counts"][i]["value"].get<std::string>();
if(fvalue == "gamma") {
ASSERT_EQ(2, results["facet_counts"][0]["counts"][i]["count"].get<size_t>());
} else {
ASSERT_EQ(1, results["facet_counts"][0]["counts"][i]["count"].get<size_t>());
}
}
}

View File

@ -526,7 +526,10 @@ TEST_F(CollectionManagerTest, VerifyEmbeddedParametersOfScopedAPIKey) {
embedded_params["filter_by"] = "points: 200";
std::string json_res;
auto search_op = collectionManager.do_search(req_params, embedded_params, json_res);
auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
nlohmann::json res_obj = nlohmann::json::parse(json_res);
@ -540,7 +543,7 @@ TEST_F(CollectionManagerTest, VerifyEmbeddedParametersOfScopedAPIKey) {
req_params["filter_by"] = "year: 1922";
req_params["q"] = "*";
search_op = collectionManager.do_search(req_params, embedded_params, json_res);
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
res_obj = nlohmann::json::parse(json_res);
@ -989,43 +992,6 @@ TEST_F(CollectionManagerTest, ParseSortByClause) {
ASSERT_FALSE(sort_by_parsed);
}
TEST_F(CollectionManagerTest, ParseVectorQueryString) {
vector_query_t vector_query;
bool parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query);
ASSERT_TRUE(parsed);
ASSERT_EQ("vec", vector_query.field_name);
ASSERT_EQ(10, vector_query.k);
std::vector<float> fvs = {0.34, 0.66, 0.12, 0.68};
ASSERT_EQ(fvs.size(), vector_query.values.size());
for(size_t i = 0; i < fvs.size(); i++) {
ASSERT_EQ(fvs[i], vector_query.values[i]);
}
vector_query._reset();
parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query);
ASSERT_TRUE(parsed);
vector_query._reset();
parsed = CollectionManager::parse_vector_query_str("vec:[0.34, 0.66, 0.12, 0.68], k: 10)", vector_query);
ASSERT_FALSE(parsed);
vector_query._reset();
parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10", vector_query);
ASSERT_TRUE(parsed);
vector_query._reset();
parsed = CollectionManager::parse_vector_query_str("vec:(0.34, 0.66, 0.12, 0.68, k: 10)", vector_query);
ASSERT_FALSE(parsed);
vector_query._reset();
parsed = CollectionManager::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], )", vector_query);
ASSERT_FALSE(parsed);
vector_query._reset();
parsed = CollectionManager::parse_vector_query_str("vec([0.34, 0.66, 0.12, 0.68])", vector_query);
ASSERT_FALSE(parsed);
}
TEST_F(CollectionManagerTest, Presets) {
// try getting on a blank slate
auto presets = collectionManager.get_presets();

View File

@ -97,9 +97,9 @@ TEST_F(CollectionSynonymsTest, SynonymParsingFromJson) {
ASSERT_STREQ("#", synonym_plus.synonyms[1][0].c_str());
nlohmann::json view_json = synonym_plus.to_view_json();
ASSERT_EQ(2, view_json["symbols"].size());
ASSERT_EQ("+", view_json["symbols"][0].get<std::string>());
ASSERT_EQ("#", view_json["symbols"][1].get<std::string>());
ASSERT_EQ(2, view_json["symbols_to_index"].size());
ASSERT_EQ("+", view_json["symbols_to_index"][0].get<std::string>());
ASSERT_EQ("#", view_json["symbols_to_index"][1].get<std::string>());
// when `id` is not given
nlohmann::json syn_json_without_id = {

View File

@ -144,6 +144,33 @@ TEST_F(CollectionVectorTest, BasicVectorQuerying) {
ASSERT_FALSE(res_op.ok());
ASSERT_EQ("Field `zec` does not have a vector query index.", res_op.error());
// pass `id` of existing doc instead of vector, query doc should be omitted from results
results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([], id: 1)").get();
ASSERT_EQ(2, results["found"].get<size_t>());
ASSERT_EQ(2, results["hits"].size());
ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
ASSERT_STREQ("2", results["hits"][1]["document"]["id"].get<std::string>().c_str());
// when `id` does not exist, return appropriate error
res_op = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
"", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
4, {off}, 32767, 32767, 2,
false, true, "vec:([], id: 100)");
ASSERT_FALSE(res_op.ok());
ASSERT_EQ("Document id referenced in vector query is not found.", res_op.error());
// only supported with wildcard queries
res_op = coll1->search("title", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),

View File

@ -199,6 +199,29 @@ TEST_F(CoreAPIUtilsTest, MultiSearchEmbeddedKeys) {
// ensure that req params are appended to (embedded params are also rolled into req params)
ASSERT_EQ("((user_id: 100) && (age: > 100)) && (foo: bar)", req->params["filter_by"]);
// when empty filter_by is present in req params, don't add ()
req->params["filter_by"] = "";
post_multi_search(req, res);
ASSERT_EQ("((age: > 100)) && (foo: bar)", req->params["filter_by"]);
// when empty filter_by in collection search params, don't add ()
req->params["filter_by"] = "user_id: 100";
search["filter_by"] = "";
body["searches"].clear();
body["searches"].push_back(search);
req->body = body.dump();
post_multi_search(req, res);
ASSERT_EQ("((user_id: 100)) && (foo: bar)", req->params["filter_by"]);
// when both are empty, don't add ()
req->params["filter_by"] = "";
search["filter_by"] = "";
body["searches"].clear();
body["searches"].push_back(search);
req->body = body.dump();
post_multi_search(req, res);
ASSERT_EQ("(foo: bar)", req->params["filter_by"]);
// try setting max search limit
req->embedded_params_vec[0]["limit_multi_searches"] = 0;
ASSERT_FALSE(post_multi_search(req, res));

View File

@ -0,0 +1,73 @@
#include <gtest/gtest.h>
#include "vector_query_ops.h"
class VectorQueryOpsTest : public ::testing::Test {
protected:
void setupCollection() {
}
virtual void SetUp() {
setupCollection();
}
virtual void TearDown() {
}
};
TEST_F(VectorQueryOpsTest, ParseVectorQueryString) {
vector_query_t vector_query;
auto parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query, nullptr);
ASSERT_TRUE(parsed.ok());
ASSERT_EQ("vec", vector_query.field_name);
ASSERT_EQ(10, vector_query.k);
std::vector<float> fvs = {0.34, 0.66, 0.12, 0.68};
ASSERT_EQ(fvs.size(), vector_query.values.size());
for (size_t i = 0; i < fvs.size(); i++) {
ASSERT_EQ(fvs[i], vector_query.values[i]);
}
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10)", vector_query, nullptr);
ASSERT_TRUE(parsed.ok());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:([])", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("When a vector query value is empty, an `id` parameter must be present.", parsed.error());
// cannot pass both vector and id
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], id: 10)", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("Malformed vector query string: cannot pass both vector query and `id` parameter.", parsed.error());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:([], k: 10)", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("When a vector query value is empty, an `id` parameter must be present.", parsed.error());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:[0.34, 0.66, 0.12, 0.68], k: 10)", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("Malformed vector query string.", parsed.error());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], k: 10", vector_query, nullptr);
ASSERT_TRUE(parsed.ok());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:(0.34, 0.66, 0.12, 0.68, k: 10)", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("Malformed vector query string.", parsed.error());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec:([0.34, 0.66, 0.12, 0.68], )", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("Malformed vector query string.", parsed.error());
vector_query._reset();
parsed = VectorQueryOps::parse_vector_query_str("vec([0.34, 0.66, 0.12, 0.68])", vector_query, nullptr);
ASSERT_FALSE(parsed.ok());
ASSERT_EQ("Malformed vector query string.", parsed.error());
}