Merge branch 'v0.25-join' of https://github.com/ozanarmagan/typesense into v0.25-join

This commit is contained in:
ozanarmagan 2023-07-08 13:28:49 +03:00
commit 222c50e2c1
24 changed files with 563 additions and 301 deletions

View File

@ -24,10 +24,11 @@ private:
void to_json(nlohmann::json& obj) const {
obj["name"] = name;
obj["type"] = POPULAR_QUERIES_TYPE;
obj["params"] = nlohmann::json::object();
obj["params"]["suggestion_collection"] = suggestion_collection;
obj["params"]["query_collections"] = query_collections;
obj["params"]["limit"] = limit;
obj["params"]["source"]["collections"] = query_collections;
obj["params"]["destination"]["collection"] = suggestion_collection;
}
};
@ -48,7 +49,9 @@ private:
Option<bool> remove_popular_queries_index(const std::string& name);
Option<bool> create_popular_queries_index(nlohmann::json &payload, bool write_to_disk);
Option<bool> create_popular_queries_index(nlohmann::json &payload,
bool upsert,
bool write_to_disk);
public:
@ -69,12 +72,14 @@ public:
Option<nlohmann::json> list_rules();
Option<bool> create_rule(nlohmann::json& payload, bool write_to_disk = true);
Option<nlohmann::json> get_rule(const std::string& name);
Option<bool> create_rule(nlohmann::json& payload, bool upsert, bool write_to_disk);
Option<bool> remove_rule(const std::string& name);
void add_suggestion(const std::string& query_collection,
std::string& query, const bool live_query, const std::string& user_id);
std::string& query, bool live_query, const std::string& user_id);
void stop();
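For context, the updated to_json() above nests the collections under source/destination instead of the flat suggestion_collection/query_collections keys, so a serialized rule now takes this shape (illustrative values, mirroring the tests further down in this diff):

{
  "name": "top_search_queries",
  "type": "popular_queries",
  "params": {
    "limit": 100,
    "source": { "collections": ["titles"] },
    "destination": { "collection": "top_queries" }
  }
}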

View File

@ -463,8 +463,10 @@ public:
const text_match_type_t match_type = max_score,
const size_t facet_sample_percent = 100,
const size_t facet_sample_threshold = 0,
const size_t page_offset = UINT32_MAX,
const size_t vector_query_hits = 250) const;
const size_t page_offset = 0,
const size_t vector_query_hits = 250,
const size_t remote_embedding_timeout_ms = 30000,
const size_t remote_embedding_num_try = 2) const;
Option<bool> get_filter_ids(const std::string & filter_query, filter_result_t& filter_result) const;

View File

@ -147,8 +147,12 @@ bool post_create_event(const std::shared_ptr<http_req>& req, const std::shared_p
bool get_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool get_analytics_rule(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool put_upsert_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
bool del_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res);
// Misc helpers

View File

@ -205,6 +205,49 @@ public:
virtual ~req_state_t() = default;
};
struct stream_response_state_t {
private:
h2o_req_t* req = nullptr;
public:
bool is_req_early_exit = false;
bool is_res_start = true;
h2o_send_state_t send_state = H2O_SEND_STATE_IN_PROGRESS;
std::string res_body;
h2o_iovec_t res_buff;
std::string res_content_type;
int status = 0;
const char* reason = nullptr;
h2o_generator_t* generator = nullptr;
void set_response(uint32_t status_code, const std::string& content_type, std::string& body) {
std::string().swap(res_body);
res_body = std::move(body);
res_buff = h2o_iovec_t{.base = res_body.data(), .len = res_body.size()};
if(is_res_start) {
res_content_type = std::move(content_type);
status = (int)status_code;
reason = http_res::get_status_reason(status_code);
is_res_start = false;
}
}
void set_req(h2o_req_t* _req) {
req = _req;
}
h2o_req_t* get_req() {
return req;
}
};
struct http_req {
static constexpr const char* AUTH_HEADER = "x-typesense-api-key";
static constexpr const char* USER_HEADER = "x-typesense-user-id";
@ -248,6 +291,9 @@ struct http_req {
std::atomic<bool> is_diposed;
std::string client_ip = "0.0.0.0";
// stores http lib related data structures to avoid race conditions between indexing and http write threads
stream_response_state_t res_state;
http_req(): _req(nullptr), route_hash(1),
first_chunk_aggregate(true), last_chunk_aggregate(false),
chunk_len(0), body_index(0), data(nullptr), ready(false), log_index(0),

View File

@ -32,11 +32,13 @@ class HttpProxy {
void operator=(const HttpProxy&) = delete;
HttpProxy(HttpProxy&&) = delete;
void operator=(HttpProxy&&) = delete;
http_proxy_res_t send(const std::string& url, const std::string& method, const std::string& body, const std::unordered_map<std::string, std::string>& headers);
http_proxy_res_t send(const std::string& url, const std::string& method, const std::string& body, std::unordered_map<std::string, std::string>& headers);
private:
HttpProxy();
~HttpProxy() = default;
http_proxy_res_t call(const std::string& url, const std::string& method, const std::string& body = "", const std::unordered_map<std::string, std::string>& headers = {});
http_proxy_res_t call(const std::string& url, const std::string& method,
const std::string& body = "", const std::unordered_map<std::string, std::string>& headers = {},
const size_t timeout_ms = 30000);
// lru cache for http requests

View File

@ -40,50 +40,6 @@ struct h2o_custom_generator_t {
}
};
struct stream_response_state_t {
private:
h2o_req_t* req = nullptr;
public:
bool is_req_early_exit = false;
bool is_res_start = true;
h2o_send_state_t send_state = H2O_SEND_STATE_IN_PROGRESS;
std::string res_body;
h2o_iovec_t res_buff;
h2o_iovec_t res_content_type{};
int status = 0;
const char* reason = nullptr;
h2o_generator_t* generator = nullptr;
explicit stream_response_state_t(h2o_req_t* _req): req(_req) {
if(req != nullptr) {
is_res_start = (req->res.status == 0);
}
}
void set_response(uint32_t status_code, const std::string& content_type, std::string& body) {
std::string().swap(res_body);
res_body = std::move(body);
res_buff = h2o_iovec_t{.base = res_body.data(), .len = res_body.size()};
if(is_res_start) {
res_content_type = h2o_strdup(&req->pool, content_type.c_str(), SIZE_MAX);
status = status_code;
reason = http_res::get_status_reason(status_code);
}
}
h2o_req_t* get_req() {
return req;
}
};
struct deferred_req_res_t {
const std::shared_ptr<http_req> req;
const std::shared_ptr<http_res> res;
@ -110,13 +66,9 @@ public:
// used to manage lifecycle of async actions
const bool destroy_after_use;
// stores http lib related data structures to avoid race conditions between indexing and http write threads
stream_response_state_t res_state;
async_req_res_t(const std::shared_ptr<http_req>& h_req, const std::shared_ptr<http_res>& h_res,
const bool destroy_after_use) :
req(h_req), res(h_res), destroy_after_use(destroy_after_use),
res_state((std::shared_lock(res->mres), h_req->is_diposed ? nullptr : h_req->_req)) {
req(h_req), res(h_res), destroy_after_use(destroy_after_use) {
std::shared_lock lk(res->mres);
@ -124,12 +76,10 @@ public:
return;
}
// ***IMPORTANT***
// We limit writing to fields of `res_state.req` to prevent race conditions with http thread
// Check `HttpServer::stream_response()` for overlapping writes.
h2o_custom_generator_t* res_generator = static_cast<h2o_custom_generator_t*>(res->generator.load());
auto& res_state = req->res_state;
res_state.set_req(h_req->is_diposed ? nullptr : h_req->_req);
res_state.is_req_early_exit = (res_generator->rpath->async_req && res->final && !req->last_chunk_aggregate);
res_state.send_state = res->final ? H2O_SEND_STATE_FINAL : H2O_SEND_STATE_IN_PROGRESS;
res_state.generator = (res_generator == nullptr) ? nullptr : &res_generator->h2o_generator;
@ -147,6 +97,10 @@ public:
void res_notify() {
return res->notify();
}
stream_response_state_t& get_res_state() {
return req->res_state;
}
};
struct defer_processing_t {

View File

@ -150,7 +150,7 @@ struct search_args {
filter_node_t* filter_tree_root, std::vector<facet>& facets,
std::vector<std::pair<uint32_t, uint32_t>>& included_ids, std::vector<uint32_t> excluded_ids,
std::vector<sort_by>& sort_fields_std, facet_query_t facet_query, const std::vector<uint32_t>& num_typos,
size_t max_facet_values, size_t max_hits, size_t per_page, size_t page, token_ordering token_order,
size_t max_facet_values, size_t max_hits, size_t per_page, size_t offset, token_ordering token_order,
const std::vector<bool>& prefixes, size_t drop_tokens_threshold, size_t typo_tokens_threshold,
const std::vector<std::string>& group_by_fields, size_t group_limit,
const string& default_sorting_field, bool prioritize_exact_match,

View File

@ -16,8 +16,8 @@ class TextEmbedder {
// Constructor for remote models
TextEmbedder(const nlohmann::json& model_config);
~TextEmbedder();
embedding_res_t Embed(const std::string& text);
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 0);
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2);
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200);
const std::string& get_vocab_file_name() const;
bool is_remote() {
return remote_embedder_ != nullptr;
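A minimal usage sketch of the widened Embed() signature, assuming `embedder` points to a TextEmbedder built for a remote model (the setup is hypothetical; the defaults match this header):

// hypothetical setup: TextEmbedder* embedder = ...;
auto op = embedder->Embed("running shoes",
                          5000 /* remote_embedder_timeout_ms */,
                          3    /* remote_embedding_num_try */);
if(!op.success) {
    // op.error carries the request/response context assembled by get_error_json()
}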

View File

@ -28,7 +28,8 @@ class RemoteEmbedder {
static long call_remote_api(const std::string& method, const std::string& url, const std::string& body, std::string& res_body, std::map<std::string, std::string>& headers, const std::unordered_map<std::string, std::string>& req_headers);
static inline ReplicationState* raft_server = nullptr;
public:
virtual embedding_res_t Embed(const std::string& text) = 0;
virtual nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) = 0;
virtual embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) = 0;
virtual std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) = 0;
static void init(ReplicationState* rs) {
raft_server = rs;
@ -47,8 +48,9 @@ class OpenAIEmbedder : public RemoteEmbedder {
public:
OpenAIEmbedder(const std::string& openai_model_path, const std::string& api_key);
static Option<bool> is_model_valid(const nlohmann::json& model_config, unsigned int& num_dims);
embedding_res_t Embed(const std::string& text) override;
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
};
@ -59,11 +61,13 @@ class GoogleEmbedder : public RemoteEmbedder {
inline static constexpr short GOOGLE_EMBEDDING_DIM = 768;
inline static constexpr char* GOOGLE_CREATE_EMBEDDING = "https://generativelanguage.googleapis.com/v1beta2/models/embedding-gecko-001:embedText?key=";
std::string google_api_key;
public:
GoogleEmbedder(const std::string& google_api_key);
static Option<bool> is_model_valid(const nlohmann::json& model_config, unsigned int& num_dims);
embedding_res_t Embed(const std::string& text) override;
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
};
@ -88,8 +92,9 @@ class GCPEmbedder : public RemoteEmbedder {
GCPEmbedder(const std::string& project_id, const std::string& model_name, const std::string& access_token,
const std::string& refresh_token, const std::string& client_id, const std::string& client_secret);
static Option<bool> is_model_valid(const nlohmann::json& model_config, unsigned int& num_dims);
embedding_res_t Embed(const std::string& text) override;
embedding_res_t Embed(const std::string& text, const size_t remote_embedder_timeout_ms = 30000, const size_t remote_embedding_num_try = 2) override;
std::vector<embedding_res_t> batch_embed(const std::vector<std::string>& inputs, const size_t remote_embedding_batch_size = 200) override;
nlohmann::json get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) override;
};

View File

@ -5,7 +5,7 @@
#include "http_client.h"
#include "collection_manager.h"
Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool write_to_disk) {
Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool upsert, bool write_to_disk) {
/*
Sample payload:
@ -37,16 +37,23 @@ Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool write_t
}
if(payload["type"] == POPULAR_QUERIES_TYPE) {
return create_popular_queries_index(payload, write_to_disk);
return create_popular_queries_index(payload, upsert, write_to_disk);
}
return Option<bool>(400, "Invalid type.");
}
Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payload, bool write_to_disk) {
Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payload, bool upsert, bool write_to_disk) {
// params and name are validated upstream
const auto& params = payload["params"];
const std::string& suggestion_config_name = payload["name"].get<std::string>();
bool already_exists = suggestion_configs.find(suggestion_config_name) != suggestion_configs.end();
if(!upsert && already_exists) {
return Option<bool>(400, "There's already another configuration with the name `" +
suggestion_config_name + "`.");
}
const auto& params = payload["params"];
if(!params.contains("source") || !params["source"].is_object()) {
return Option<bool>(400, "Bad or missing source.");
@ -56,18 +63,12 @@ Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payl
return Option<bool>(400, "Bad or missing destination.");
}
size_t limit = 1000;
if(params.contains("limit") && params["limit"].is_number_integer()) {
limit = params["limit"].get<size_t>();
}
if(suggestion_configs.find(suggestion_config_name) != suggestion_configs.end()) {
return Option<bool>(400, "There's already another configuration with the name `" +
suggestion_config_name + "`.");
}
if(!params["source"].contains("collections") || !params["source"]["collections"].is_array()) {
return Option<bool>(400, "Must contain a valid list of source collections.");
}
@ -93,6 +94,14 @@ Option<bool> AnalyticsManager::create_popular_queries_index(nlohmann::json &payl
std::unique_lock lock(mutex);
if(already_exists) {
// remove the previous configuration with the same name (upsert)
Option<bool> remove_op = remove_popular_queries_index(suggestion_config_name);
if(!remove_op.ok()) {
return Option<bool>(500, "Error erasing the existing configuration.");
}
}
suggestion_configs.emplace(suggestion_config_name, suggestion_config);
for(const auto& query_coll: suggestion_config.query_collections) {
@ -130,13 +139,25 @@ Option<nlohmann::json> AnalyticsManager::list_rules() {
for(const auto& suggestion_config: suggestion_configs) {
nlohmann::json rule;
suggestion_config.second.to_json(rule);
rule["type"] = POPULAR_QUERIES_TYPE;
rules["rules"].push_back(rule);
}
return Option<nlohmann::json>(rules);
}
Option<nlohmann::json> AnalyticsManager::get_rule(const string& name) {
nlohmann::json rule;
std::unique_lock lock(mutex);
auto suggestion_config_it = suggestion_configs.find(name);
if(suggestion_config_it == suggestion_configs.end()) {
return Option<nlohmann::json>(404, "Rule not found.");
}
suggestion_config_it->second.to_json(rule);
return Option<nlohmann::json>(rule);
}
Option<bool> AnalyticsManager::remove_rule(const string &name) {
std::unique_lock lock(mutex);
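Taken together, the changes above give create_rule() these semantics; a sketch, assuming `rule_json` is a payload shaped like the example near the top of this diff:

auto& mgr = AnalyticsManager::get_instance();
auto create_op = mgr.create_rule(rule_json, false, true); // upsert=false: 400 if the name already exists
auto upsert_op = mgr.create_rule(rule_json, true, true);  // upsert=true: removes and re-creates the rule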

View File

@ -1108,7 +1108,9 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
const size_t facet_sample_percent,
const size_t facet_sample_threshold,
const size_t page_offset,
const size_t vector_query_hits) const {
const size_t vector_query_hits,
const size_t remote_embedding_timeout_ms,
const size_t remote_embedding_num_try) const {
std::shared_lock lock(mutex);
@ -1234,10 +1236,15 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
std::string error = "Prefix search is not supported for remote embedders. Please set `prefix=false` as an additional search parameter to disable prefix searching.";
return Option<nlohmann::json>(400, error);
}
if(remote_embedding_num_try == 0) {
std::string error = "`remote_embedding_num_try` must be greater than 0.";
return Option<nlohmann::json>(400, error);
}
}
std::string embed_query = embedder_manager.get_query_prefix(search_field.embed[fields::model_config]) + raw_query;
auto embedding_op = embedder->Embed(embed_query);
auto embedding_op = embedder->Embed(embed_query, remote_embedding_timeout_ms, remote_embedding_num_try);
if(!embedding_op.success) {
if(!embedding_op.error["error"].get<std::string>().empty()) {
return Option<nlohmann::json>(400, embedding_op.error["error"].get<std::string>());
@ -1393,7 +1400,17 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
return Option<nlohmann::json>(422, message);
}
size_t offset = (page != 0) ? (per_page * (page - 1)) : page_offset;
size_t offset = 0;
if(page == 0 && page_offset != 0) {
// if only offset is set, use that
offset = page_offset;
} else {
// if both are set or none set, use page value (default is 1)
size_t actual_page = (page == 0) ? 1 : page;
offset = (per_page * (actual_page - 1));
}
size_t fetch_size = offset + per_page;
if(fetch_size > limit_hits) {

View File

@ -285,6 +285,17 @@ Option<bool> CollectionManager::load(const size_t collection_batch_size, const s
iter->Next();
}
// restore query suggestions configs
std::vector<std::string> analytics_config_jsons;
store->scan_fill(AnalyticsManager::ANALYTICS_RULE_PREFIX,
std::string(AnalyticsManager::ANALYTICS_RULE_PREFIX) + "`",
analytics_config_jsons);
for(const auto& analytics_config_json: analytics_config_jsons) {
nlohmann::json analytics_config = nlohmann::json::parse(analytics_config_json);
AnalyticsManager::get_instance().create_rule(analytics_config, false, false);
}
delete iter;
LOG(INFO) << "Loaded " << num_collections << " collection(s).";
@ -671,6 +682,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
const char *VECTOR_QUERY = "vector_query";
const char *VECTOR_QUERY_HITS = "vector_query_hits";
const char* REMOTE_EMBEDDING_TIMEOUT_MS = "remote_embedding_timeout_ms";
const char* REMOTE_EMBEDDING_NUM_TRY = "remote_embedding_num_try";
const char *GROUP_BY = "group_by";
const char *GROUP_LIMIT = "group_limit";
@ -783,7 +797,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
std::vector<sort_by> sort_fields;
size_t per_page = 10;
size_t page = 0;
size_t offset = UINT32_MAX;
size_t offset = 0;
token_ordering token_order = NOT_SET;
std::string vector_query;
@ -824,6 +838,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
text_match_type_t match_type = max_score;
size_t vector_query_hits = 250;
size_t remote_embedding_timeout_ms = 30000;
size_t remote_embedding_num_try = 2;
size_t facet_sample_percent = 100;
size_t facet_sample_threshold = 0;
@ -850,6 +867,8 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
{FACET_SAMPLE_PERCENT, &facet_sample_percent},
{FACET_SAMPLE_THRESHOLD, &facet_sample_threshold},
{VECTOR_QUERY_HITS, &vector_query_hits},
{REMOTE_EMBEDDING_TIMEOUT_MS, &remote_embedding_timeout_ms},
{REMOTE_EMBEDDING_NUM_TRY, &remote_embedding_num_try},
};
std::unordered_map<std::string, std::string*> str_values = {
@ -978,14 +997,6 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
per_page = 0;
}
if(!req_params[PAGE].empty() && page == 0 && offset == UINT32_MAX) {
return Option<bool>(422, "Parameter `page` must be an integer of value greater than 0.");
}
if(req_params[PAGE].empty() && req_params[OFFSET].empty()) {
page = 1;
}
include_fields.insert(include_fields_vec.begin(), include_fields_vec.end());
exclude_fields.insert(exclude_fields_vec.begin(), exclude_fields_vec.end());
@ -1070,7 +1081,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
match_type,
facet_sample_percent,
facet_sample_threshold,
offset
offset,
vector_query_hits,
remote_embedding_timeout_ms,
remote_embedding_num_try
);
uint64_t timeMillis = std::chrono::duration_cast<std::chrono::milliseconds>(
@ -1097,10 +1111,10 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
result["search_time_ms"] = timeMillis;
}
if(page != 0) {
result["page"] = page;
} else {
if(page == 0 && offset != 0) {
result["offset"] = offset;
} else {
result["page"] = (page == 0) ? 1 : page;
}
results_json_str = result.dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore);
@ -1320,17 +1334,6 @@ Option<bool> CollectionManager::load_collection(const nlohmann::json &collection
collection->add_synonym(collection_synonym, false);
}
// restore query suggestions configs
std::vector<std::string> analytics_config_jsons;
cm.store->scan_fill(AnalyticsManager::ANALYTICS_RULE_PREFIX,
std::string(AnalyticsManager::ANALYTICS_RULE_PREFIX) + "`",
analytics_config_jsons);
for(const auto& analytics_config_json: analytics_config_jsons) {
nlohmann::json analytics_config = nlohmann::json::parse(analytics_config_json);
AnalyticsManager::get_instance().create_rule(analytics_config, false);
}
// Fetch records from the store and re-create memory index
const std::string seq_id_prefix = collection->get_seq_id_collection_prefix();
std::string upper_bound_key = collection->get_seq_id_collection_prefix() + "`"; // cannot inline this
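With the two new knobs registered in do_search(), remote embedding behavior can be tuned per request; a hedged example of the incoming parameters (collection and field names are hypothetical):

std::map<std::string, std::string> req_params = {
    {"collection", "products"},
    {"q", "running shoes"},
    {"query_by", "embedding"},
    {"remote_embedding_timeout_ms", "5000"},
    {"remote_embedding_num_try", "3"},
    {"offset", "30"}  // with `page` absent, the response echoes "offset": 30
};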

View File

@ -2091,13 +2091,25 @@ bool post_create_event(const std::shared_ptr<http_req>& req, const std::shared_p
bool get_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto rules_op = AnalyticsManager::get_instance().list_rules();
if(rules_op.ok()) {
res->set_200(rules_op.get().dump());
return true;
if(!rules_op.ok()) {
res->set(rules_op.code(), rules_op.error());
return false;
}
res->set(rules_op.code(), rules_op.error());
return false;
res->set_200(rules_op.get().dump());
return true;
}
bool get_analytics_rule(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto rules_op = AnalyticsManager::get_instance().get_rule(req->params["name"]);
if(!rules_op.ok()) {
res->set(rules_op.code(), rules_op.error());
return false;
}
res->set_200(rules_op.get().dump());
return true;
}
bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
@ -2111,7 +2123,7 @@ bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std
return false;
}
auto op = AnalyticsManager::get_instance().create_rule(req_json);
auto op = AnalyticsManager::get_instance().create_rule(req_json, false, true);
if(!op.ok()) {
res->set(op.code(), op.error());
@ -2122,6 +2134,29 @@ bool post_create_analytics_rules(const std::shared_ptr<http_req>& req, const std
return true;
}
bool put_upsert_analytics_rules(const std::shared_ptr<http_req> &req, const std::shared_ptr<http_res> &res) {
nlohmann::json req_json;
try {
req_json = nlohmann::json::parse(req->body);
} catch(const std::exception& e) {
LOG(ERROR) << "JSON error: " << e.what();
res->set_400("Bad JSON.");
return false;
}
req_json["name"] = req->params["name"];
auto op = AnalyticsManager::get_instance().create_rule(req_json, true, true);
if(!op.ok()) {
res->set(op.code(), op.error());
return false;
}
res->set_200(req_json.dump());
return true;
}
bool del_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto op = AnalyticsManager::get_instance().remove_rule(req->params["name"]);
if(!op.ok()) {
@ -2129,11 +2164,10 @@ bool del_analytics_rules(const std::shared_ptr<http_req>& req, const std::shared
return false;
}
res->set_200(R"({"ok": true)");
res->set_200(R"({"ok": true})");
return true;
}
bool post_proxy(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
HttpProxy& proxy = HttpProxy::get_instance();
@ -2193,4 +2227,4 @@ bool post_proxy(const std::shared_ptr<http_req>& req, const std::shared_ptr<http
res->set_200(response.body);
return true;
}
}

View File

@ -8,17 +8,19 @@ HttpProxy::HttpProxy() : cache(30s){
}
http_proxy_res_t HttpProxy::call(const std::string& url, const std::string& method, const std::string& body, const std::unordered_map<std::string, std::string>& headers) {
http_proxy_res_t HttpProxy::call(const std::string& url, const std::string& method,
const std::string& body, const std::unordered_map<std::string, std::string>& headers,
const size_t timeout_ms) {
HttpClient& client = HttpClient::get_instance();
http_proxy_res_t res;
if(method == "GET") {
res.status_code = client.get_response(url, res.body, res.headers, headers, 20 * 1000);
res.status_code = client.get_response(url, res.body, res.headers, headers, timeout_ms);
} else if(method == "POST") {
res.status_code = client.post_response(url, body, res.body, res.headers, headers, 20 * 1000);
res.status_code = client.post_response(url, body, res.body, res.headers, headers, timeout_ms);
} else if(method == "PUT") {
res.status_code = client.put_response(url, body, res.body, res.headers, 20 * 1000);
res.status_code = client.put_response(url, body, res.body, res.headers, timeout_ms);
} else if(method == "DELETE") {
res.status_code = client.delete_response(url, res.body, res.headers, 20 * 1000);
res.status_code = client.delete_response(url, res.body, res.headers, timeout_ms);
} else {
res.status_code = 400;
nlohmann::json j;
@ -29,11 +31,25 @@ http_proxy_res_t HttpProxy::call(const std::string& url, const std::string& meth
}
http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& method, const std::string& body, const std::unordered_map<std::string, std::string>& headers) {
http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& method, const std::string& body, std::unordered_map<std::string, std::string>& headers) {
// check if url is in cache
uint64_t key = StringUtils::hash_wy(url.c_str(), url.size());
key = StringUtils::hash_combine(key, StringUtils::hash_wy(method.c_str(), method.size()));
key = StringUtils::hash_combine(key, StringUtils::hash_wy(body.c_str(), body.size()));
size_t timeout_ms = 30000;
size_t num_try = 2;
if(headers.find("timeout_ms") != headers.end()){
timeout_ms = std::stoul(headers.at("timeout_ms"));
headers.erase("timeout_ms");
}
if(headers.find("num_try") != headers.end()){
num_try = std::stoul(headers.at("num_try"));
headers.erase("num_try");
}
for(auto& header : headers){
key = StringUtils::hash_combine(key, StringUtils::hash_wy(header.first.c_str(), header.first.size()));
key = StringUtils::hash_combine(key, StringUtils::hash_wy(header.second.c_str(), header.second.size()));
@ -42,11 +58,13 @@ http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& meth
return cache[key];
}
auto res = call(url, method, body, headers);
http_proxy_res_t res;
for(size_t i = 0; i < num_try; i++){
res = call(url, method, body, headers, timeout_ms);
if(res.status_code == 408){
// retry
res = call(url, method, body, headers);
if(res.status_code != 408 && res.status_code < 500){
break;
}
}
if(res.status_code == 408){
@ -54,10 +72,9 @@ http_proxy_res_t HttpProxy::send(const std::string& url, const std::string& meth
j["message"] = "Server error on remote server. Please try again later.";
res.body = j.dump();
}
// add to cache
if(res.status_code != 408){
if(res.status_code == 200){
cache.insert(key, res);
}
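The timeout_ms/num_try entries are consumed and stripped by send() before the remaining headers are forwarded; a usage sketch under that assumption (the URL is illustrative):

std::unordered_map<std::string, std::string> headers;
headers["timeout_ms"] = "5000";  // per-request timeout handed to HttpClient
headers["num_try"] = "3";        // retried while the status is 408 or >= 500
auto res = HttpProxy::get_instance().send("https://example.org/embed", "POST", "{}", headers);
// `headers` no longer contains timeout_ms/num_try here, and only 200 responses are cached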

View File

@ -808,9 +808,11 @@ void HttpServer::stream_response(stream_response_state_t& state) {
h2o_req_t* req = state.get_req();
if(state.is_res_start) {
bool start_of_res = (req->res.status == 0);
if(start_of_res) {
h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, NULL,
state.res_content_type.base, state.res_content_type.len);
state.res_content_type.data(), state.res_content_type.size());
req->res.status = state.status;
req->res.reason = state.reason;
}
@ -829,7 +831,7 @@ void HttpServer::stream_response(stream_response_state_t& state) {
return ;
}
if (state.is_res_start) {
if (start_of_res) {
/*LOG(INFO) << "h2o_start_response, content_type=" << state.res_content_type
<< ",response.status_code=" << state.res_status_code;*/
h2o_start_response(req, state.generator);
@ -969,7 +971,7 @@ bool HttpServer::on_stream_response_message(void *data) {
// NOTE: access to `req` and `res` objects must be synchronized and wrapped by `req_res`
if(req_res->is_alive()) {
stream_response(req_res->res_state);
stream_response(req_res->get_res_state());
} else {
// serialized request or generator has been disposed (underlying request is probably dead)
req_res->req_notify();

View File

@ -6268,7 +6268,9 @@ void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<cha
if(it.value().is_null()) {
// null values should not be indexed
new_doc.erase(it.key());
del_doc[it.key()] = old_doc[it.key()];
if(old_doc.contains(it.key())) {
del_doc[it.key()] = old_doc[it.key()];
}
it = update_doc.erase(it);
continue;
}
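The contains() guard matters for the case exercised by the new test further down: an update that sets null on a field the stored document never had. A minimal repro of the inputs (hypothetical document):

nlohmann::json old_doc = R"({"id": "2"})"_json;               // "tags" was never set
nlohmann::json update  = R"({"id": "2", "tags": null})"_json; // null on a missing field
// without the guard, old_doc["tags"] would materialize a null entry in del_doc,
// scheduling removal of a value that was never indexed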

View File

@ -70,7 +70,9 @@ void master_server_routes() {
// analytics
server->get("/analytics/rules", get_analytics_rules);
server->get("/analytics/rules/:name", get_analytics_rule);
server->post("/analytics/rules", post_create_analytics_rules);
server->put("/analytics/rules/:name", put_upsert_analytics_rules);
server->del("/analytics/rules/:name", del_analytics_rules);
server->post("/analytics/events", post_create_event);

View File

@ -83,9 +83,9 @@ std::vector<float> TextEmbedder::mean_pooling(const std::vector<std::vector<floa
return pooled_output;
}
embedding_res_t TextEmbedder::Embed(const std::string& text) {
embedding_res_t TextEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
if(is_remote()) {
return remote_embedder_->Embed(text);
return remote_embedder_->Embed(text, remote_embedder_timeout_ms, remote_embedding_num_try);
} else {
// Cannot run same model in parallel, so lock the mutex
std::lock_guard<std::mutex> lock(mutex_);

View File

@ -132,53 +132,27 @@ Option<bool> OpenAIEmbedder::is_model_valid(const nlohmann::json& model_config,
return Option<bool>(true);
}
embedding_res_t OpenAIEmbedder::Embed(const std::string& text) {
embedding_res_t OpenAIEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
std::unordered_map<std::string, std::string> headers;
std::map<std::string, std::string> res_headers;
headers["Authorization"] = "Bearer " + api_key;
headers["Content-Type"] = "application/json";
headers["timeout_ms"] = std::to_string(remote_embedder_timeout_ms);
headers["num_try"] = std::to_string(remote_embedding_num_try);
std::string res;
nlohmann::json req_body;
req_body["input"] = text;
req_body["input"] = std::vector<std::string>{text};
// remove "openai/" prefix
req_body["model"] = TextEmbedderManager::get_model_name_without_namespace(openai_model_path);
auto res_code = call_remote_api("POST", OPENAI_CREATE_EMBEDDING, req_body.dump(), res, res_headers, headers);
if (res_code != 200) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from OpenAI API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "OpenAI API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "OpenAI API timeout.";
}
return embedding_res_t(res_code, embedding_res);
return embedding_res_t(res_code, get_error_json(req_body, res_code, res));
}
try {
embedding_res_t embedding_res = embedding_res_t(nlohmann::json::parse(res)["data"][0]["embedding"].get<std::vector<float>>());
return embedding_res;
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["error"] = "Malformed response from OpenAI API.";
return embedding_res_t(500, embedding_res);
return embedding_res_t(500, get_error_json(req_body, res_code, res));
}
}
@ -206,46 +180,19 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
if(res_code != 200) {
std::vector<embedding_res_t> outputs;
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from OpenAI API.";
}
LOG(INFO) << "OpenAI API error: " << json_res.dump();
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["request"]["body"]["input"] = std::vector<std::string>{inputs[0]};
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "OpenAI API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "OpenAI API timeout.";
}
nlohmann::json embedding_res = get_error_json(req_body, res_code, res);
for(size_t i = 0; i < inputs.size(); i++) {
embedding_res["request"]["body"]["input"][0] = inputs[i];
outputs.push_back(embedding_res_t(res_code, embedding_res));
}
return outputs;
}
nlohmann::json res_json;
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["request"]["body"]["input"] = std::vector<std::string>{inputs[0]};
embedding_res["error"] = "Malformed response from OpenAI API";
nlohmann::json embedding_res = get_error_json(req_body, res_code, res);
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
embedding_res["request"]["body"]["input"][0] = inputs[i];
@ -262,6 +209,36 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
}
nlohmann::json OpenAIEmbedder::get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res_body);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from OpenAI API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = OPENAI_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(embedding_res["request"]["body"].count("input") > 0 && embedding_res["request"]["body"]["input"].get<std::vector<std::string>>().size() > 1) {
auto vec = embedding_res["request"]["body"]["input"].get<std::vector<std::string>>();
vec.resize(1);
embedding_res["request"]["body"]["input"] = vec;
}
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "OpenAI API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "OpenAI API timeout.";
}
return embedding_res;
}
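For reference, the consolidated get_error_json() above yields a payload of this shape (illustrative; the input list is trimmed to its first element to keep the payload small):

// {
//   "response": { ...parsed upstream body, or {"error": "Malformed response from OpenAI API."} },
//   "request":  { "url": <OPENAI_CREATE_EMBEDDING>, "method": "POST", "body": {"model": "...", "input": ["..."]} },
//   "error":    "OpenAI API error: <message>"  // or "OpenAI API timeout." when res_code == 408
// }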
GoogleEmbedder::GoogleEmbedder(const std::string& google_api_key) : google_api_key(google_api_key) {
}
@ -320,10 +297,12 @@ Option<bool> GoogleEmbedder::is_model_valid(const nlohmann::json& model_config,
return Option<bool>(true);
}
embedding_res_t GoogleEmbedder::Embed(const std::string& text) {
embedding_res_t GoogleEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
std::unordered_map<std::string, std::string> headers;
std::map<std::string, std::string> res_headers;
headers["Content-Type"] = "application/json";
headers["timeout_ms"] = std::to_string(remote_embedder_timeout_ms);
headers["num_try"] = std::to_string(remote_embedding_num_try);
std::string res;
nlohmann::json req_body;
req_body["text"] = text;
@ -331,37 +310,13 @@ embedding_res_t GoogleEmbedder::Embed(const std::string& text) {
auto res_code = call_remote_api("POST", std::string(GOOGLE_CREATE_EMBEDDING) + google_api_key, req_body.dump(), res, res_headers, headers);
if(res_code != 200) {
nlohmann::json json_res;
try {
nlohmann::json json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from Google API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = GOOGLE_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "Google API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "Google API timeout.";
}
return embedding_res_t(res_code, embedding_res);
return embedding_res_t(res_code, get_error_json(req_body, res_code, res));
}
try {
return embedding_res_t(nlohmann::json::parse(res)["embedding"]["value"].get<std::vector<float>>());
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = GOOGLE_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["error"] = "Malformed response from Google API.";
return embedding_res_t(500, embedding_res);
return embedding_res_t(500, get_error_json(req_body, res_code, res));
}
}
@ -376,6 +331,30 @@ std::vector<embedding_res_t> GoogleEmbedder::batch_embed(const std::vector<std::
return outputs;
}
nlohmann::json GoogleEmbedder::get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res_body);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from Google API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = GOOGLE_CREATE_EMBEDDING;
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "Google API error: " + json_res["error"]["message"].get<std::string>();
}
if(res_code == 408) {
embedding_res["error"] = "Google API timeout.";
}
return embedding_res;
}
GCPEmbedder::GCPEmbedder(const std::string& project_id, const std::string& model_name, const std::string& access_token,
const std::string& refresh_token, const std::string& client_id, const std::string& client_secret) :
@ -453,7 +432,7 @@ Option<bool> GCPEmbedder::is_model_valid(const nlohmann::json& model_config, uns
return Option<bool>(true);
}
embedding_res_t GCPEmbedder::Embed(const std::string& text) {
embedding_res_t GCPEmbedder::Embed(const std::string& text, const size_t remote_embedder_timeout_ms, const size_t remote_embedding_num_try) {
nlohmann::json req_body;
req_body["instances"] = nlohmann::json::array();
nlohmann::json instance;
@ -462,6 +441,8 @@ embedding_res_t GCPEmbedder::Embed(const std::string& text) {
std::unordered_map<std::string, std::string> headers;
headers["Authorization"] = "Bearer " + access_token;
headers["Content-Type"] = "application/json";
headers["timeout_ms"] = std::to_string(remote_embedder_timeout_ms);
headers["num_try"] = std::to_string(remote_embedding_num_try);
std::map<std::string, std::string> res_headers;
std::string res;
@ -483,41 +464,13 @@ embedding_res_t GCPEmbedder::Embed(const std::string& text) {
}
if(res_code != 200) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from GCP API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "GCP API error: " + json_res["error"]["message"].get<std::string>();
} else {
embedding_res["error"] = "Malformed response from GCP API.";
}
if(res_code == 408) {
embedding_res["error"] = "GCP API timeout.";
}
return embedding_res_t(res_code, embedding_res);
return embedding_res_t(res_code, get_error_json(req_body, res_code, res));
}
nlohmann::json res_json;
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
embedding_res["error"] = "Malformed response from GCP API.";
return embedding_res_t(500, embedding_res);
return embedding_res_t(500, get_error_json(req_body, res_code, res));
}
return embedding_res_t(res_json["predictions"][0]["embeddings"]["values"].get<std::vector<float>>());
}
@ -566,28 +519,7 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
}
if(res_code != 200) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from GCP API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "GCP API error: " + json_res["error"]["message"].get<std::string>();
} else {
embedding_res["error"] = "Malformed response from GCP API.";
}
if(res_code == 408) {
embedding_res["error"] = "GCP API timeout.";
}
auto embedding_res = get_error_json(req_body, res_code, res);
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
outputs.push_back(embedding_res_t(res_code, embedding_res));
@ -598,12 +530,7 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
try {
res_json = nlohmann::json::parse(res);
} catch (const std::exception& e) {
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["error"] = "Nalformed response from GCP API.";
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
nlohmann::json embedding_res = get_error_json(req_body, res_code, res);
std::vector<embedding_res_t> outputs;
for(size_t i = 0; i < inputs.size(); i++) {
outputs.push_back(embedding_res_t(400, embedding_res));
@ -618,6 +545,34 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
return outputs;
}
nlohmann::json GCPEmbedder::get_error_json(const nlohmann::json& req_body, long res_code, const std::string& res_body) {
nlohmann::json json_res;
try {
json_res = nlohmann::json::parse(res_body);
} catch (const std::exception& e) {
json_res = nlohmann::json::object();
json_res["error"] = "Malformed response from GCP API.";
}
nlohmann::json embedding_res = nlohmann::json::object();
embedding_res["response"] = json_res;
embedding_res["request"] = nlohmann::json::object();
embedding_res["request"]["url"] = get_gcp_embedding_url(project_id, model_name);
embedding_res["request"]["method"] = "POST";
embedding_res["request"]["body"] = req_body;
if(json_res.count("error") != 0 && json_res["error"].count("message") != 0) {
embedding_res["error"] = "GCP API error: " + json_res["error"]["message"].get<std::string>();
} else {
embedding_res["error"] = "Malformed response from GCP API.";
}
if(res_code == 408) {
embedding_res["error"] = "GCP API timeout.";
}
return embedding_res;
}
Option<std::string> GCPEmbedder::generate_access_token(const std::string& refresh_token, const std::string& client_id, const std::string& client_secret) {
std::unordered_map<std::string, std::string> headers;
headers["Content-Type"] = "application/x-www-form-urlencoded";
@ -653,5 +608,3 @@ Option<std::string> GCPEmbedder::generate_access_token(const std::string& refres
return Option<std::string>(access_token);
}

View File

@ -78,7 +78,7 @@ TEST_F(AnalyticsManagerTest, AddSuggestion) {
}
})"_json;
auto create_op = analyticsManager.create_rule(analytics_rule);
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
std::string q = "foobar";
@ -88,4 +88,97 @@ TEST_F(AnalyticsManagerTest, AddSuggestion) {
auto userQueries = popularQueries["top_queries"]->get_user_prefix_queries()["1"];
ASSERT_EQ(1, userQueries.size());
ASSERT_EQ("foobar", userQueries[0].query);
// add another query which is more popular
q = "buzzfoo";
analyticsManager.add_suggestion("titles", q, true, "1");
analyticsManager.add_suggestion("titles", q, true, "2");
analyticsManager.add_suggestion("titles", q, true, "3");
popularQueries = analyticsManager.get_popular_queries();
userQueries = popularQueries["top_queries"]->get_user_prefix_queries()["1"];
ASSERT_EQ(2, userQueries.size());
ASSERT_EQ("foobar", userQueries[0].query);
ASSERT_EQ("buzzfoo", userQueries[1].query);
}
TEST_F(AnalyticsManagerTest, GetAndDeleteSuggestions) {
nlohmann::json analytics_rule = R"({
"name": "top_search_queries",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["titles"]
},
"destination": {
"collection": "top_queries"
}
}
})"_json;
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
analytics_rule = R"({
"name": "top_search_queries2",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["titles"]
},
"destination": {
"collection": "top_queries"
}
}
})"_json;
create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
auto rules = analyticsManager.list_rules().get()["rules"];
ASSERT_EQ(2, rules.size());
ASSERT_TRUE(analyticsManager.get_rule("top_search_queries").ok());
ASSERT_TRUE(analyticsManager.get_rule("top_search_queries2").ok());
auto missing_rule_op = analyticsManager.get_rule("top_search_queriesX");
ASSERT_FALSE(missing_rule_op.ok());
ASSERT_EQ(404, missing_rule_op.code());
ASSERT_EQ("Rule not found.", missing_rule_op.error());
// upsert rule that already exists
analytics_rule = R"({
"name": "top_search_queries2",
"type": "popular_queries",
"params": {
"limit": 100,
"source": {
"collections": ["titles"]
},
"destination": {
"collection": "top_queriesUpdated"
}
}
})"_json;
create_op = analyticsManager.create_rule(analytics_rule, true, true);
ASSERT_TRUE(create_op.ok());
auto existing_rule = analyticsManager.get_rule("top_search_queries2").get();
ASSERT_EQ("top_queriesUpdated", existing_rule["params"]["destination"]["collection"].get<std::string>());
// reject when upsert is not enabled
create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_FALSE(create_op.ok());
ASSERT_EQ("There's already another configuration with the name `top_search_queries2`.", create_op.error());
// try deleting both rules
analyticsManager.remove_rule("top_search_queries");
analyticsManager.remove_rule("top_search_queries2");
missing_rule_op = analyticsManager.get_rule("top_search_queries");
ASSERT_FALSE(missing_rule_op.ok());
missing_rule_op = analyticsManager.get_rule("top_search_queries2");
ASSERT_FALSE(missing_rule_op.ok());
}

View File

@ -1311,6 +1311,21 @@ TEST_F(CollectionSpecificMoreTest, UpdateArrayWithNullValue) {
auto results = coll1->search("alpha", {"tags"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(0, results["found"].get<size_t>());
// update document with no value (optional field) with a null value
auto doc3 = R"({
"id": "2"
})"_json;
ASSERT_TRUE(coll1->add(doc3.dump(), CREATE).ok());
results = coll1->search("alpha", {"tags"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {false}).get();
ASSERT_EQ(0, results["found"].get<size_t>());
doc_update = R"({
"id": "2",
"tags": null
})"_json;
ASSERT_TRUE(coll1->add(doc_update.dump(), UPDATE).ok());
// via upsert
doc_update = R"({

View File

@ -1054,11 +1054,11 @@ TEST_F(CollectionTest, KeywordQueryReturnsResultsBasedOnPerPageParam) {
ASSERT_EQ(422, res_op.code());
ASSERT_STREQ("Only upto 250 hits can be fetched per page.", res_op.error().c_str());
// when page number is not valid
res_op = coll_mul_fields->search("w", query_fields, "", facets, sort_fields, {0}, 10, 0,
FREQUENCY, {true}, 1000, empty, empty, 10);
ASSERT_FALSE(res_op.ok());
ASSERT_EQ(422, res_op.code());
// when page number is zero, use the first page
results = coll_mul_fields->search("w", query_fields, "", facets, sort_fields, {0}, 3, 0,
FREQUENCY, {true}, 1000, empty, empty, 10).get();
ASSERT_EQ(3, results["hits"].size());
ASSERT_EQ(6, results["found"].get<int>());
// do pagination
@ -3026,11 +3026,11 @@ TEST_F(CollectionTest, WildcardQueryReturnsResultsBasedOnPerPageParam) {
ASSERT_EQ(422, res_op.code());
ASSERT_STREQ("Only upto 250 hits can be fetched per page.", res_op.error().c_str());
// when page number is not valid
res_op = collection->search("*", query_fields, "", facets, sort_fields, {0}, 10, 0,
FREQUENCY, {false}, 1000, empty, empty, 10);
ASSERT_FALSE(res_op.ok());
ASSERT_EQ(422, res_op.code());
// when page number is 0, just fetch first page
results = collection->search("*", query_fields, "", facets, sort_fields, {0}, 10, 0,
FREQUENCY, {false}, 1000, empty, empty, 10).get();
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(25, results["found"].get<int>());
// do pagination
@ -5176,4 +5176,45 @@ TEST_F(CollectionTest, EmbeddingFieldEmptyArrayInDocument) {
ASSERT_FALSE(get_op.get()["embedding"].is_null());
ASSERT_EQ(384, get_op.get()["embedding"].size());
}
TEST_F(CollectionTest, CatchPartialResponseFromRemoteEmbedding) {
std::string partial_json = R"({
"results": [
{
"embedding": [
0.0,
0.0,
0.0
],
"text": "butter"
},
{
"embedding": [
0.0,
0.0,
0.0
],
"text": "butterball"
},
{
"embedding": [
0.0,
0.0)";
nlohmann::json req_body = R"({
"inputs": [
"butter",
"butterball",
"butterfly"
]
})"_json;
OpenAIEmbedder embedder("", "");
auto res = embedder.get_error_json(req_body, 200, partial_json);
ASSERT_EQ(res["response"]["error"], "Malformed response from OpenAI API.");
ASSERT_EQ(res["request"]["body"], req_body);
}

View File

@ -768,7 +768,7 @@ TEST_F(CoreAPIUtilsTest, SearchPagination) {
ASSERT_EQ(400, results["code"].get<size_t>());
ASSERT_EQ("Parameter `offset` must be an unsigned integer.", results["error"].get<std::string>());
// when page is 0 and no offset is sent
// when page is 0 and offset is NOT sent, we treat it as page=1
search.clear();
req->params.clear();
body["searches"] = nlohmann::json::array();
@ -782,8 +782,29 @@ TEST_F(CoreAPIUtilsTest, SearchPagination) {
post_multi_search(req, res);
results = nlohmann::json::parse(res->body)["results"][0];
ASSERT_EQ(422, results["code"].get<size_t>());
ASSERT_EQ("Parameter `page` must be an integer of value greater than 0.", results["error"].get<std::string>());
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(1, results["page"].get<size_t>());
ASSERT_EQ(0, results.count("offset"));
// when both page and offset are sent, use page
search.clear();
req->params.clear();
body["searches"] = nlohmann::json::array();
search["collection"] = "coll1";
search["q"] = "title";
search["page"] = "2";
search["offset"] = "30";
search["query_by"] = "name";
search["sort_by"] = "points:desc";
body["searches"].push_back(search);
req->body = body.dump();
post_multi_search(req, res);
results = nlohmann::json::parse(res->body)["results"][0];
ASSERT_EQ(10, results["hits"].size());
ASSERT_EQ(2, results["page"].get<size_t>());
ASSERT_EQ(0, results.count("offset"));
}
TEST_F(CoreAPIUtilsTest, ExportWithFilter) {
@ -1116,4 +1137,27 @@ TEST_F(CoreAPIUtilsTest, TestProxyInvalid) {
ASSERT_EQ(400, resp->status_code);
ASSERT_EQ("Headers must be a JSON object.", nlohmann::json::parse(resp->body)["message"]);
}
TEST_F(CoreAPIUtilsTest, TestProxyTimeout) {
nlohmann::json body;
auto req = std::make_shared<http_req>();
auto resp = std::make_shared<http_res>(nullptr);
// exercise the remote timeout path with a 1 ms budget
body["url"] = "https://typesense.org/docs/";
body["method"] = "GET";
body["headers"] = nlohmann::json::object();
body["headers"]["timeout_ms"] = "1";
body["headers"]["num_retry"] = "1";
req->body = body.dump();
post_proxy(req, resp);
ASSERT_EQ(408, resp->status_code);
ASSERT_EQ("Server error on remote server. Please try again later.", nlohmann::json::parse(resp->body)["message"]);
}

View File

@ -238,7 +238,7 @@ TEST(TokenizerTest, ShouldTokenizeLocaleText) {
// window used to locate the starting offset for snippet on the text
while(tokenizer.next(raw_token, raw_token_index, tok_start, tok_end)) {
LOG(INFO) << "tok_start: " << tok_start;
//LOG(INFO) << "tok_start: " << tok_start;
}
return ;