mirror of https://github.com/typesense/typesense.git
synced 2025-05-19 05:08:43 +08:00
Merge branch 'typesense:v0.26-facets' into v0.26-facets
commit cc299dbda2
.bazelversion (new file, +1)
@@ -0,0 +1 @@
+5.2.0
.github/workflows/tests.yml (vendored, +1)
@@ -45,6 +45,7 @@ jobs:
           workflow_conclusion: ""
           if_no_artifact_found: warn
           skip_unpack: true
+          branch: ${{ github.base_ref || github.head_ref || github.ref_name }}

       - name: Uncompress bazel cache
         run: |
@@ -46,6 +46,11 @@ struct click_event_t {
     }
 };

+struct popular_clicks_t {
+    std::string counter_field;
+    std::map<std::string, uint64_t> docid_counts;
+};
+
 struct query_hits_count_t {
     std::string query;
     uint64_t timestamp;
@@ -137,6 +142,9 @@ private:
     // suggestion collection => nohits queries
     std::unordered_map<std::string, QueryAnalytics*> nohits_queries;

+    // collection => popular clicks
+    std::unordered_map<std::string, popular_clicks_t> popular_clicks;
+
     //query collection => click events
     std::unordered_map<std::string, std::vector<click_event_t>> query_collection_click_events;

@@ -163,6 +171,7 @@ public:
     static constexpr const char* QUERY_HITS_COUNT = "$QH";
     static constexpr const char* POPULAR_QUERIES_TYPE = "popular_queries";
     static constexpr const char* NOHITS_QUERIES_TYPE = "nohits_queries";
+    static constexpr const char* POPULAR_CLICKS_TYPE = "popular_clicks";

     static AnalyticsManager& get_instance() {
         static AnalyticsManager instance;
@@ -191,6 +200,8 @@ public:

     void dispose();

+    Store* get_analytics_store();
+
     void persist_query_events(ReplicationState *raft_server, uint64_t prev_persistence_s);

     std::unordered_map<std::string, QueryAnalytics*> get_popular_queries();
@@ -200,8 +211,12 @@ public:

     void persist_query_hits_click_events(ReplicationState *raft_server, uint64_t prev_persistence_s);

+    void persist_popular_clicks(ReplicationState *raft_server, uint64_t prev_persistence_s);
+
     nlohmann::json get_click_events();

+    std::unordered_map<std::string, popular_clicks_t> get_popular_clicks();
+
     Option<bool> write_events_to_store(nlohmann::json& event_jsons);

     void add_nohits_query(const std::string& query_collection,
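
Note: popular_clicks_t pairs the destination counter field with a per-document
click tally; add_click_event (further down in this diff) bumps docid_counts on
every click event. A minimal standalone sketch of that accumulation (the field
name and counts here are illustrative):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    // Mirrors the struct introduced in this hunk.
    struct popular_clicks_t {
        std::string counter_field;
        std::map<std::string, uint64_t> docid_counts;
    };

    int main() {
        popular_clicks_t clicks{"popularity", {}};
        // One click on doc "1", two on doc "3" -- the same increment the
        // AnalyticsManager performs per click event.
        clicks.docid_counts["1"]++;
        clicks.docid_counts["3"]++;
        clicks.docid_counts["3"]++;
        for (const auto& [doc_id, count] : clicks.docid_counts) {
            std::cout << doc_id << " => " << count << "\n";  // 1 => 1, 3 => 2
        }
        return 0;
    }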
@@ -399,6 +399,8 @@ public:

    std::vector<field> get_fields();

+    bool contains_field(const std::string&);
+
    std::unordered_map<std::string, field> get_dynamic_fields();

    tsl::htrie_map<char, field> get_schema();
@@ -84,7 +84,8 @@ public:

     Store(const std::string & state_dir_path,
           const size_t wal_ttl_secs = 24*60*60,
-          const size_t wal_size_mb = 1024, bool disable_wal = true): state_dir_path(state_dir_path) {
+          const size_t wal_size_mb = 1024, bool disable_wal = true,
+          const size_t db_compaction_interval = 604800): state_dir_path(state_dir_path) {
         // Optimize RocksDB
         options.IncreaseParallelism();
         options.OptimizeLevelStyleCompaction();
@@ -94,7 +95,7 @@ public:
         options.max_write_buffer_number = 2;
         options.merge_operator.reset(new UInt64AddOperator);
         options.compression = rocksdb::CompressionType::kSnappyCompression;
-        options.periodic_compaction_seconds = 604800;
+        options.periodic_compaction_seconds = db_compaction_interval;

         options.max_log_file_size = 4*1048576;
         options.keep_log_file_num = 5;
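
Note: db_compaction_interval feeds RocksDB's periodic_compaction_seconds, which
makes SST files that have not been compacted within the window eligible for
compaction (reclaiming space held by deleted keys). A minimal sketch of the
setting in isolation (the database path is illustrative):

    #include <rocksdb/db.h>
    #include <rocksdb/options.h>

    int main() {
        rocksdb::Options options;
        options.create_if_missing = true;
        // The knob the Store constructor now exposes; one week is the new default.
        options.periodic_compaction_seconds = 604800;

        rocksdb::DB* db = nullptr;
        rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/compaction_demo", &db);
        if (s.ok()) {
            delete db;
        }
        return s.ok() ? 0 : 1;
    }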
@@ -335,4 +335,6 @@ struct StringUtils {
     static Option<bool> tokenize_filter_query(const std::string& filter_query, std::queue<std::string>& tokens);

     static Option<bool> split_include_fields(const std::string& include_fields, std::vector<std::string>& tokens);
+
+    static size_t get_occurence_count(const std::string& str, char symbol);
 };
@@ -72,6 +72,8 @@ private:

     uint32_t housekeeping_interval;

+    uint32_t db_compaction_interval;
+
 protected:

     Config() {
@@ -100,6 +102,7 @@ protected:
         this->enable_search_analytics = false;
         this->analytics_flush_interval = 3600;  // in seconds
         this->housekeeping_interval = 1800;  // in seconds
+        this->db_compaction_interval = 604800;  // in seconds
     }

     Config(Config const&) {
@@ -309,6 +312,10 @@ public:
         return this->housekeeping_interval;
     }

+    size_t get_db_compaction_interval() const {
+        return this->db_compaction_interval;
+    }
+
     size_t get_thread_pool_size() const {
         return this->thread_pool_size;
     }
@@ -449,6 +456,10 @@ public:
             this->housekeeping_interval = std::stoi(get_env("TYPESENSE_HOUSEKEEPING_INTERVAL"));
         }

+        if(!get_env("TYPESENSE_DB_COMPACTION_INTERVAL").empty()) {
+            this->db_compaction_interval = std::stoi(get_env("TYPESENSE_DB_COMPACTION_INTERVAL"));
+        }
+
         if(!get_env("TYPESENSE_THREAD_POOL_SIZE").empty()) {
             this->thread_pool_size = std::stoi(get_env("TYPESENSE_THREAD_POOL_SIZE"));
         }
@@ -620,6 +631,10 @@ public:
             this->housekeeping_interval = (int) reader.GetInteger("server", "housekeeping-interval", 1800);
         }

+        if(reader.Exists("server", "db-compaction-interval")) {
+            this->db_compaction_interval = (int) reader.GetInteger("server", "db-compaction-interval", 1800);
+        }
+
         if(reader.Exists("server", "thread-pool-size")) {
             this->thread_pool_size = (int) reader.GetInteger("server", "thread-pool-size", 0);
         }
@@ -782,6 +797,10 @@ public:
             this->housekeeping_interval = options.get<uint32_t>("housekeeping-interval");
         }

+        if(options.exist("db-compaction-interval")) {
+            this->db_compaction_interval = options.get<uint32_t>("db-compaction-interval");
+        }
+
         if(options.exist("thread-pool-size")) {
             this->thread_pool_size = options.get<uint32_t>("thread-pool-size");
         }
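
Note: the interval is read from all three configuration surfaces Config already
supports: the db-compaction-interval CLI flag and config-file key, and the
TYPESENSE_DB_COMPACTION_INTERVAL environment variable. A hedged sketch of just
the env-var pass (get_env here is a stand-in for the real helper):

    #include <cstdint>
    #include <cstdlib>
    #include <string>

    // Stand-in: assumes the real helper returns "" when the variable is unset.
    static std::string get_env(const char* name) {
        const char* value = std::getenv(name);
        return value != nullptr ? value : "";
    }

    int main() {
        uint32_t db_compaction_interval = 604800;  // default from this diff

        // Same pattern as the env-var block above.
        if(!get_env("TYPESENSE_DB_COMPACTION_INTERVAL").empty()) {
            db_compaction_interval = std::stoi(get_env("TYPESENSE_DB_COMPACTION_INTERVAL"));
        }
        return db_compaction_interval > 0 ? 0 : 1;
    }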
@@ -43,7 +43,8 @@ Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool upsert,
         return Option<bool>(400, "Bad or missing params.");
     }

-    if(payload["type"] == POPULAR_QUERIES_TYPE || payload["type"] == NOHITS_QUERIES_TYPE) {
+    if(payload["type"] == POPULAR_QUERIES_TYPE || payload["type"] == NOHITS_QUERIES_TYPE
+        || payload["type"] == POPULAR_CLICKS_TYPE) {
         return create_queries_index(payload, upsert, write_to_disk);
     }

@@ -84,6 +85,15 @@ Option<bool> AnalyticsManager::create_queries_index(nlohmann::json &payload, boo
         return Option<bool>(400, "Must contain a valid destination collection.");
     }

+    std::string counter_field;
+
+    if(params["destination"].contains("counter_field")) {
+        if (!params["destination"]["counter_field"].is_string()) {
+            return Option<bool>(400, "Must contain a valid counter_field.");
+        }
+        counter_field = params["destination"]["counter_field"].get<std::string>();
+    }
+
     const std::string& suggestion_collection = params["destination"]["collection"].get<std::string>();
     suggestion_config_t suggestion_config;
     suggestion_config.name = suggestion_config_name;
@@ -98,6 +108,19 @@ Option<bool> AnalyticsManager::create_queries_index(nlohmann::json &payload, boo
         if (!upsert && nohits_queries.count(suggestion_collection) != 0) {
             return Option<bool>(400, "There's already another configuration for this destination collection.");
         }
+    } else if(payload["type"] == POPULAR_CLICKS_TYPE) {
+        if (!upsert && popular_clicks.count(suggestion_collection) != 0) {
+            return Option<bool>(400, "There's already another configuration for this destination collection.");
+        }
+
+        auto coll = CollectionManager::get_instance().get_collection(suggestion_collection).get();
+        if(coll != nullptr) {
+            if (!coll->contains_field(counter_field)) {
+                return Option<bool>(404, "counter_field `" + counter_field + "` not found in destination collection.");
+            }
+        } else {
+            return Option<bool>(404, "Collection `" + suggestion_collection + "` not found.");
+        }
     }

     for(const auto& coll: params["source"]["collections"]) {
@@ -131,6 +154,8 @@ Option<bool> AnalyticsManager::create_queries_index(nlohmann::json &payload, boo
     } else if(payload["type"] == NOHITS_QUERIES_TYPE) {
         QueryAnalytics *noresultsQueries = new QueryAnalytics(limit);
         nohits_queries.emplace(suggestion_collection, noresultsQueries);
+    } else if(payload["type"] == POPULAR_CLICKS_TYPE) {
+        popular_clicks.emplace(suggestion_collection, popular_clicks_t{counter_field, {}});
     }

     if(write_to_disk) {
@@ -278,6 +303,13 @@ Option<bool> AnalyticsManager::add_click_event(const std::string &query_collecti
     click_event_t click_event(query, now_ts_useconds, user_id, doc_id, position);
     click_events_vec.emplace_back(click_event);

+    auto popular_clicks_it = popular_clicks.find(query_collection);
+    if(popular_clicks_it != popular_clicks.end()) {
+        popular_clicks_it->second.docid_counts[doc_id]++;
+    } else {
+        LOG(ERROR) << "collection " << query_collection << " not found in analytics rule.";
+    }
+
     return Option<bool>(true);
 }

@@ -346,6 +378,7 @@ void AnalyticsManager::run(ReplicationState* raft_server) {
         checkEventsExpiry();
         persist_query_events(raft_server, prev_persistence_s);
         persist_query_hits_click_events(raft_server, prev_persistence_s);
+        persist_popular_clicks(raft_server, prev_persistence_s);

         prev_persistence_s = std::chrono::duration_cast<std::chrono::seconds>(
                 std::chrono::system_clock::now().time_since_epoch()).count();
@@ -508,6 +541,37 @@ void AnalyticsManager::persist_query_hits_click_events(ReplicationState *raft_se
     }
 }

+void AnalyticsManager::persist_popular_clicks(ReplicationState *raft_server, uint64_t prev_persistence_s) {
+    auto send_http_response = [&](const std::string& import_payload, const std::string& collection) {
+        std::string leader_url = raft_server->get_leader_url();
+        if (!leader_url.empty()) {
+            const std::string &base_url = leader_url + "collections/" + collection;
+            std::string res;
+
+            const std::string &update_url = base_url + "/documents/import?action=update";
+            std::map<std::string, std::string> res_headers;
+            long status_code = HttpClient::post_response(update_url, import_payload,
+                                                         res, res_headers, {}, 10 * 1000, true);
+
+            if (status_code != 200) {
+                LOG(ERROR) << "Error while sending popular_clicks events to leader. "
+                           << "Status code: " << status_code << ", response: " << res;
+            }
+        }
+    };
+
+    for(const auto& popular_clicks_it : popular_clicks) {
+        auto coll = popular_clicks_it.first;
+        nlohmann::json doc;
+        auto counter_field = popular_clicks_it.second.counter_field;
+        for(const auto& popular_click : popular_clicks_it.second.docid_counts) {
+            doc["id"] = popular_click.first;
+            doc[counter_field] = popular_click.second;
+            send_http_response(doc.dump(), coll);
+        }
+    }
+}
+
 void AnalyticsManager::stop() {
     quit = true;
     cv.notify_all();
@@ -534,6 +598,10 @@ void AnalyticsManager::init(Store* store, Store* analytics_store) {
     this->analytics_store = analytics_store;
 }

+Store* AnalyticsManager::get_analytics_store() {
+    return this->analytics_store;
+}
+
 std::unordered_map<std::string, QueryAnalytics*> AnalyticsManager::get_popular_queries() {
     std::unique_lock lk(mutex);
     return popular_queries;
@@ -544,6 +612,11 @@ std::unordered_map<std::string, QueryAnalytics*> AnalyticsManager::get_nohits_qu
     return nohits_queries;
 }

+std::unordered_map<std::string, popular_clicks_t> AnalyticsManager::get_popular_clicks() {
+    std::unique_lock lk(mutex);
+    return popular_clicks;
+}
+
 nlohmann::json AnalyticsManager::get_click_events() {
     std::unique_lock lk(mutex);
     std::vector<std::string> click_event_jsons;
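
Note: persist_popular_clicks flushes each accumulated counter to the destination
collection as a partial document update (action=update), one JSON document per
clicked doc id. The payload construction, reduced to a standalone sketch
(collection and field names are illustrative):

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <nlohmann/json.hpp>

    int main() {
        // Mirrors popular_clicks_t: counter field name + per-doc click counts.
        std::string counter_field = "popularity";  // illustrative field name
        std::map<std::string, uint64_t> docid_counts = {{"1", 1}, {"3", 2}};

        for (const auto& [doc_id, count] : docid_counts) {
            nlohmann::json doc;
            doc["id"] = doc_id;          // the destination document to patch
            doc[counter_field] = count;  // counter value written via action=update
            // Each line becomes one body for
            // POST /collections/<coll>/documents/import?action=update
            std::cout << doc.dump() << "\n";
        }
        return 0;
    }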
@@ -2155,6 +2155,10 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
     parse_search_query(query, q_include_tokens,
                        field_query_tokens[0].q_exclude_tokens, field_query_tokens[0].q_phrases, "",
                        false, stopwords_set);
+
+    process_filter_overrides(filter_overrides, q_include_tokens, token_order, filter_tree_root,
+                             included_ids, excluded_ids, override_metadata);
+
     for(size_t i = 0; i < q_include_tokens.size(); i++) {
         auto& q_include_token = q_include_tokens[i];
         field_query_tokens[0].q_include_tokens.emplace_back(i, q_include_token, (i == q_include_tokens.size() - 1),
@@ -3328,7 +3332,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
         if(exclude_operator_prior) {
             q_exclude_tokens.push_back(phrase);
         } else {
-            q_phrases.push_back(phrase);
+            q_include_tokens.insert(q_include_tokens.end(), phrase.begin(), phrase.end());
         }
     }

@@ -4361,6 +4365,11 @@ std::vector<field> Collection::get_fields() {
     return fields;
 }

+bool Collection::contains_field(const std::string &field) {
+    std::shared_lock lock(mutex);
+    return search_schema.find(field) != search_schema.end();
+}
+
 std::unordered_map<std::string, field> Collection::get_dynamic_fields() {
     std::shared_lock lock(mutex);
     return dynamic_fields;
@@ -5841,7 +5850,7 @@ bool Collection::get_enable_nested_fields() {

 Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector<facet>& facets) const {
     const std::regex base_pattern(".+\\(.*\\)");
-    const std::regex range_pattern("[[a-z A-Z]+:\\[([+-]?([0-9]*[.])?[0-9]*)\\,\\s*([+-]?([0-9]*[.])?[0-9]*)\\]");
+    const std::regex range_pattern("[[0-9]*[a-z A-Z]+[0-9]*:\\[([+-]?([0-9]*[.])?[0-9]*)\\,\\s*([+-]?([0-9]*[.])?[0-9]*)\\]");
     const std::string _alpha = "_alpha";

     if ((facet_field.find(":") != std::string::npos)
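
Note: the widened range_pattern lets range-facet labels carry digits (e.g.
10thAD), which the earlier [a-z A-Z]+ prefix rejected. A quick standalone check
of the new pattern (same regex string as this hunk):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        const std::regex range_pattern("[[0-9]*[a-z A-Z]+[0-9]*:\\[([+-]?([0-9]*[.])?[0-9]*)\\,\\s*([+-]?([0-9]*[.])?[0-9]*)\\]");

        // Alphanumeric label, as exercised by the RangeFacetAlphanumericLabels tests.
        std::cout << std::regex_search(std::string("year(10thAD:[1000,1500])"), range_pattern) << "\n";  // 1
        // Purely alphabetic labels continue to work.
        std::cout << std::regex_search(std::string("year(old:[1000,1500])"), range_pattern) << "\n";     // 1
        return 0;
    }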
@@ -2791,10 +2791,71 @@ bool put_conversation_model(const std::shared_ptr<http_req>& req, const std::sha
     res->set_200(model.dump());
     return true;
 }

-bool get_click_events(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
-    auto click_events = AnalyticsManager::get_instance().get_click_events();
-
-    res->set_200(click_events.dump());
+bool get_click_events(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
+    auto analytics_store = AnalyticsManager::get_instance().get_analytics_store();
+    if (!analytics_store) {
+        LOG(ERROR) << "Analytics store not initialized.";
+        return true;
+    }
+
+    export_state_t *export_state = nullptr;
+    auto click_event_prefix = std::string(AnalyticsManager::CLICK_EVENT) + "_";
+    if (req->data == nullptr) {
+        export_state = new export_state_t();
+        req->data = export_state;
+
+        export_state->iter_upper_bound_key = std::string(AnalyticsManager::CLICK_EVENT) + "`";
+        export_state->iter_upper_bound = new rocksdb::Slice(export_state->iter_upper_bound_key);
+        export_state->it = analytics_store->scan(click_event_prefix, export_state->iter_upper_bound);
+    } else {
+        export_state = dynamic_cast<export_state_t *>(req->data);
+    }
+
+    if (export_state->it != nullptr) {
+        rocksdb::Iterator *it = export_state->it;
+        size_t batch_counter = 0;
+        std::string().swap(res->body);
+
+        if (!it->Valid()) {
+            LOG(ERROR) << "No click events found in db.";
+            req->last_chunk_aggregate = true;
+            res->final = true;
+            res->set_404();
+            stream_response(req, res);
+            return false;
+        }
+
+        while (it->Valid() && it->key().ToString().compare(0, click_event_prefix.size(), click_event_prefix) == 0) {
+            res->body += it->value().ToString();
+            it->Next();
+
+            // append a new line character if there is going to be one more record to send
+            if (it->Valid() &&
+                it->key().ToString().compare(0, click_event_prefix.size(), click_event_prefix) == 0) {
+                res->body += "\n";
+                req->last_chunk_aggregate = false;
+                res->final = false;
+            } else {
+                req->last_chunk_aggregate = true;
+                res->final = true;
+            }
+
+            batch_counter++;
+            if (batch_counter == export_state->export_batch_size) {
+                break;
+            }
+        }
+    } else {
+        req->last_chunk_aggregate = true;
+        res->final = true;
+    }
+
+    res->content_type_header = "text/plain; charset=utf-8";
+    res->status_code = 200;
+
+    stream_response(req, res);
+
     return true;
 }

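
Note: the rewritten handler streams events in batches by parking the RocksDB
iterator on req->data between invocations. The core prefix-scan loop, reduced
to a standalone sketch (paths, key prefix and batch size are illustrative):

    #include <iostream>
    #include <string>
    #include <rocksdb/db.h>

    int main() {
        rocksdb::Options options;
        options.create_if_missing = true;
        rocksdb::DB* db = nullptr;
        if (!rocksdb::DB::Open(options, "/tmp/click_events_demo", &db).ok()) {
            return 1;
        }
        db->Put(rocksdb::WriteOptions(), "$CE_1", "{\"q\":\"technology\"}");
        db->Put(rocksdb::WriteOptions(), "$CE_2", "{\"q\":\"shorts\"}");

        const std::string prefix = "$CE_";  // illustrative key prefix
        const size_t batch_size = 100;

        rocksdb::Iterator* it = db->NewIterator(rocksdb::ReadOptions());
        size_t sent = 0;
        // Same shape as the handler's loop: walk keys while they share the
        // prefix, stopping early once a batch is full (the real handler keeps
        // the iterator around for the next chunk).
        for (it->Seek(prefix); it->Valid() && it->key().starts_with(prefix); it->Next()) {
            std::cout << it->value().ToString() << "\n";
            if (++sent == batch_size) {
                break;
            }
        }
        delete it;
        delete db;
        return 0;
    }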
@@ -15,7 +15,13 @@ Option<bool> EmbedderManager::validate_and_init_model(const nlohmann::json& mode
         return validate_and_init_remote_model(model_config, num_dims);
     } else {
         LOG(INFO) << "Validating and initializing local model: " << model_name;
-        return validate_and_init_local_model(model_config, num_dims);
+        auto op = validate_and_init_local_model(model_config, num_dims);
+        if(op.ok()) {
+            LOG(INFO) << "Finished initializing local model: " << model_name;
+        } else {
+            LOG(ERROR) << "Failed to initialize local model " << model_name << ", error: " << op.error();
+        }
+        return op;
     }
 }

@@ -786,8 +786,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
     }
     else if(afield.type == field_types::FLOAT) {
         float raw_val = document[afield.name].get<float>();
-        auto fhash = reinterpret_cast<uint32_t&>(raw_val);
-        facet_value_id_t facet_value_id(StringUtils::float_to_str(raw_val), fhash);
+        const std::string& float_str_val = StringUtils::float_to_str(raw_val);
+        float normalized_raw_val = std::stof(float_str_val);
+        auto fhash = reinterpret_cast<uint32_t&>(normalized_raw_val);
+        facet_value_id_t facet_value_id(float_str_val, fhash);
         fvalue_to_seq_ids[facet_value_id].push_back(seq_id);
         seq_id_to_fvalues[seq_id].push_back(facet_value_id);
     }
@@ -1387,7 +1389,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
         if(a_facet.get_range(std::stoll(doc_val), range_pair)) {
             const auto& range_id = range_pair.first;
             facet_count_t& facet_count = a_facet.result_map[range_id];
-            facet_count.count = kv.second.count;
+            facet_count.count += kv.second.count;
         }
     } else {
         facet_count_t& facet_count = a_facet.value_result_map[kv.first];
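
Note: two fixes here. First, range-facet buckets now accumulate with += instead
of overwriting, so a bucket covering several distinct values reports their sum.
Second, float facet values are hashed after a round trip through
float_to_str/stof, so the hash always corresponds to the string users see. A
sketch of the normalization idea (float_to_str below is a stand-in with 6-digit
precision, roughly in the spirit of the real helper):

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Stand-in formatter for StringUtils::float_to_str.
    static std::string float_to_str(float value) {
        std::ostringstream os;
        os.precision(6);
        os << value;
        return os.str();
    }

    static uint32_t float_hash(float value) {
        uint32_t bits = 0;
        std::memcpy(&bits, &value, sizeof(bits));  // safer spelling of reinterpret_cast<uint32_t&>
        return bits;
    }

    int main() {
        float raw_val = 1.0000001f;  // displays as "1" at 6-digit precision
        const std::string float_str_val = float_to_str(raw_val);
        float normalized_raw_val = std::stof(float_str_val);

        std::cout << float_str_val << "\n";                                         // "1"
        std::cout << (float_hash(raw_val) == float_hash(1.0f)) << "\n";             // 0: raw bits differ
        std::cout << (float_hash(normalized_raw_val) == float_hash(1.0f)) << "\n";  // 1: normalized agrees
        return 0;
    }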
@@ -82,7 +82,7 @@ void master_server_routes() {
     server->post("/analytics/events", post_create_event);

     //collection based query click events
-    server->get("/analytics/click_events", get_click_events);
+    server->get("/analytics/click_events", get_click_events, false, true);
     server->post("/analytics/click_events", post_create_event);
     server->post("/analytics/click_events/replicate", post_replicate_events);
     server->get("/analytics/query_hits_counts", get_query_hits_counts);
@@ -594,3 +594,7 @@ size_t StringUtils::split_facet(const std::string &s, std::vector<std::string> &
     std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf8conv;
     return utf8conv.from_bytes(bytes).size();
 }*/
+
+size_t StringUtils::get_occurence_count(const std::string &str, char symbol) {
+    return std::count(str.begin(), str.end(), symbol);
+}
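
Note: the new helper is a thin wrapper over std::count, likely used to detect
unbalanced characters such as the unclosed quote that the
ExactMatchWithoutClosingSymbol test (further down) exercises; that call site is
not part of this diff, so this is an assumption. Minimal usage:

    #include <algorithm>
    #include <iostream>
    #include <string>

    // Same logic as the new StringUtils helper: count occurrences of a character.
    static size_t get_occurence_count(const std::string& str, char symbol) {
        return std::count(str.begin(), str.end(), symbol);
    }

    int main() {
        // e.g. counting quote characters in a query like "\"state of the art"
        std::cout << get_occurence_count("\"state of the art", '"') << "\n";  // 1: quote left unclosed
        return 0;
    }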
@@ -109,6 +109,7 @@ void init_cmdline_options(cmdline::parser & options, int argc, char **argv) {
     options.add<int>("cache-num-entries", '\0', "Number of entries to cache.", false, 1000);
     options.add<uint32_t>("analytics-flush-interval", '\0', "Frequency of persisting analytics data to disk (in seconds).", false, 3600);
     options.add<uint32_t>("housekeeping-interval", '\0', "Frequency of housekeeping background job (in seconds).", false, 1800);
+    options.add<uint32_t>("db-compaction-interval", '\0', "Frequency of RocksDB compaction (in seconds).", false, 604800);

     // DEPRECATED
     options.add<std::string>("listen-address", 'h', "[DEPRECATED: use `api-address`] Address to which Typesense API service binds.", false, "0.0.0.0");
@@ -394,7 +395,7 @@ int run_server(const Config & config, const std::string & version, void (*master
     ThreadPool replication_thread_pool(num_threads);

     // primary DB used for storing the documents: we will not use WAL since Raft provides that
-    Store store(db_dir);
+    Store store(db_dir, 24*60*60, 1024, true, config.get_db_compaction_interval());

     // meta DB for storing house keeping things
     Store meta_store(meta_dir, 24*60*60, 1024, false);
@@ -402,7 +403,7 @@ int run_server(const Config & config, const std::string & version, void (*master
     //analytics DB for storing query click events
     std::unique_ptr<Store> analytics_store = nullptr;
     if(!analytics_dir.empty()) {
-        analytics_store.reset(new Store(analytics_dir, 24 * 60 * 60, 1024, false));
+        analytics_store.reset(new Store(analytics_dir, 24 * 60 * 60, 1024, true, config.get_db_compaction_interval()));
     }

     curl_global_init(CURL_GLOBAL_SSL);
|
||||
ASSERT_EQ("management", resp[0]["q"]);
|
||||
ASSERT_EQ(13, resp[0]["user_id"]);
|
||||
ASSERT_EQ(834, resp[0]["hits_count"]);
|
||||
}
|
||||
|
||||
TEST_F(AnalyticsManagerTest, PopularityScore) {
|
||||
//reset click event rate limit
|
||||
analyticsManager.resetRateLimit();
|
||||
|
||||
nlohmann::json products_schema = R"({
|
||||
"name": "products",
|
||||
"fields": [
|
||||
{"name": "title", "type": "string"},
|
||||
{"name": "popularity", "type": "int32"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
Collection* products_coll = collectionManager.create_collection(products_schema).get();
|
||||
|
||||
nlohmann::json doc;
|
||||
doc["popularity"] = 0;
|
||||
|
||||
doc["id"] = "0";
|
||||
doc["title"] = "Cool trousers";
|
||||
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
|
||||
|
||||
doc["id"] = "1";
|
||||
doc["title"] = "Funky trousers";
|
||||
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
|
||||
|
||||
doc["id"] = "2";
|
||||
doc["title"] = "Casual shorts";
|
||||
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
|
||||
|
||||
doc["id"] = "3";
|
||||
doc["title"] = "Trendy shorts";
|
||||
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
|
||||
|
||||
doc["id"] = "4";
|
||||
doc["title"] = "Formal pants";
|
||||
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
|
||||
|
||||
nlohmann::json analytics_rule = R"({
|
||||
"name": "product_popularity",
|
||||
"type": "popular_clicks",
|
||||
"params": {
|
||||
"source": {
|
||||
"collections": ["products"]
|
||||
},
|
||||
"destination": {
|
||||
"collection": "products",
|
||||
"counter_field": "popularity"
|
||||
}
|
||||
}
|
||||
})"_json;
|
||||
|
||||
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
|
||||
ASSERT_TRUE(create_op.ok());
|
||||
|
||||
std::shared_ptr<http_req> req = std::make_shared<http_req>();
|
||||
std::shared_ptr<http_res> res = std::make_shared<http_res>(nullptr);
|
||||
|
||||
nlohmann::json event1 = R"({
|
||||
"type": "query_click",
|
||||
"data": {
|
||||
"q": "trousers",
|
||||
"collection": "products",
|
||||
"doc_id": "1",
|
||||
"position": 2,
|
||||
"user_id": "13"
|
||||
}
|
||||
})"_json;
|
||||
|
||||
req->body = event1.dump();
|
||||
ASSERT_TRUE(post_create_event(req, res));
|
||||
|
||||
nlohmann::json event2 = R"({
|
||||
"type": "query_click",
|
||||
"data": {
|
||||
"q": "shorts",
|
||||
"collection": "products",
|
||||
"doc_id": "3",
|
||||
"position": 4,
|
||||
"user_id": "11"
|
||||
}
|
||||
})"_json;
|
||||
|
||||
req->body = event2.dump();
|
||||
ASSERT_TRUE(post_create_event(req, res));
|
||||
|
||||
ASSERT_TRUE(post_create_event(req, res));
|
||||
|
||||
auto popular_clicks = analyticsManager.get_popular_clicks();
|
||||
ASSERT_EQ(1, popular_clicks.size());
|
||||
ASSERT_EQ("popularity", popular_clicks["products"].counter_field);
|
||||
ASSERT_EQ(2, popular_clicks["products"].docid_counts.size());
|
||||
ASSERT_EQ(1, popular_clicks["products"].docid_counts["1"]);
|
||||
ASSERT_EQ(2, popular_clicks["products"].docid_counts["3"]);
|
||||
|
||||
//trigger persistance event
|
||||
for(const auto& popular_clicks_it : popular_clicks) {
|
||||
auto coll = popular_clicks_it.first;
|
||||
nlohmann::json doc;
|
||||
auto counter_field = popular_clicks_it.second.counter_field;
|
||||
req->params["collection"] = "products";
|
||||
req->params["action"] = "update";
|
||||
for(const auto& popular_click : popular_clicks_it.second.docid_counts) {
|
||||
doc["id"] = popular_click.first;
|
||||
doc[counter_field] = popular_click.second;
|
||||
req->body = doc.dump();
|
||||
post_import_documents(req, res);
|
||||
}
|
||||
}
|
||||
|
||||
sort_fields = {sort_by("popularity", "DESC")};
|
||||
auto results = products_coll->search("*", {}, "", {},
|
||||
sort_fields, {0}, 10, 1, FREQUENCY,{false},
|
||||
Index::DROP_TOKENS_THRESHOLD,spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>()).get();
|
||||
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
|
||||
ASSERT_EQ("3", results["hits"][0]["document"]["id"]);
|
||||
ASSERT_EQ(2, results["hits"][0]["document"]["popularity"]);
|
||||
ASSERT_EQ("Trendy shorts", results["hits"][0]["document"]["title"]);
|
||||
|
||||
ASSERT_EQ("1", results["hits"][1]["document"]["id"]);
|
||||
ASSERT_EQ(1, results["hits"][1]["document"]["popularity"]);
|
||||
ASSERT_EQ("Funky trousers", results["hits"][1]["document"]["title"]);
|
||||
}
|
||||
|
||||
TEST_F(AnalyticsManagerTest, PopularityScoreValidation) {
|
||||
nlohmann::json products_schema = R"({
|
||||
"name": "books",
|
||||
"fields": [
|
||||
{"name": "title", "type": "string"},
|
||||
{"name": "popularity", "type": "int32"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
Collection* products_coll = collectionManager.create_collection(products_schema).get();
|
||||
|
||||
nlohmann::json analytics_rule = R"({
|
||||
"name": "books_popularity",
|
||||
"type": "popular_clicks",
|
||||
"params": {
|
||||
"source": {
|
||||
"collections": ["books"]
|
||||
},
|
||||
"destination": {
|
||||
"collection": "popular_books",
|
||||
"counter_field": "popularity"
|
||||
}
|
||||
}
|
||||
})"_json;
|
||||
|
||||
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
|
||||
ASSERT_FALSE(create_op.ok());
|
||||
ASSERT_EQ("Collection `popular_books` not found.", create_op.error());
|
||||
|
||||
analytics_rule = R"({
|
||||
"name": "books_popularity",
|
||||
"type": "popular_clicks",
|
||||
"params": {
|
||||
"source": {
|
||||
"collections": ["books"]
|
||||
},
|
||||
"destination": {
|
||||
"collection": "books",
|
||||
"counter_field": "popularity_score"
|
||||
}
|
||||
}
|
||||
})"_json;
|
||||
|
||||
create_op = analyticsManager.create_rule(analytics_rule, false, true);
|
||||
ASSERT_FALSE(create_op.ok());
|
||||
ASSERT_EQ("counter_field `popularity_score` not found in destination collection.", create_op.error());
|
||||
}
|
@@ -1591,7 +1591,7 @@ TEST_F(CollectionAllFieldsTest, FieldNameMatchingRegexpShouldNotBeIndexedInNonAu
 }

 TEST_F(CollectionAllFieldsTest, EmbedFromFieldJSONInvalidField) {
-    EmbedderManager::set_model_dir("/tmp/typensense_test/models");
+    EmbedderManager::set_model_dir("/tmp/typesense_test/models");
     nlohmann::json field_json;
     field_json["name"] = "embedding";
     field_json["type"] = "float[]";
@@ -2980,3 +2980,49 @@ TEST_F(CollectionFacetingTest, RangeFacetTestWithGroupBy) {

     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionFacetingTest, RangeFacetAlphanumericLabels) {
+    std::vector<field> fields = {field("monuments", field_types::STRING, false),
+                                 field("year", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "",
+            {}, {}).get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["monuments"] = "Statue Of Unity";
+    doc["year"] = 2018;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "1";
+    doc["monuments"] = "Taj Mahal";
+    doc["year"] = 1653;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "2";
+    doc["monuments"] = "Mysore Palace";
+    doc["year"] = 1897;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "3";
+    doc["monuments"] = "Chennakesava Temple";
+    doc["year"] = 1117;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto results = coll1->search("*", {},
+                                 "", {"year(10thAD:[1000,1500], 15thAD:[1500,2000], 20thAD:[2000, ])"},
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true).get();
+
+    ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"]);
+    ASSERT_EQ("15thAD", results["facet_counts"][0]["counts"][0]["value"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"]);
+    ASSERT_EQ("20thAD", results["facet_counts"][0]["counts"][1]["value"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][2]["count"]);
+    ASSERT_EQ("10thAD", results["facet_counts"][0]["counts"][2]["value"]);
+}
@@ -2648,3 +2648,50 @@ TEST_F(CollectionOptimizedFacetingTest, StringFacetsCountListRemoveTest) {
     ASSERT_EQ("The Shawshank Redemption", results["facet_counts"][0]["counts"][0]["value"]);
     ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"]);
 }
+
+TEST_F(CollectionOptimizedFacetingTest, RangeFacetAlphanumericLabels) {
+    std::vector<field> fields = {field("monuments", field_types::STRING, false),
+                                 field("year", field_types::INT32, true),};
+    Collection* coll1 = collectionManager.create_collection(
+            "coll1", 1, fields, "", 0, "",
+            {}, {}).get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["monuments"] = "Statue Of Unity";
+    doc["year"] = 2018;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "1";
+    doc["monuments"] = "Taj Mahal";
+    doc["year"] = 1653;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "2";
+    doc["monuments"] = "Mysore Palace";
+    doc["year"] = 1897;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    doc["id"] = "3";
+    doc["monuments"] = "Chennakesava Temple";
+    doc["year"] = 1117;
+    ASSERT_TRUE(coll1->add(doc.dump()).ok());
+
+    auto results = coll1->search("*", {},
+                                 "", {"year(10thAD:[1000,1500], 15thAD:[1500,2000], 20thAD:[2000, ])"},
+                                 {}, {2}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000,
+                                 true, false, true, "", true, 6000*1000, 4, 7, fallback, 4, {off}, INT16_MAX, INT16_MAX,
+                                 2, 2, false, "", true, 0, max_score, 100, 0, 0, VALUE).get();
+
+    ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"]);
+    ASSERT_EQ("15thAD", results["facet_counts"][0]["counts"][0]["value"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"]);
+    ASSERT_EQ("20thAD", results["facet_counts"][0]["counts"][1]["value"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"][2]["count"]);
+    ASSERT_EQ("10thAD", results["facet_counts"][0]["counts"][2]["value"]);
+}
@@ -3728,7 +3728,7 @@ TEST_F(CollectionOverrideTest, WildcardTagRuleThatMatchesAllQueries) {

     // includes instead of filter_by
     coll1->remove_override("ov-1");
-    override_json1 = R"({
+    auto override_json2 = R"({
         "id": "ov-1",
         "rule": {
             "tags": ["*"]
@@ -3738,9 +3738,10 @@
         ]
     })"_json;

-    op = override_t::parse(override_json1, "ov-1", override1);
+    override_t override2;
+    op = override_t::parse(override_json2, "ov-2", override2);
     ASSERT_TRUE(op.ok());
-    coll1->add_override(override1);
+    coll1->add_override(override2);

     results = coll1->search("foobar", {"name"}, "",
                             {}, sort_fields, {2}, 10, 1, FREQUENCY,
@@ -3907,3 +3908,98 @@ TEST_F(CollectionOverrideTest, MetadataValidation) {

     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionOverrideTest, WildcardSearchOverride) {
+    Collection* coll1;
+
+    std::vector<field> fields = {field("name", field_types::STRING, false),
+                                 field("category", field_types::STRING, true),};
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 1, fields, "").get();
+    }
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["name"] = "queryA";
+    doc1["category"] = "kids";
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["name"] = "queryA";
+    doc2["category"] = "kitchen";
+
+    nlohmann::json doc3;
+    doc3["id"] = "2";
+    doc3["name"] = "Clay Toy";
+    doc3["category"] = "home";
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc3.dump()).ok());
+
+    std::vector<sort_by> sort_fields = {sort_by("_text_match", "DESC")};
+
+    nlohmann::json override_json1 = R"({
+        "id": "ov-1",
+        "rule": {
+            "query": "*",
+            "match": "exact"
+        },
+        "filter_by": "category: kids"
+    })"_json;
+
+    override_t override1;
+    auto op = override_t::parse(override_json1, "ov-1", override1);
+    ASSERT_TRUE(op.ok());
+    coll1->add_override(override1);
+
+    std::string override_tags = "";
+    auto results = coll1->search("*", {}, "",
+                                 {}, sort_fields, {2}, 10, 1, FREQUENCY,
+                                 {false}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 10000,
+                                 4, 7, fallback, 4, {off}, 100, 100, 2, 2, false, "", true, 0, max_score, 100, 0,
+                                 0, HASH, 30000, 2, "", {}, {}, "right_to_left",
+                                 true, true, false, -1, "", override_tags).get();
+
+    ASSERT_EQ(1, results["hits"].size());
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+
+    // includes instead of filter_by
+    coll1->remove_override("ov-1");
+
+    override_t override2;
+    auto override_json2 = R"({
+        "id": "ov-2",
+        "rule": {
+            "query": "*",
+            "match": "exact"
+        },
+        "includes": [
+            {"id": "1", "position": 1}
+        ]
+    })"_json;
+
+    op = override_t::parse(override_json2, "ov-2", override2);
+    ASSERT_TRUE(op.ok());
+    coll1->add_override(override2);
+
+    results = coll1->search("*", {}, "",
+                            {}, sort_fields, {2}, 10, 1, FREQUENCY,
+                            {false}, Index::DROP_TOKENS_THRESHOLD,
+                            spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 10000,
+                            4, 7, fallback, 4, {off}, 100, 100, 2, 2, false, "", true, 0, max_score, 100, 0,
+                            0, HASH, 30000, 2, "", {}, {}, "right_to_left",
+                            true, true, false, -1, "", override_tags).get();
+
+    ASSERT_EQ(3, results["hits"].size());
+    ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
@@ -1988,7 +1988,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
         "name": "coll1",
         "fields": [
             {"name": "title", "type": "string" },
-            {"name": "brand", "type": "string" },
+            {"name": "brand", "type": "string", "infix": true },
             {"name": "points", "type": "int32" }
         ]
     }
@@ -2082,6 +2082,20 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
         ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get<std::string>());
     }

+    // Score associated with the first match is assigned to the document.
+    sort_fields = {
+        sort_by({"brand:nike", "brand:adidas", "points: 1"}, {3, 2, 5}, "DESC"),
+        sort_by("points", "DESC"),
+    };
+
+    results = coll1->search("*", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 10).get();
+    ASSERT_EQ(6, results["hits"].size());
+
+    expected_ids = {"3", "0", "4", "2", "1", "5"};
+    for(size_t i = 0; i < expected_ids.size(); i++) {
+        ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get<std::string>());
+    }
+
     // bad syntax for eval query
     sort_fields = {
         sort_by({"brandnike || points:0"}, {1}, "DESC"),
@@ -2107,6 +2121,25 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
     ASSERT_FALSE(search_op.ok());
     ASSERT_EQ("The eval expression in sort_by is empty.", search_op.error());

+    req_params = {
+        {"collection", "coll1"},
+        {"q", "a"},
+        {"query_by", "brand"},
+        {"sort_by", "_eval(brand:puma):desc, _text_match:desc"},
+        {"infix", "always"}
+    };
+
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+    ASSERT_TRUE(search_op.ok());
+    results = nlohmann::json::parse(json_res);
+
+    ASSERT_EQ(4, results["hits"].size());  // 3 Adidas, 1 Puma document
+    // Because of `_eval`, the Puma document will be on top even when having a lower text match score than Adidas documents.
+    expected_ids = {"5", "4", "2", "1"};
+    for(size_t i = 0; i < expected_ids.size(); i++) {
+        ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get<std::string>());
+    }
+
     // more bad syntax!
     sort_fields = {
         sort_by(")", "DESC"),
@@ -2179,6 +2179,10 @@ TEST_F(CollectionSpecificMoreTest, PhraseMatchAcrossArrayElements) {

+    auto res = coll1->search(R"("state of the art)", {"texts"}, "", {}, {}, {0}, 10, 1,
+                             FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>()).get();
+    ASSERT_EQ(1, res["hits"].size());
+
     res = coll1->search(R"("state of the art")", {"texts"}, "", {}, {}, {0}, 10, 1,
                         FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>()).get();
     ASSERT_EQ(0, res["hits"].size());
 }

@@ -2996,3 +2996,59 @@ TEST_F(CollectionSpecificTest, DontHighlightPunctuation) {

     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSpecificTest, ExactMatchWithoutClosingSymbol) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
+
+    std::vector<std::vector<std::string>> records = {
+        {"Hampi"},
+        {"Mahabalipuram"},
+        {"Taj Mahal"},
+        {"Mysore Palace"}
+    };
+
+    for(size_t i = 0; i < records.size(); i++) {
+        nlohmann::json doc;
+
+        doc["id"] = std::to_string(i);
+        doc["title"] = records[i][0];
+
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    std::map<std::string, std::string> req_params = {
+        {"collection", "coll1"},
+        {"q", "\"Hamp"},
+        {"query_by", "title"},
+    };
+    nlohmann::json embedded_params;
+    std::string json_res;
+    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    nlohmann::json result = nlohmann::json::parse(json_res);
+    ASSERT_EQ(1, result["hits"].size());
+    ASSERT_EQ("0", result["hits"][0]["document"]["id"]);
+    ASSERT_EQ("Hampi", result["hits"][0]["document"]["title"]);
+
+    req_params = {
+        {"collection", "coll1"},
+        {"q", "\"Mah"},
+        {"query_by", "title"},
+    };
+    now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
+            std::chrono::system_clock::now().time_since_epoch()).count();
+
+    search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
+
+    result = nlohmann::json::parse(json_res);
+    ASSERT_EQ(2, result["hits"].size());
+    ASSERT_EQ("2", result["hits"][0]["document"]["id"]);
+    ASSERT_EQ("Taj Mahal", result["hits"][0]["document"]["title"]);
+    ASSERT_EQ("1", result["hits"][1]["document"]["id"]);
+    ASSERT_EQ("Mahabalipuram", result["hits"][1]["document"]["title"]);
+}
@@ -7,27 +7,33 @@
 #include "raft_server.h"
 #include "conversation_model_manager.h"
 #include "conversation_manager.h"
+#include <analytics_manager.h>

 class CoreAPIUtilsTest : public ::testing::Test {
 protected:
-    Store *store;
+    Store *store, *analytics_store;
     CollectionManager & collectionManager = CollectionManager::get_instance();
     std::atomic<bool> quit = false;

     std::vector<std::string> query_fields;
     std::vector<sort_by> sort_fields;

+    AnalyticsManager& analyticsManager = AnalyticsManager::get_instance();
+
     void setupCollection() {
         std::string state_dir_path = "/tmp/typesense_test/core_api_utils";
+        std::string analytics_db_path = "/tmp/typesense_test/analytics_db2";
         LOG(INFO) << "Truncating and creating: " << state_dir_path;
         system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str());

         store = new Store(state_dir_path);
+        analytics_store = new Store(analytics_db_path);
         collectionManager.init(store, 1.0, "auth_key", quit);
         collectionManager.load(8, 1000);

         ConversationModelManager::init(store);
         ConversationManager::get_instance().init(store);
+        analyticsManager.init(store, analytics_store);
     }

     virtual void SetUp() {
@@ -37,6 +43,7 @@ protected:
     virtual void TearDown() {
         collectionManager.dispose();
         delete store;
+        delete analytics_store;
     }
 };

@@ -1495,4 +1502,119 @@ TEST_F(CoreAPIUtilsTest, TestInvalidConversationModels) {

     ASSERT_EQ(400, resp->status_code);
     ASSERT_EQ("Property `model_name` is not provided or not a string.", nlohmann::json::parse(resp->body)["message"]);
 }
+
+TEST_F(CoreAPIUtilsTest, GetClickEvents) {
+    // reset analytics store
+    analyticsManager.resetRateLimit();
+    analyticsManager.resetAnalyticsStore();
+
+    nlohmann::json schema = R"({
+        "name": "titles",
+        "fields": [
+            {"name": "name", "type": "string" },
+            {"name": "points", "type": "int32" }
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* titles = op.get();
+
+    std::shared_ptr<http_req> req = std::make_shared<http_req>();
+    std::shared_ptr<http_res> res = std::make_shared<http_res>(nullptr);
+
+    // no events in db
+    get_click_events(req, res);
+    ASSERT_EQ("{\"message\": \"Not Found\"}", res->body);
+
+    // add some events
+    nlohmann::json event1 = R"({
+        "type": "query_click",
+        "data": {
+            "q": "technology",
+            "collection": "titles",
+            "doc_id": "21",
+            "position": 2,
+            "user_id": "13"
+        }
+    })"_json;
+
+    req->body = event1.dump();
+    ASSERT_TRUE(post_create_event(req, res));
+
+    nlohmann::json event2 = R"({
+        "type": "query_click",
+        "data": {
+            "q": "technology",
+            "collection": "titles",
+            "doc_id": "12",
+            "position": 1,
+            "user_id": "13"
+        }
+    })"_json;
+    req->body = event2.dump();
+    ASSERT_TRUE(post_create_event(req, res));
+
+    nlohmann::json event3 = R"({
+        "type": "query_click",
+        "data": {
+            "q": "technology",
+            "collection": "titles",
+            "doc_id": "52",
+            "position": 5,
+            "user_id": "13"
+        }
+    })"_json;
+    req->body = event3.dump();
+    ASSERT_TRUE(post_create_event(req, res));
+
+    event1["collection_id"] = "0";
+    event1["timestamp"] = 1521512521;
+    event1["event_type"] = "click_events";
+    event2["collection_id"] = "0";
+    event2["timestamp"] = 1521514354;
+    event2["event_type"] = "click_events";
+    event3["collection_id"] = "0";
+    event3["timestamp"] = 1521515382;
+    event3["event_type"] = "click_events";
+
+    nlohmann::json click_events = nlohmann::json::array();
+    click_events.push_back(event1);
+    click_events.push_back(event2);
+    click_events.push_back(event3);
+
+    req->body = click_events.dump();
+    ASSERT_TRUE(post_replicate_events(req, res));
+
+    // get click events
+    req->data = nullptr;
+    get_click_events(req, res);
+
+    std::vector<std::string> res_strs;
+    StringUtils::split(res->body, res_strs, "\n");
+
+    auto result = nlohmann::json::array();
+    result.push_back(nlohmann::json::parse(res_strs[0]));
+    result.push_back(nlohmann::json::parse(res_strs[1]));
+    result.push_back(nlohmann::json::parse(res_strs[2]));
+
+    ASSERT_EQ("0", result[0]["collection_id"]);
+    ASSERT_EQ("13", result[0]["data"]["user_id"]);
+    ASSERT_EQ("21", result[0]["data"]["doc_id"]);
+    ASSERT_EQ(2, result[0]["data"]["position"]);
+    ASSERT_EQ("technology", result[0]["data"]["q"]);
+
+    ASSERT_EQ("0", result[1]["collection_id"]);
+    ASSERT_EQ("13", result[1]["data"]["user_id"]);
+    ASSERT_EQ("12", result[1]["data"]["doc_id"]);
+    ASSERT_EQ(1, result[1]["data"]["position"]);
+    ASSERT_EQ("technology", result[1]["data"]["q"]);
+
+    ASSERT_EQ("0", result[2]["collection_id"]);
+    ASSERT_EQ("13", result[2]["data"]["user_id"]);
+    ASSERT_EQ("52", result[2]["data"]["doc_id"]);
+    ASSERT_EQ(5, result[2]["data"]["position"]);
+    ASSERT_EQ("technology", result[2]["data"]["q"]);
+}