Merge branch 'typesense:v0.26-facets' into v0.26-facets

This commit is contained in:
Ozan Armağan 2023-12-26 01:28:42 +03:00 committed by GitHub
commit cc299dbda2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 796 additions and 21 deletions

1
.bazelversion Normal file
View File

@ -0,0 +1 @@
5.2.0

View File

@ -45,6 +45,7 @@ jobs:
workflow_conclusion: ""
if_no_artifact_found: warn
skip_unpack: true
branch: ${{ github.base_ref || github.head_ref || github.ref_name }}
- name: Uncompress bazel cache
run: |

View File

@ -46,6 +46,11 @@ struct click_event_t {
}
};
struct popular_clicks_t {
std::string counter_field;
std::map<std::string, uint64_t> docid_counts;
};
struct query_hits_count_t {
std::string query;
uint64_t timestamp;
@ -137,6 +142,9 @@ private:
// suggestion collection => nohits queries
std::unordered_map<std::string, QueryAnalytics*> nohits_queries;
// collection => popular clicks
std::unordered_map<std::string, popular_clicks_t> popular_clicks;
//query collection => click events
std::unordered_map<std::string, std::vector<click_event_t>> query_collection_click_events;
@ -163,6 +171,7 @@ public:
static constexpr const char* QUERY_HITS_COUNT = "$QH";
static constexpr const char* POPULAR_QUERIES_TYPE = "popular_queries";
static constexpr const char* NOHITS_QUERIES_TYPE = "nohits_queries";
static constexpr const char* POPULAR_CLICKS_TYPE = "popular_clicks";
static AnalyticsManager& get_instance() {
static AnalyticsManager instance;
@ -191,6 +200,8 @@ public:
void dispose();
Store* get_analytics_store();
void persist_query_events(ReplicationState *raft_server, uint64_t prev_persistence_s);
std::unordered_map<std::string, QueryAnalytics*> get_popular_queries();
@ -200,8 +211,12 @@ public:
void persist_query_hits_click_events(ReplicationState *raft_server, uint64_t prev_persistence_s);
void persist_popular_clicks(ReplicationState *raft_server, uint64_t prev_persistence_s);
nlohmann::json get_click_events();
std::unordered_map<std::string, popular_clicks_t> get_popular_clicks();
Option<bool> write_events_to_store(nlohmann::json& event_jsons);
void add_nohits_query(const std::string& query_collection,

View File

@ -399,6 +399,8 @@ public:
std::vector<field> get_fields();
bool contains_field(const std::string&);
std::unordered_map<std::string, field> get_dynamic_fields();
tsl::htrie_map<char, field> get_schema();

View File

@ -84,7 +84,8 @@ public:
Store(const std::string & state_dir_path,
const size_t wal_ttl_secs = 24*60*60,
const size_t wal_size_mb = 1024, bool disable_wal = true): state_dir_path(state_dir_path) {
const size_t wal_size_mb = 1024, bool disable_wal = true,
const size_t db_compaction_interval = 604800): state_dir_path(state_dir_path) {
// Optimize RocksDB
options.IncreaseParallelism();
options.OptimizeLevelStyleCompaction();
@ -94,7 +95,7 @@ public:
options.max_write_buffer_number = 2;
options.merge_operator.reset(new UInt64AddOperator);
options.compression = rocksdb::CompressionType::kSnappyCompression;
options.periodic_compaction_seconds = 604800;
options.periodic_compaction_seconds = db_compaction_interval;
options.max_log_file_size = 4*1048576;
options.keep_log_file_num = 5;

View File

@ -335,4 +335,6 @@ struct StringUtils {
static Option<bool> tokenize_filter_query(const std::string& filter_query, std::queue<std::string>& tokens);
static Option<bool> split_include_fields(const std::string& include_fields, std::vector<std::string>& tokens);
static size_t get_occurence_count(const std::string& str, char symbol);
};

View File

@ -72,6 +72,8 @@ private:
uint32_t housekeeping_interval;
uint32_t db_compaction_interval;
protected:
Config() {
@ -100,6 +102,7 @@ protected:
this->enable_search_analytics = false;
this->analytics_flush_interval = 3600; // in seconds
this->housekeeping_interval = 1800; // in seconds
this->db_compaction_interval = 604800; // in seconds
}
Config(Config const&) {
@ -309,6 +312,10 @@ public:
return this->housekeeping_interval;
}
size_t get_db_compaction_interval() const {
return this->db_compaction_interval;
}
size_t get_thread_pool_size() const {
return this->thread_pool_size;
}
@ -449,6 +456,10 @@ public:
this->housekeeping_interval = std::stoi(get_env("TYPESENSE_HOUSEKEEPING_INTERVAL"));
}
if(!get_env("TYPESENSE_DB_COMPACTION_INTERVAL").empty()) {
this->db_compaction_interval = std::stoi(get_env("TYPESENSE_DB_COMPACTION_INTERVAL"));
}
if(!get_env("TYPESENSE_THREAD_POOL_SIZE").empty()) {
this->thread_pool_size = std::stoi(get_env("TYPESENSE_THREAD_POOL_SIZE"));
}
@ -620,6 +631,10 @@ public:
this->housekeeping_interval = (int) reader.GetInteger("server", "housekeeping-interval", 1800);
}
if(reader.Exists("server", "db-compaction-interval")) {
this->db_compaction_interval = (int) reader.GetInteger("server", "db-compaction-interval", 1800);
}
if(reader.Exists("server", "thread-pool-size")) {
this->thread_pool_size = (int) reader.GetInteger("server", "thread-pool-size", 0);
}
@ -782,6 +797,10 @@ public:
this->housekeeping_interval = options.get<uint32_t>("housekeeping-interval");
}
if(options.exist("db-compaction-interval")) {
this->db_compaction_interval = options.get<uint32_t>("db-compaction-interval");
}
if(options.exist("thread-pool-size")) {
this->thread_pool_size = options.get<uint32_t>("thread-pool-size");
}

View File

@ -43,7 +43,8 @@ Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool upsert,
return Option<bool>(400, "Bad or missing params.");
}
if(payload["type"] == POPULAR_QUERIES_TYPE || payload["type"] == NOHITS_QUERIES_TYPE) {
if(payload["type"] == POPULAR_QUERIES_TYPE || payload["type"] == NOHITS_QUERIES_TYPE
|| payload["type"] == POPULAR_CLICKS_TYPE) {
return create_queries_index(payload, upsert, write_to_disk);
}
@ -84,6 +85,15 @@ Option<bool> AnalyticsManager::create_queries_index(nlohmann::json &payload, boo
return Option<bool>(400, "Must contain a valid destination collection.");
}
std::string counter_field;
if(params["destination"].contains("counter_field")) {
if (!params["destination"]["counter_field"].is_string()) {
return Option<bool>(400, "Must contain a valid counter_field.");
}
counter_field = params["destination"]["counter_field"].get<std::string>();
}
const std::string& suggestion_collection = params["destination"]["collection"].get<std::string>();
suggestion_config_t suggestion_config;
suggestion_config.name = suggestion_config_name;
@ -98,6 +108,19 @@ Option<bool> AnalyticsManager::create_queries_index(nlohmann::json &payload, boo
if (!upsert && nohits_queries.count(suggestion_collection) != 0) {
return Option<bool>(400, "There's already another configuration for this destination collection.");
}
} else if(payload["type"] == POPULAR_CLICKS_TYPE) {
if (!upsert && popular_clicks.count(suggestion_collection) != 0) {
return Option<bool>(400, "There's already another configuration for this destination collection.");
}
auto coll = CollectionManager::get_instance().get_collection(suggestion_collection).get();
if(coll != nullptr) {
if (!coll->contains_field(counter_field)) {
return Option<bool>(404, "counter_field `" + counter_field + "` not found in destination collection.");
}
} else {
return Option<bool>(404, "Collection `" + suggestion_collection + "` not found.");
}
}
for(const auto& coll: params["source"]["collections"]) {
@ -131,6 +154,8 @@ Option<bool> AnalyticsManager::create_queries_index(nlohmann::json &payload, boo
} else if(payload["type"] == NOHITS_QUERIES_TYPE) {
QueryAnalytics *noresultsQueries = new QueryAnalytics(limit);
nohits_queries.emplace(suggestion_collection, noresultsQueries);
} else if(payload["type"] == POPULAR_CLICKS_TYPE) {
popular_clicks.emplace(suggestion_collection, popular_clicks_t{counter_field, {}});
}
if(write_to_disk) {
@ -278,6 +303,13 @@ Option<bool> AnalyticsManager::add_click_event(const std::string &query_collecti
click_event_t click_event(query, now_ts_useconds, user_id, doc_id, position);
click_events_vec.emplace_back(click_event);
auto popular_clicks_it = popular_clicks.find(query_collection);
if(popular_clicks_it != popular_clicks.end()) {
popular_clicks_it->second.docid_counts[doc_id]++;
} else {
LOG(ERROR) << "collection " << query_collection << " not found in analytics rule.";
}
return Option<bool>(true);
}
@ -346,6 +378,7 @@ void AnalyticsManager::run(ReplicationState* raft_server) {
checkEventsExpiry();
persist_query_events(raft_server, prev_persistence_s);
persist_query_hits_click_events(raft_server, prev_persistence_s);
persist_popular_clicks(raft_server, prev_persistence_s);
prev_persistence_s = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
@ -508,6 +541,37 @@ void AnalyticsManager::persist_query_hits_click_events(ReplicationState *raft_se
}
}
void AnalyticsManager::persist_popular_clicks(ReplicationState *raft_server, uint64_t prev_persistence_s) {
auto send_http_response = [&](const std::string& import_payload, const std::string& collection) {
std::string leader_url = raft_server->get_leader_url();
if (!leader_url.empty()) {
const std::string &base_url = leader_url + "collections/" + collection;
std::string res;
const std::string &update_url = base_url + "/documents/import?action=update";
std::map<std::string, std::string> res_headers;
long status_code = HttpClient::post_response(update_url, import_payload,
res, res_headers, {}, 10 * 1000, true);
if (status_code != 200) {
LOG(ERROR) << "Error while sending popular_clicks events to leader. "
<< "Status code: " << status_code << ", response: " << res;
}
}
};
for(const auto& popular_clicks_it : popular_clicks) {
auto coll = popular_clicks_it.first;
nlohmann::json doc;
auto counter_field = popular_clicks_it.second.counter_field;
for(const auto& popular_click : popular_clicks_it.second.docid_counts) {
doc["id"] = popular_click.first;
doc[counter_field] = popular_click.second;
send_http_response(doc.dump(), coll);
}
}
}
void AnalyticsManager::stop() {
quit = true;
cv.notify_all();
@ -534,6 +598,10 @@ void AnalyticsManager::init(Store* store, Store* analytics_store) {
this->analytics_store = analytics_store;
}
Store* AnalyticsManager::get_analytics_store() {
return this->analytics_store;
}
std::unordered_map<std::string, QueryAnalytics*> AnalyticsManager::get_popular_queries() {
std::unique_lock lk(mutex);
return popular_queries;
@ -544,6 +612,11 @@ std::unordered_map<std::string, QueryAnalytics*> AnalyticsManager::get_nohits_qu
return nohits_queries;
}
std::unordered_map<std::string, popular_clicks_t> AnalyticsManager::get_popular_clicks() {
std::unique_lock lk(mutex);
return popular_clicks;
}
nlohmann::json AnalyticsManager::get_click_events() {
std::unique_lock lk(mutex);
std::vector<std::string> click_event_jsons;

View File

@ -2155,6 +2155,10 @@ Option<nlohmann::json> Collection::search(std::string raw_query,
parse_search_query(query, q_include_tokens,
field_query_tokens[0].q_exclude_tokens, field_query_tokens[0].q_phrases, "",
false, stopwords_set);
process_filter_overrides(filter_overrides, q_include_tokens, token_order, filter_tree_root,
included_ids, excluded_ids, override_metadata);
for(size_t i = 0; i < q_include_tokens.size(); i++) {
auto& q_include_token = q_include_tokens[i];
field_query_tokens[0].q_include_tokens.emplace_back(i, q_include_token, (i == q_include_tokens.size() - 1),
@ -3328,7 +3332,7 @@ void Collection::parse_search_query(const std::string &query, std::vector<std::s
if(exclude_operator_prior) {
q_exclude_tokens.push_back(phrase);
} else {
q_phrases.push_back(phrase);
q_include_tokens.insert(q_include_tokens.end(), phrase.begin(), phrase.end());
}
}
@ -4361,6 +4365,11 @@ std::vector<field> Collection::get_fields() {
return fields;
}
bool Collection::contains_field(const std::string &field) {
std::shared_lock lock(mutex);
return search_schema.find(field) != search_schema.end();
}
std::unordered_map<std::string, field> Collection::get_dynamic_fields() {
std::shared_lock lock(mutex);
return dynamic_fields;
@ -5841,7 +5850,7 @@ bool Collection::get_enable_nested_fields() {
Option<bool> Collection::parse_facet(const std::string& facet_field, std::vector<facet>& facets) const {
const std::regex base_pattern(".+\\(.*\\)");
const std::regex range_pattern("[[a-z A-Z]+:\\[([+-]?([0-9]*[.])?[0-9]*)\\,\\s*([+-]?([0-9]*[.])?[0-9]*)\\]");
const std::regex range_pattern("[[0-9]*[a-z A-Z]+[0-9]*:\\[([+-]?([0-9]*[.])?[0-9]*)\\,\\s*([+-]?([0-9]*[.])?[0-9]*)\\]");
const std::string _alpha = "_alpha";
if ((facet_field.find(":") != std::string::npos)

View File

@ -2791,10 +2791,71 @@ bool put_conversation_model(const std::shared_ptr<http_req>& req, const std::sha
res->set_200(model.dump());
return true;
}
bool get_click_events(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto click_events = AnalyticsManager::get_instance().get_click_events();
res->set_200(click_events.dump());
bool get_click_events(const std::shared_ptr<http_req>& req, const std::shared_ptr<http_res>& res) {
auto analytics_store = AnalyticsManager::get_instance().get_analytics_store();
if (!analytics_store) {
LOG(ERROR) << "Analytics store not initialized.";
return true;
}
export_state_t *export_state = nullptr;
auto click_event_prefix = std::string(AnalyticsManager::CLICK_EVENT) + "_";
if (req->data == nullptr) {
export_state = new export_state_t();
req->data = export_state;
export_state->iter_upper_bound_key = std::string(AnalyticsManager::CLICK_EVENT) + "`";
export_state->iter_upper_bound = new rocksdb::Slice(export_state->iter_upper_bound_key);
export_state->it = analytics_store->scan(click_event_prefix, export_state->iter_upper_bound);
} else {
export_state = dynamic_cast<export_state_t *>(req->data);
}
if (export_state->it != nullptr) {
rocksdb::Iterator *it = export_state->it;
size_t batch_counter = 0;
std::string().swap(res->body);
if (!it->Valid()) {
LOG(ERROR) << "No click events found in db.";
req->last_chunk_aggregate = true;
res->final = true;
res->set_404();
stream_response(req, res);
return false;
}
while (it->Valid() && it->key().ToString().compare(0, click_event_prefix.size(), click_event_prefix) == 0) {
res->body += it->value().ToString();
it->Next();
// append a new line character if there is going to be one more record to send
if (it->Valid() &&
it->key().ToString().compare(0, click_event_prefix.size(), click_event_prefix) == 0) {
res->body += "\n";
req->last_chunk_aggregate = false;
res->final = false;
} else {
req->last_chunk_aggregate = true;
res->final = true;
}
batch_counter++;
if (batch_counter == export_state->export_batch_size) {
break;
}
}
} else {
req->last_chunk_aggregate = true;
res->final = true;
}
res->content_type_header = "text/plain; charset=utf-8";
res->status_code = 200;
stream_response(req, res);
return true;
}

View File

@ -15,7 +15,13 @@ Option<bool> EmbedderManager::validate_and_init_model(const nlohmann::json& mode
return validate_and_init_remote_model(model_config, num_dims);
} else {
LOG(INFO) << "Validating and initializing local model: " << model_name;
return validate_and_init_local_model(model_config, num_dims);
auto op = validate_and_init_local_model(model_config, num_dims);
if(op.ok()) {
LOG(INFO) << "Finished initializing local model: " << model_name;
} else {
LOG(ERROR) << "Failed to initialize local model " << model_name << ", error: " << op.error();
}
return op;
}
}

View File

@ -786,8 +786,10 @@ void Index::index_field_in_memory(const field& afield, std::vector<index_record>
}
else if(afield.type == field_types::FLOAT) {
float raw_val = document[afield.name].get<float>();
auto fhash = reinterpret_cast<uint32_t&>(raw_val);
facet_value_id_t facet_value_id(StringUtils::float_to_str(raw_val), fhash);
const std::string& float_str_val = StringUtils::float_to_str(raw_val);
float normalized_raw_val = std::stof(float_str_val);
auto fhash = reinterpret_cast<uint32_t&>(normalized_raw_val);
facet_value_id_t facet_value_id(float_str_val, fhash);
fvalue_to_seq_ids[facet_value_id].push_back(seq_id);
seq_id_to_fvalues[seq_id].push_back(facet_value_id);
}
@ -1387,7 +1389,7 @@ void Index::do_facets(std::vector<facet> & facets, facet_query_t & facet_query,
if(a_facet.get_range(std::stoll(doc_val), range_pair)) {
const auto& range_id = range_pair.first;
facet_count_t& facet_count = a_facet.result_map[range_id];
facet_count.count = kv.second.count;
facet_count.count += kv.second.count;
}
} else {
facet_count_t& facet_count = a_facet.value_result_map[kv.first];

View File

@ -82,7 +82,7 @@ void master_server_routes() {
server->post("/analytics/events", post_create_event);
//collection based query click events
server->get("/analytics/click_events", get_click_events);
server->get("/analytics/click_events", get_click_events, false, true);
server->post("/analytics/click_events", post_create_event);
server->post("/analytics/click_events/replicate", post_replicate_events);
server->get("/analytics/query_hits_counts", get_query_hits_counts);

View File

@ -594,3 +594,7 @@ size_t StringUtils::split_facet(const std::string &s, std::vector<std::string> &
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> utf8conv;
return utf8conv.from_bytes(bytes).size();
}*/
size_t StringUtils::get_occurence_count(const std::string &str, char symbol) {
return std::count(str.begin(), str.end(), symbol);
}

View File

@ -109,6 +109,7 @@ void init_cmdline_options(cmdline::parser & options, int argc, char **argv) {
options.add<int>("cache-num-entries", '\0', "Number of entries to cache.", false, 1000);
options.add<uint32_t>("analytics-flush-interval", '\0', "Frequency of persisting analytics data to disk (in seconds).", false, 3600);
options.add<uint32_t>("housekeeping-interval", '\0', "Frequency of housekeeping background job (in seconds).", false, 1800);
options.add<uint32_t>("db-compaction-interval", '\0', "Frequency of RocksDB compaction (in seconds).", false, 604800);
// DEPRECATED
options.add<std::string>("listen-address", 'h', "[DEPRECATED: use `api-address`] Address to which Typesense API service binds.", false, "0.0.0.0");
@ -394,7 +395,7 @@ int run_server(const Config & config, const std::string & version, void (*master
ThreadPool replication_thread_pool(num_threads);
// primary DB used for storing the documents: we will not use WAL since Raft provides that
Store store(db_dir);
Store store(db_dir, 24*60*60, 1024, true, config.get_db_compaction_interval());
// meta DB for storing house keeping things
Store meta_store(meta_dir, 24*60*60, 1024, false);
@ -402,7 +403,7 @@ int run_server(const Config & config, const std::string & version, void (*master
//analytics DB for storing query click events
std::unique_ptr<Store> analytics_store = nullptr;
if(!analytics_dir.empty()) {
analytics_store.reset(new Store(analytics_dir, 24 * 60 * 60, 1024, false));
analytics_store.reset(new Store(analytics_dir, 24 * 60 * 60, 1024, true, config.get_db_compaction_interval()));
}
curl_global_init(CURL_GLOBAL_SSL);

View File

@ -767,4 +767,178 @@ TEST_F(AnalyticsManagerTest, EventsExpiryPartial) {
ASSERT_EQ("management", resp[0]["q"]);
ASSERT_EQ(13, resp[0]["user_id"]);
ASSERT_EQ(834, resp[0]["hits_count"]);
}
TEST_F(AnalyticsManagerTest, PopularityScore) {
//reset click event rate limit
analyticsManager.resetRateLimit();
nlohmann::json products_schema = R"({
"name": "products",
"fields": [
{"name": "title", "type": "string"},
{"name": "popularity", "type": "int32"}
]
})"_json;
Collection* products_coll = collectionManager.create_collection(products_schema).get();
nlohmann::json doc;
doc["popularity"] = 0;
doc["id"] = "0";
doc["title"] = "Cool trousers";
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
doc["id"] = "1";
doc["title"] = "Funky trousers";
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
doc["id"] = "2";
doc["title"] = "Casual shorts";
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
doc["id"] = "3";
doc["title"] = "Trendy shorts";
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
doc["id"] = "4";
doc["title"] = "Formal pants";
ASSERT_TRUE(products_coll->add(doc.dump()).ok());
nlohmann::json analytics_rule = R"({
"name": "product_popularity",
"type": "popular_clicks",
"params": {
"source": {
"collections": ["products"]
},
"destination": {
"collection": "products",
"counter_field": "popularity"
}
}
})"_json;
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_TRUE(create_op.ok());
std::shared_ptr<http_req> req = std::make_shared<http_req>();
std::shared_ptr<http_res> res = std::make_shared<http_res>(nullptr);
nlohmann::json event1 = R"({
"type": "query_click",
"data": {
"q": "trousers",
"collection": "products",
"doc_id": "1",
"position": 2,
"user_id": "13"
}
})"_json;
req->body = event1.dump();
ASSERT_TRUE(post_create_event(req, res));
nlohmann::json event2 = R"({
"type": "query_click",
"data": {
"q": "shorts",
"collection": "products",
"doc_id": "3",
"position": 4,
"user_id": "11"
}
})"_json;
req->body = event2.dump();
ASSERT_TRUE(post_create_event(req, res));
ASSERT_TRUE(post_create_event(req, res));
auto popular_clicks = analyticsManager.get_popular_clicks();
ASSERT_EQ(1, popular_clicks.size());
ASSERT_EQ("popularity", popular_clicks["products"].counter_field);
ASSERT_EQ(2, popular_clicks["products"].docid_counts.size());
ASSERT_EQ(1, popular_clicks["products"].docid_counts["1"]);
ASSERT_EQ(2, popular_clicks["products"].docid_counts["3"]);
//trigger persistance event
for(const auto& popular_clicks_it : popular_clicks) {
auto coll = popular_clicks_it.first;
nlohmann::json doc;
auto counter_field = popular_clicks_it.second.counter_field;
req->params["collection"] = "products";
req->params["action"] = "update";
for(const auto& popular_click : popular_clicks_it.second.docid_counts) {
doc["id"] = popular_click.first;
doc[counter_field] = popular_click.second;
req->body = doc.dump();
post_import_documents(req, res);
}
}
sort_fields = {sort_by("popularity", "DESC")};
auto results = products_coll->search("*", {}, "", {},
sort_fields, {0}, 10, 1, FREQUENCY,{false},
Index::DROP_TOKENS_THRESHOLD,spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(5, results["hits"].size());
ASSERT_EQ("3", results["hits"][0]["document"]["id"]);
ASSERT_EQ(2, results["hits"][0]["document"]["popularity"]);
ASSERT_EQ("Trendy shorts", results["hits"][0]["document"]["title"]);
ASSERT_EQ("1", results["hits"][1]["document"]["id"]);
ASSERT_EQ(1, results["hits"][1]["document"]["popularity"]);
ASSERT_EQ("Funky trousers", results["hits"][1]["document"]["title"]);
}
TEST_F(AnalyticsManagerTest, PopularityScoreValidation) {
nlohmann::json products_schema = R"({
"name": "books",
"fields": [
{"name": "title", "type": "string"},
{"name": "popularity", "type": "int32"}
]
})"_json;
Collection* products_coll = collectionManager.create_collection(products_schema).get();
nlohmann::json analytics_rule = R"({
"name": "books_popularity",
"type": "popular_clicks",
"params": {
"source": {
"collections": ["books"]
},
"destination": {
"collection": "popular_books",
"counter_field": "popularity"
}
}
})"_json;
auto create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_FALSE(create_op.ok());
ASSERT_EQ("Collection `popular_books` not found.", create_op.error());
analytics_rule = R"({
"name": "books_popularity",
"type": "popular_clicks",
"params": {
"source": {
"collections": ["books"]
},
"destination": {
"collection": "books",
"counter_field": "popularity_score"
}
}
})"_json;
create_op = analyticsManager.create_rule(analytics_rule, false, true);
ASSERT_FALSE(create_op.ok());
ASSERT_EQ("counter_field `popularity_score` not found in destination collection.", create_op.error());
}

View File

@ -1591,7 +1591,7 @@ TEST_F(CollectionAllFieldsTest, FieldNameMatchingRegexpShouldNotBeIndexedInNonAu
}
TEST_F(CollectionAllFieldsTest, EmbedFromFieldJSONInvalidField) {
EmbedderManager::set_model_dir("/tmp/typensense_test/models");
EmbedderManager::set_model_dir("/tmp/typesense_test/models");
nlohmann::json field_json;
field_json["name"] = "embedding";
field_json["type"] = "float[]";

View File

@ -2980,3 +2980,49 @@ TEST_F(CollectionFacetingTest, RangeFacetTestWithGroupBy) {
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionFacetingTest, RangeFacetAlphanumericLabels) {
std::vector<field> fields = {field("monuments", field_types::STRING, false),
field("year", field_types::INT32, true),};
Collection* coll1 = collectionManager.create_collection(
"coll1", 1, fields, "", 0, "",
{},{}).get();
nlohmann::json doc;
doc["id"] = "0";
doc["monuments"] = "Statue Of Unity";
doc["year"] = 2018;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "1";
doc["monuments"] = "Taj Mahal";
doc["year"] = 1653;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "2";
doc["monuments"] = "Mysore Palace";
doc["year"] = 1897;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "3";
doc["monuments"] = "Chennakesava Temple";
doc["year"] = 1117;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
auto results = coll1->search("*", {},
"", {"year(10thAD:[1000,1500], 15thAD:[1500,2000], 20thAD:[2000, ])"},
{}, {2}, 10,
1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true).get();
ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"]);
ASSERT_EQ("15thAD", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"]);
ASSERT_EQ("20thAD", results["facet_counts"][0]["counts"][1]["value"]);
ASSERT_EQ(1, results["facet_counts"][0]["counts"][2]["count"]);
ASSERT_EQ("10thAD", results["facet_counts"][0]["counts"][2]["value"]);
}

View File

@ -2648,3 +2648,50 @@ TEST_F(CollectionOptimizedFacetingTest, StringFacetsCountListRemoveTest) {
ASSERT_EQ("The Shawshank Redemption", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ(1, results["facet_counts"][0]["counts"][0]["count"]);
}
TEST_F(CollectionOptimizedFacetingTest, RangeFacetAlphanumericLabels) {
std::vector<field> fields = {field("monuments", field_types::STRING, false),
field("year", field_types::INT32, true),};
Collection* coll1 = collectionManager.create_collection(
"coll1", 1, fields, "", 0, "",
{},{}).get();
nlohmann::json doc;
doc["id"] = "0";
doc["monuments"] = "Statue Of Unity";
doc["year"] = 2018;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "1";
doc["monuments"] = "Taj Mahal";
doc["year"] = 1653;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "2";
doc["monuments"] = "Mysore Palace";
doc["year"] = 1897;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
doc["id"] = "3";
doc["monuments"] = "Chennakesava Temple";
doc["year"] = 1117;
ASSERT_TRUE(coll1->add(doc.dump()).ok());
auto results = coll1->search("*", {},
"", {"year(10thAD:[1000,1500], 15thAD:[1500,2000], 20thAD:[2000, ])"},
{}, {2}, 10,
1, FREQUENCY, {true},
10, spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 10, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000,
true, false, true, "", true, 6000*1000, 4, 7, fallback, 4, {off}, INT16_MAX, INT16_MAX,
2, 2, false, "", true, 0, max_score, 100, 0, 0, VALUE).get();
ASSERT_EQ(3, results["facet_counts"][0]["counts"].size());
ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"]);
ASSERT_EQ("15thAD", results["facet_counts"][0]["counts"][0]["value"]);
ASSERT_EQ(1, results["facet_counts"][0]["counts"][1]["count"]);
ASSERT_EQ("20thAD", results["facet_counts"][0]["counts"][1]["value"]);
ASSERT_EQ(1, results["facet_counts"][0]["counts"][2]["count"]);
ASSERT_EQ("10thAD", results["facet_counts"][0]["counts"][2]["value"]);
}

View File

@ -3728,7 +3728,7 @@ TEST_F(CollectionOverrideTest, WildcardTagRuleThatMatchesAllQueries) {
// includes instead of filter_by
coll1->remove_override("ov-1");
override_json1 = R"({
auto override_json2 = R"({
"id": "ov-1",
"rule": {
"tags": ["*"]
@ -3738,9 +3738,10 @@ TEST_F(CollectionOverrideTest, WildcardTagRuleThatMatchesAllQueries) {
]
})"_json;
op = override_t::parse(override_json1, "ov-1", override1);
override_t override2;
op = override_t::parse(override_json2, "ov-2", override2);
ASSERT_TRUE(op.ok());
coll1->add_override(override1);
coll1->add_override(override2);
results = coll1->search("foobar", {"name"}, "",
{}, sort_fields, {2}, 10, 1, FREQUENCY,
@ -3907,3 +3908,98 @@ TEST_F(CollectionOverrideTest, MetadataValidation) {
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionOverrideTest, WildcardSearchOverride) {
Collection* coll1;
std::vector<field> fields = {field("name", field_types::STRING, false),
field("category", field_types::STRING, true),};
coll1 = collectionManager.get_collection("coll1").get();
if (coll1 == nullptr) {
coll1 = collectionManager.create_collection("coll1", 1, fields, "").get();
}
nlohmann::json doc1;
doc1["id"] = "0";
doc1["name"] = "queryA";
doc1["category"] = "kids";
nlohmann::json doc2;
doc2["id"] = "1";
doc2["name"] = "queryA";
doc2["category"] = "kitchen";
nlohmann::json doc3;
doc3["id"] = "2";
doc3["name"] = "Clay Toy";
doc3["category"] = "home";
ASSERT_TRUE(coll1->add(doc1.dump()).ok());
ASSERT_TRUE(coll1->add(doc2.dump()).ok());
ASSERT_TRUE(coll1->add(doc3.dump()).ok());
std::vector<sort_by> sort_fields = {sort_by("_text_match", "DESC")};
nlohmann::json override_json1 = R"({
"id": "ov-1",
"rule": {
"query": "*",
"match": "exact"
},
"filter_by": "category: kids"
})"_json;
override_t override1;
auto op = override_t::parse(override_json1, "ov-1", override1);
ASSERT_TRUE(op.ok());
coll1->add_override(override1);
std::string override_tags = "";
auto results = coll1->search("*", {}, "",
{}, sort_fields, {2}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 10000,
4, 7, fallback, 4, {off}, 100, 100, 2, 2, false, "", true, 0, max_score, 100, 0,
0, HASH, 30000, 2, "", {}, {}, "right_to_left",
true, true, false, -1, "", override_tags).get();
ASSERT_EQ(1, results["hits"].size());
ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
// includes instead of filter_by
coll1->remove_override("ov-1");
override_t override2;
auto override_json2 = R"({
"id": "ov-2",
"rule": {
"query": "*",
"match": "exact"
},
"includes": [
{"id": "1", "position": 1}
]
})"_json;
op = override_t::parse(override_json2, "ov-2", override2);
ASSERT_TRUE(op.ok());
coll1->add_override(override2);
results = coll1->search("*", {}, "",
{}, sort_fields, {2}, 10, 1, FREQUENCY,
{false}, Index::DROP_TOKENS_THRESHOLD,
spp::sparse_hash_set<std::string>(),
spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "title", 20, {}, {}, {}, 0,
"<mark>", "</mark>", {}, 1000, true, false, true, "", false, 10000,
4, 7, fallback, 4, {off}, 100, 100, 2, 2, false, "", true, 0, max_score, 100, 0,
0, HASH, 30000, 2, "", {}, {}, "right_to_left",
true, true, false, -1, "", override_tags).get();
ASSERT_EQ(3, results["hits"].size());
ASSERT_EQ("1", results["hits"][0]["document"]["id"].get<std::string>());
collectionManager.drop_collection("coll1");
}

View File

@ -1988,7 +1988,7 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
"name": "coll1",
"fields": [
{"name": "title", "type": "string" },
{"name": "brand", "type": "string" },
{"name": "brand", "type": "string", "infix": true },
{"name": "points", "type": "int32" }
]
}
@ -2082,6 +2082,20 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get<std::string>());
}
// Score associated with the first match is assigned to the document.
sort_fields = {
sort_by({"brand:nike", "brand:adidas", "points: 1"}, {3, 2, 5}, "DESC"),
sort_by("points", "DESC"),
};
results = coll1->search("*", {"title"}, "", {}, sort_fields, {2}, 10, 1, FREQUENCY, {true}, 10).get();
ASSERT_EQ(6, results["hits"].size());
expected_ids = {"3", "0", "4", "2", "1", "5"};
for(size_t i = 0; i < expected_ids.size(); i++) {
ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get<std::string>());
}
// bad syntax for eval query
sort_fields = {
sort_by({"brandnike || points:0"}, {1}, "DESC"),
@ -2107,6 +2121,25 @@ TEST_F(CollectionSortingTest, OptionalFilteringViaSortingWildcard) {
ASSERT_FALSE(search_op.ok());
ASSERT_EQ("The eval expression in sort_by is empty.", search_op.error());
req_params = {
{"collection", "coll1"},
{"q", "a"},
{"query_by", "brand"},
{"sort_by", "_eval(brand:puma):desc, _text_match:desc"},
{"infix", "always"}
};
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
ASSERT_TRUE(search_op.ok());
results = nlohmann::json::parse(json_res);
ASSERT_EQ(4, results["hits"].size()); // 3 Adidas 1 Puma documents
// Because of `_eval`, Puma document will be on top even when having a lower text match score than Adidas documents.
expected_ids = {"5", "4", "2", "1"};
for(size_t i = 0; i < expected_ids.size(); i++) {
ASSERT_EQ(expected_ids[i], results["hits"][i]["document"]["id"].get<std::string>());
}
// more bad syntax!
sort_fields = {
sort_by(")", "DESC"),

View File

@ -2179,6 +2179,10 @@ TEST_F(CollectionSpecificMoreTest, PhraseMatchAcrossArrayElements) {
auto res = coll1->search(R"("state of the art)", {"texts"}, "", {}, {}, {0}, 10, 1,
FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(1, res["hits"].size());
res = coll1->search(R"("state of the art")", {"texts"}, "", {}, {}, {0}, 10, 1,
FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>()).get();
ASSERT_EQ(0, res["hits"].size());
}

View File

@ -2996,3 +2996,59 @@ TEST_F(CollectionSpecificTest, DontHighlightPunctuation) {
collectionManager.drop_collection("coll1");
}
TEST_F(CollectionSpecificTest, ExactMatchWithoutClosingSymbol) {
std::vector<field> fields = {field("title", field_types::STRING, false),};
Collection* coll1 = collectionManager.create_collection("coll1", 1, fields).get();
std::vector<std::vector<std::string>> records = {
{"Hampi"},
{"Mahabalipuram"},
{"Taj Mahal"},
{"Mysore Palace"}
};
for(size_t i=0; i<records.size(); i++) {
nlohmann::json doc;
doc["id"] = std::to_string(i);
doc["title"] = records[i][0];
ASSERT_TRUE(coll1->add(doc.dump()).ok());
}
std::map<std::string, std::string> req_params = {
{"collection", "coll1"},
{"q", "\"Hamp"},
{"query_by", "title"},
};
nlohmann::json embedded_params;
std::string json_res;
auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
auto search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
nlohmann::json result = nlohmann::json::parse(json_res);
ASSERT_EQ(1, result["hits"].size());
ASSERT_EQ("0", result["hits"][0]["document"]["id"]);
ASSERT_EQ("Hampi", result["hits"][0]["document"]["title"]);
req_params = {
{"collection", "coll1"},
{"q", "\"Mah"},
{"query_by", "title"},
};
now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
search_op = collectionManager.do_search(req_params, embedded_params, json_res, now_ts);
result = nlohmann::json::parse(json_res);
ASSERT_EQ(2, result["hits"].size());
ASSERT_EQ("2", result["hits"][0]["document"]["id"]);
ASSERT_EQ("Taj Mahal", result["hits"][0]["document"]["title"]);
ASSERT_EQ("1", result["hits"][1]["document"]["id"]);
ASSERT_EQ("Mahabalipuram", result["hits"][1]["document"]["title"]);
}

View File

@ -7,27 +7,33 @@
#include "raft_server.h"
#include "conversation_model_manager.h"
#include "conversation_manager.h"
#include <analytics_manager.h>
class CoreAPIUtilsTest : public ::testing::Test {
protected:
Store *store;
Store *store, *analytics_store;
CollectionManager & collectionManager = CollectionManager::get_instance();
std::atomic<bool> quit = false;
std::vector<std::string> query_fields;
std::vector<sort_by> sort_fields;
AnalyticsManager& analyticsManager = AnalyticsManager::get_instance();
void setupCollection() {
std::string state_dir_path = "/tmp/typesense_test/core_api_utils";
std::string analytics_db_path = "/tmp/typesense_test/analytics_db2";
LOG(INFO) << "Truncating and creating: " << state_dir_path;
system(("rm -rf "+state_dir_path+" && mkdir -p "+state_dir_path).c_str());
store = new Store(state_dir_path);
analytics_store = new Store(analytics_db_path);
collectionManager.init(store, 1.0, "auth_key", quit);
collectionManager.load(8, 1000);
ConversationModelManager::init(store);
ConversationManager::get_instance().init(store);
analyticsManager.init(store, analytics_store);
}
virtual void SetUp() {
@ -37,6 +43,7 @@ protected:
virtual void TearDown() {
collectionManager.dispose();
delete store;
delete analytics_store;
}
};
@ -1495,4 +1502,119 @@ TEST_F(CoreAPIUtilsTest, TestInvalidConversationModels) {
ASSERT_EQ(400, resp->status_code);
ASSERT_EQ("Property `model_name` is not provided or not a string.", nlohmann::json::parse(resp->body)["message"]);
}
TEST_F(CoreAPIUtilsTest, GetClickEvents) {
//reset analytics store
analyticsManager.resetRateLimit();
analyticsManager.resetAnalyticsStore();
nlohmann::json schema = R"({
"name": "titles",
"fields": [
{"name": "name", "type": "string" },
{"name": "points", "type": "int32" }
]
})"_json;
auto op = collectionManager.create_collection(schema);
ASSERT_TRUE(op.ok());
Collection* titles = op.get();
std::shared_ptr<http_req> req = std::make_shared<http_req>();
std::shared_ptr<http_res> res = std::make_shared<http_res>(nullptr);
// no events in db
get_click_events(req, res);
ASSERT_EQ("{\"message\": \"Not Found\"}", res->body);
//add some events
nlohmann::json event1 = R"({
"type": "query_click",
"data": {
"q": "technology",
"collection": "titles",
"doc_id": "21",
"position": 2,
"user_id": "13"
}
})"_json;
req->body = event1.dump();
ASSERT_TRUE(post_create_event(req, res));
nlohmann::json event2 = R"({
"type": "query_click",
"data": {
"q": "technology",
"collection": "titles",
"doc_id": "12",
"position": 1,
"user_id": "13"
}
})"_json;
req->body = event2.dump();
ASSERT_TRUE(post_create_event(req, res));
nlohmann::json event3 = R"({
"type": "query_click",
"data": {
"q": "technology",
"collection": "titles",
"doc_id": "52",
"position": 5,
"user_id": "13"
}
})"_json;
req->body = event3.dump();
ASSERT_TRUE(post_create_event(req, res));
event1["collection_id"] = "0";
event1["timestamp"] = 1521512521;
event1["event_type"] = "click_events";
event2["collection_id"] = "0";
event2["timestamp"] = 1521514354;
event2["event_type"] = "click_events";
event3["collection_id"] = "0";
event3["timestamp"] = 1521515382;
event3["event_type"] = "click_events";
nlohmann::json click_events = nlohmann::json::array();
click_events.push_back(event1);
click_events.push_back(event2);
click_events.push_back(event3);
req->body = click_events.dump();
ASSERT_TRUE(post_replicate_events(req, res));
//get click events
req->data = nullptr;
get_click_events(req, res);
std::vector<std::string> res_strs;
StringUtils::split(res->body, res_strs, "\n");
auto result = nlohmann::json::array();
result.push_back(nlohmann::json::parse(res_strs[0]));
result.push_back(nlohmann::json::parse(res_strs[1]));
result.push_back(nlohmann::json::parse(res_strs[2]));
ASSERT_EQ("0", result[0]["collection_id"]);
ASSERT_EQ("13", result[0]["data"]["user_id"]);
ASSERT_EQ("21", result[0]["data"]["doc_id"]);
ASSERT_EQ(2, result[0]["data"]["position"]);
ASSERT_EQ("technology", result[0]["data"]["q"]);
ASSERT_EQ("0", result[1]["collection_id"]);
ASSERT_EQ("13", result[1]["data"]["user_id"]);
ASSERT_EQ("12", result[1]["data"]["doc_id"]);
ASSERT_EQ(1, result[1]["data"]["position"]);
ASSERT_EQ("technology", result[1]["data"]["q"]);
ASSERT_EQ("0", result[2]["collection_id"]);
ASSERT_EQ("13", result[2]["data"]["user_id"]);
ASSERT_EQ("52", result[2]["data"]["doc_id"]);
ASSERT_EQ(5, result[2]["data"]["position"]);
ASSERT_EQ("technology", result[2]["data"]["q"]);
}