add rate limit for clickevents, make tests more verbose

This commit is contained in:
krunal 2023-11-09 13:39:55 +05:30
parent 1c4c7ac320
commit 34290ab1d9
6 changed files with 92 additions and 29 deletions

View File

@ -46,6 +46,19 @@ struct ClickEvent {
}
};
/**
 * Rate-limiting bookkeeping entry stored in the click-event cache, one per
 * client IP.
 *
 * Kept as a plain aggregate (no constructors) so it can be brace-initialised
 * as `event_cache_t{creation_time, count}`.
 *
 * Equality is decided by `creation_time` alone; `count` takes no part in the
 * comparison.
 */
struct event_cache_t {
    // Timestamp (epoch seconds) recorded when the entry was created.
    uint64_t creation_time;
    // Number of events counted against this entry.
    uint64_t count;

    // True when both entries carry the same creation timestamp.
    bool operator == (const event_cache_t& other) const {
        return other.creation_time == creation_time;
    }

    // Exact negation of operator==.
    bool operator != (const event_cache_t& other) const {
        return !(*this == other);
    }
};
class AnalyticsManager {
private:
mutable std::mutex mutex;
@ -133,8 +146,8 @@ public:
std::unordered_map<std::string, PopularQueries*> get_popular_queries();
void add_click_event(const std::string& query_collection, const std::string& query, const std::string& user_id,
std::string doc_id, uint64_t position);
Option<bool> add_click_event(const std::string& query_collection, const std::string& query, const std::string& user_id,
std::string doc_id, uint64_t position, const std::string& client_ip);
void persist_click_event(ReplicationState *raft_server, uint64_t prev_persistence_s);

View File

@ -21,6 +21,6 @@ public:
EventManager(EventManager const&) = delete;
void operator=(EventManager const&) = delete;
bool add_event(const nlohmann::json& event);
Option<bool> add_event(const nlohmann::json& event, const std::string& ip);
};

View File

@ -4,6 +4,11 @@
#include "tokenizer.h"
#include "http_client.h"
#include "collection_manager.h"
#include "lru/lru.hpp"
LRU::Cache<std::string, event_cache_t> events_cache;
// Length, in seconds, of the window over which click events from a single
// client IP are counted.
constexpr uint64_t CLICK_EVENTS_RATE_LIMIT_SEC = 60;
// Maximum number of click events accepted from a single client IP within one
// window. Typed constants replace the former macros so the unsigned
// comparisons against event_cache_t fields are explicit and scoped.
constexpr uint64_t CLICK_EVENTS_RATE_LIMIT_COUNT = 100;
Option<bool> AnalyticsManager::create_rule(nlohmann::json& payload, bool upsert, bool write_to_disk) {
/*
@ -218,16 +223,38 @@ void AnalyticsManager::add_suggestion(const std::string &query_collection, const
}
}
void AnalyticsManager::add_click_event(const std::string &query_collection, const std::string &query, const std::string &user_id,
std::string doc_id, uint64_t position) {
Option<bool> AnalyticsManager::add_click_event(const std::string &query_collection, const std::string &query, const std::string &user_id,
std::string doc_id, uint64_t position, const std::string& client_ip) {
std::unique_lock lock(mutex);
auto &click_events_vec = query_collection_click_events[query_collection];
auto now_ts_seconds = std::chrono::duration_cast<std::chrono::microseconds>(
auto now_ts_seconds = std::chrono::duration_cast<std::chrono::seconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
auto events_cache_it = events_cache.find(client_ip);
if(events_cache_it != events_cache.end()) {
//event found in events cache
if ((now_ts_seconds - events_cache_it->second.creation_time) < CLICK_EVENTS_RATE_LIMIT_SEC) {
if (events_cache_it->second.count >= CLICK_EVENTS_RATE_LIMIT_COUNT) {
return Option<bool>(500, "click event rate limit reached.");
} else {
events_cache_it->second.count++;
}
} else {
events_cache_it->second.count = 1;
}
} else {
event_cache_t eventCache{(uint64_t) now_ts_seconds, 1};
events_cache.insert(client_ip, eventCache);
}
auto now_ts_useconds = std::chrono::duration_cast<std::chrono::microseconds>(
std::chrono::system_clock::now().time_since_epoch()).count();
ClickEvent click_event(query, now_ts_seconds, user_id, doc_id, position);
ClickEvent click_event(query, now_ts_useconds, user_id, doc_id, position);
click_events_vec.emplace_back(click_event);
return Option<bool>(true);
}
void AnalyticsManager::run(ReplicationState* raft_server) {

View File

@ -2226,13 +2226,13 @@ bool post_create_event(const std::shared_ptr<http_req>& req, const std::shared_p
return false;
}
bool success = EventManager::get_instance().add_event(req_json);
if(success) {
auto add_event_op = EventManager::get_instance().add_event(req_json, req->client_ip);
if(add_event_op.ok()) {
res->set_201(R"({"ok": true)");
return true;
}
res->set_400(R"({"ok": false)");
res->set_400(add_event_op.error());
return false;
}

View File

@ -1,7 +1,7 @@
#include <analytics_manager.h>
#include "event_manager.h"
bool EventManager::add_event(const nlohmann::json& event) {
Option<bool> EventManager::add_event(const nlohmann::json& event, const std::string& client_ip) {
/*
Sample event payload:
@ -15,7 +15,7 @@ bool EventManager::add_event(const nlohmann::json& event) {
*/
if(!event.contains("type")) {
return false;
return Option<bool>(404, "key `type` not found.");
}
const auto& event_type_val = event[EVENT_TYPE];
@ -24,48 +24,63 @@ bool EventManager::add_event(const nlohmann::json& event) {
const std::string& event_type = event_type_val.get<std::string>();
if(event_type == "search") {
if(!event.contains("data")) {
return false;
return Option<bool>(404, "key `data` not found.");
}
const auto& event_data_val = event[EVENT_DATA];
if(!event_data_val.is_object()) {
return false;
return Option<bool>(500, "event_data_val is not object.");
}
const auto& event_data_query_it = event_data_val["q"];
if(!event_data_query_it.is_string() || !event_data_val["collections"].is_array()) {
return false;
if(!event_data_query_it.is_string()) {
return Option<bool>(500, "`q` value should be string.");
}
if(!event_data_val["collections"].is_array() || !event_data_val["collections"][0].is_string()) {
return Option<bool>(500, "`collections` value should be string array.");
}
for(const auto& coll: event_data_val["collections"]) {
if(!coll.is_string()) {
return false;
}
std::string query = event_data_query_it.get<std::string>();
AnalyticsManager::get_instance().add_suggestion(coll.get<std::string>(), query, false, "");
}
} else if(event_type == "query_click") {
if (!event.contains("data")) {
return false;
return Option<bool>(404, "key `data` not found.");
}
const auto &event_data_val = event[EVENT_DATA];
if (!event_data_val.is_object()) {
return false;
return Option<bool>(500, "event_data_val is not object.");
}
if (!event_data_val.contains("q") || !event_data_val.contains("doc_id") || !event_data_val.contains("user_id")
|| !event_data_val.contains("position") || !event_data_val.contains("collection")) {
return false;
return Option<bool>(500, "event json data fields should contain `q`, `doc_id`, `position`, `user_id`, and `collection`.");
}
if (!event_data_val["q"].is_string() || !event_data_val["doc_id"].is_string() || !event_data_val["user_id"].is_string()
|| !event_data_val["position"].is_number_unsigned() || !event_data_val["collection"].is_string()) {
return false;
if (!event_data_val["q"].is_string()) {
return Option<bool>(500, "`q` value should be string.");
}
if(!event_data_val["doc_id"].is_string()) {
return Option<bool>(500, "`doc_id` value should be string.");
}
if(!event_data_val["user_id"].is_string()) {
return Option<bool>(500, "`user_id` value should be string.");
}
if(!event_data_val["position"].is_number_unsigned()){
return Option<bool>(500, "`position` value should be unsigned int.");
}
if(!event_data_val["collection"].is_string()) {
return Option<bool>(500, "`collection` value should be string.");
}
const std::string query = event_data_val["q"].get<std::string>();
@ -74,11 +89,16 @@ bool EventManager::add_event(const nlohmann::json& event) {
uint64_t position = event_data_val["position"].get<uint64_t>();
const std::string& collection = event_data_val["collection"].get<std::string>();
AnalyticsManager::get_instance().add_click_event(collection, query, user_id, doc_id, position);
auto op = AnalyticsManager::get_instance().add_click_event(collection, query, user_id, doc_id, position, client_ip);
if(!op.ok()) {
return Option<bool>(op.code(), op.error());
}
} else {
return false;
return Option<bool>(404, "event_type " + event_type + " not found.");
}
} else {
return Option<bool>(500, "`event_type` value should be string.");
}
return true;
return Option(true);
}

View File

@ -232,6 +232,7 @@ TEST_F(AnalyticsManagerTest, ClickEventsValidation) {
req->body = event1.dump();
ASSERT_FALSE(post_create_event(req, res));
ASSERT_EQ("{\"message\": \"event_type click not found.\"}", res->body);
//missing query param
nlohmann::json event2 = R"({
@ -246,6 +247,7 @@ TEST_F(AnalyticsManagerTest, ClickEventsValidation) {
req->body = event2.dump();
ASSERT_FALSE(post_create_event(req, res));
ASSERT_EQ("{\"message\": \"event json data fields should contain `q`, `doc_id`, `position`, `user_id`, and `collection`.\"}", res->body);
//should be string type
nlohmann::json event3 = R"({
@ -261,6 +263,7 @@ TEST_F(AnalyticsManagerTest, ClickEventsValidation) {
req->body = event3.dump();
ASSERT_FALSE(post_create_event(req, res));
ASSERT_EQ("{\"message\": \"`doc_id` value should be string.\"}", res->body);
//correct params
nlohmann::json event4 = R"({