From ce471c9bb1e145c1d74038299949ddf525ca52f6 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Mon, 9 Jan 2017 19:17:53 +0530 Subject: [PATCH] Keep the hashset bounded by deleting the element to be replaced in heap from the set. --- include/topster.h | 21 +++++++++++++++------ src/collection.cpp | 4 ++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/topster.h b/include/topster.h index b541667a..a15fcc3c 100644 --- a/include/topster.h +++ b/include/topster.h @@ -33,18 +33,20 @@ struct Topster { } void add(const uint64_t &key, const uint64_t &match_score, const int64_t &primary_attr, const int64_t &secondary_attr){ - if(dedup_keys.count(key) != 0) { - return ; - } - - dedup_keys.insert(key); - if (size >= MAX_SIZE) { if(!is_greater(data[0], match_score, primary_attr, secondary_attr)) { // when incoming value is less than the smallest in the heap, ignore return; } + if(dedup_keys.count(key) != 0) { + // when the key already exists, ignore + return ; + } + + dedup_keys.erase(data[0].key); + dedup_keys.insert(key); + data[0].key = key; data[0].match_score = match_score; data[0].primary_attr = primary_attr; @@ -67,6 +69,13 @@ struct Topster { i = next; } } else { + if(dedup_keys.count(key) != 0) { + // when the key already exists, ignore + return ; + } + + dedup_keys.insert(key); + data[size].key = key; data[size].match_score = match_score; data[size].primary_attr = primary_attr; diff --git a/src/collection.cpp b/src/collection.cpp index 8f4d5274..69b94549 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -537,9 +537,9 @@ void Collection::remove(std::string id) { } std::string Collection::get_seq_id_key(uint32_t seq_id) { - return collection_id + "_" + SEQ_ID_PREFIX + std::to_string(seq_id); + return std::to_string(collection_id) + "_" + SEQ_ID_PREFIX + std::to_string(seq_id); } std::string Collection::get_doc_id_key(std::string doc_id) { - return collection_id + "_" + DOC_ID_PREFIX + doc_id; + return std::to_string(collection_id) + "_" + DOC_ID_PREFIX + doc_id; } \ No newline at end of file