Generify the Topster container to hold both integer and float values.

Benchmarked to ensure that performance is on par.
This commit is contained in:
Kishore Nallan 2017-08-20 15:08:22 +05:30
parent ea550f167c
commit 3104dea42a
4 changed files with 100 additions and 15 deletions

View File

@ -56,6 +56,8 @@
- ~~When prefix=true, use token_ranking_field for token ordering only for last word~~
- ~~only last token should be prefix searched~~
- ~~Prefix-search strings should not be null terminated~~
- sort results by float field
- test for string filter comparison: title < "foo"
- test for token ranking on float field
- test for float int field deletion during doc deletion
- validation for float field values > INT32_MAX

View File

@ -12,12 +12,56 @@
*/
template <size_t MAX_SIZE=100>
struct Topster {
/**
 * Tagged numeric value holding either a float or a 64-bit integer.
 *
 * `is_float` records which union member is currently active. The converting
 * constructors are intentionally implicit so that plain `float` / `int64_t`
 * values can be passed wherever a `number_t` is expected.
 */
struct number_t {
    bool is_float;      // true when `floatval` is the active union member

    union {
        float floatval;
        int64_t intval;
    };

    // Initializers are listed in declaration order (is_float, then the union).
    number_t(): is_float(false), intval(0) {
    }

    number_t(float val): is_float(true), floatval(val) {
    }

    number_t(int64_t val): is_float(false), intval(val) {
    }

    // Stores a float and marks the float member as active.
    inline number_t & operator = (const float & val) {
        floatval = val;
        is_float = true;
        return *this;
    }

    // Stores an integer and marks the integer member as active.
    inline number_t & operator = (const int64_t & val) {
        intval = val;
        is_float = false;
        return *this;
    }

    // Value of the active member widened to double; used only for comparisons
    // where the two operands hold different kinds of number.
    inline double as_double() const {
        return is_float ? static_cast<double>(floatval) : static_cast<double>(intval);
    }

    // Strict "less than". Operands of the same kind are compared exactly in
    // their own type; operands of different kinds are compared as doubles so
    // that the inactive union member of either side is never read.
    inline bool operator < (const number_t & rhs) const {
        if(is_float != rhs.is_float) {
            return as_double() < rhs.as_double();
        }

        if(is_float) {
            return floatval < rhs.floatval;
        }

        return intval < rhs.intval;
    }

    // Strict "greater than", with the same mixed-kind handling as operator <.
    inline bool operator > (const number_t & rhs) const {
        if(is_float != rhs.is_float) {
            return as_double() > rhs.as_double();
        }

        if(is_float) {
            return floatval > rhs.floatval;
        }

        return intval > rhs.intval;
    }
};
struct KV {
uint16_t query_index;
uint64_t key;
uint64_t match_score;
int64_t primary_attr;
int64_t secondary_attr;
number_t primary_attr;
number_t secondary_attr;
} data[MAX_SIZE];
uint32_t size;
@ -34,8 +78,8 @@ struct Topster {
b = c;
}
void add(const uint64_t &key, const uint16_t &query_index, const uint64_t &match_score, const int64_t &primary_attr,
const int64_t &secondary_attr) {
void add(const uint64_t &key, const uint16_t &query_index, const uint64_t &match_score, const number_t &primary_attr,
const number_t &secondary_attr) {
if (size >= MAX_SIZE) {
if(!is_greater(data[0], match_score, primary_attr, secondary_attr)) {
// when incoming value is less than the smallest in the heap, ignore
@ -55,9 +99,9 @@ struct Topster {
data[0].match_score = match_score;
data[0].primary_attr = primary_attr;
data[0].secondary_attr = secondary_attr;
uint32_t i = 0;
// sift to maintain heap property
uint32_t i = 0;
while ((2*i+1) < MAX_SIZE) {
uint32_t next = (uint32_t) (2 * i + 1);
if (next+1 < MAX_SIZE && is_greater_kv(data[next], data[next+1])) {
@ -85,8 +129,8 @@ struct Topster {
data[size].match_score = match_score;
data[size].primary_attr = primary_attr;
data[size].secondary_attr = secondary_attr;
size++;
size++;
for (uint32_t i = size - 1; i > 0;) {
uint32_t parent = (i-1)/2;
if (is_greater_kv(data[parent], data[i])) {
@ -99,7 +143,7 @@ struct Topster {
}
}
static bool is_greater(const struct KV& i, uint64_t match_score, int64_t primary_attr, int64_t secondary_attr) {
// Returns true when the incoming (match_score, primary_attr, secondary_attr)
// triple orders strictly after the triple stored in `i`. The comparison is
// lexicographic: match_score first, then primary_attr, then secondary_attr.
static bool is_greater(const struct KV& i, uint64_t match_score, number_t primary_attr, number_t secondary_attr) {
    const auto incoming = std::tie(match_score, primary_attr, secondary_attr);
    const auto stored = std::tie(i.match_score, i.primary_attr, i.secondary_attr);
    // `incoming > stored` expressed through the mirrored less-than.
    return stored < incoming;
}

View File

@ -747,11 +747,8 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
// All fields are sorted descending
std::sort(field_order_kvs.begin(), field_order_kvs.end(),
[](const std::pair<int, Topster<100>::KV> & a, const std::pair<int, Topster<100>::KV> & b) {
if(a.second.match_score != b.second.match_score) return a.second.match_score > b.second.match_score;
if(a.second.primary_attr != b.second.primary_attr) return a.second.primary_attr > b.second.primary_attr;
if(a.second.secondary_attr != b.second.secondary_attr) return a.second.secondary_attr > b.second.secondary_attr;
if(a.first != b.first) return a.first > b.first; // field position
return a.second.key > b.second.key;
return std::tie(a.second.match_score, a.second.primary_attr, a.second.secondary_attr, a.first, a.second.key) >
std::tie(b.second.match_score, b.second.primary_attr, b.second.secondary_attr, b.first, b.second.key);
});
result["hits"] = nlohmann::json::array();

View File

@ -2,7 +2,7 @@
#include "topster.h"
#include "match_score.h"
TEST(TopsterTest, StoreMaxValuesWithoutRepetition) {
TEST(TopsterTest, StoreMaxIntValuesWithoutRepetition) {
Topster<5> topster;
struct {
@ -11,20 +11,62 @@ TEST(TopsterTest, StoreMaxValuesWithoutRepetition) {
uint64_t match_score;
int64_t primary_attr;
int64_t secondary_attr;
} data[10] = {
} data[14] = {
{0, 1, 10, 20, 30},
{0, 1, 10, 20, 30},
{0, 2, 4, 20, 30},
{2, 3, 7, 20, 30},
{0, 4, 11, 20, 30},
{1, 5, 9, 20, 30},
{1, 5, 9, 20, 30},
{1, 5, 9, 20, 30},
{0, 6, 6, 20, 30},
{2, 7, 6, 22, 30},
{2, 7, 6, 22, 30},
{1, 8, 9, 20, 30},
{0, 9, 8, 20, 30},
{3, 10, 5, 20, 30},
};
for(int i = 0; i < 10; i++) {
for(int i = 0; i < 14; i++) {
topster.add(data[i].key, data[i].query_index, data[i].match_score, data[i].primary_attr,
data[i].secondary_attr);
}
topster.sort();
std::vector<uint64_t> ids = {4, 1, 5, 8, 9};
for(int i = 0; i < topster.size; i++) {
EXPECT_EQ(ids[i], topster.getKeyAt(i));
}
}
TEST(TopsterTest, StoreMaxFloatValuesWithoutRepetition) {
Topster<5> topster;
struct {
uint16_t query_index;
uint64_t key;
uint64_t match_score;
float primary_attr;
int64_t secondary_attr;
} data[12] = {
{0, 1, 11, 20.04, 30},
{0, 2, 4, 20, 30},
{2, 3, 7, 20, 30},
{0, 4, 11, 20.05, 30},
{0, 4, 11, 20.05, 30},
{1, 5, 9, 24.50, 34},
{0, 6, 6, 20, 30},
{2, 7, 6, 22, 30},
{1, 8, 9, 24.50, 30},
{1, 8, 9, 24.50, 30},
{0, 9, 8, 24.50, 30},
{3, 10, 5, 20, 30},
};
for(int i = 0; i < 12; i++) {
topster.add(data[i].key, data[i].query_index, data[i].match_score, data[i].primary_attr,
data[i].secondary_attr);
}