mirror of
https://github.com/typesense/typesense.git
synced 2025-05-17 20:22:32 +08:00
Generify the topster container to hold both integer and float.
Benchmarked to ensure that performance is on par.
This commit is contained in:
parent
ea550f167c
commit
3104dea42a
2
TODO.md
2
TODO.md
@ -56,6 +56,8 @@
|
||||
- ~~When prefix=true, use token_ranking_field for token ordering only for last word~~
|
||||
- ~~only last token should be prefix searched~~
|
||||
- ~~Prefix-search strings should not be null terminated~~
|
||||
- sort results by float field
|
||||
- test for string filter comparison: title < "foo"
|
||||
- test for token ranking on float field
|
||||
- test for float int field deletion during doc deletion
|
||||
- > INT32_MAX validation for float field
|
||||
|
@ -12,12 +12,56 @@
|
||||
*/
|
||||
template <size_t MAX_SIZE=100>
|
||||
struct Topster {
|
||||
/**
 * Tagged numeric value holding either a float or a 64-bit integer.
 *
 * `is_float` records which member of the anonymous union was written last:
 * `floatval` when true, `intval` when false. Implicit construction from
 * `float` and `int64_t` is intentional so that callers can pass raw values
 * wherever a `number_t` is expected.
 */
struct number_t {
    bool is_float;
    union {
        float floatval;
        int64_t intval;
    };

    // Initializers are listed in declaration order (is_float first, then the
    // union member) so that the written order matches the actual order.

    /** Builds an integer zero. */
    number_t(): is_float(false), intval(0) {

    }

    /** Builds a float value. */
    number_t(float val): is_float(true), floatval(val) {

    }

    /** Builds an integer value. */
    number_t(int64_t val): is_float(false), intval(val) {

    }

    /** Stores a float and marks the value as float; returns *this for chaining. */
    inline number_t & operator = (const float & val) {
        floatval = val;
        is_float = true;
        return *this;
    }

    /** Stores an integer and marks the value as integer; returns *this for chaining. */
    inline number_t & operator = (const int64_t & val) {
        intval = val;
        is_float = false;
        return *this;
    }

    /**
     * Less-than comparison.
     *
     * When both operands carry the same kind, the matching union member is
     * compared directly. When the kinds differ, both sides are widened to
     * double first, so the inactive union member of either operand is never read.
     */
    inline bool operator < (const number_t & rhs) const {
        if(is_float != rhs.is_float) {
            return as_double() < rhs.as_double();
        }

        if(is_float) {
            return floatval < rhs.floatval;
        }

        return intval < rhs.intval;
    }

    /**
     * Greater-than comparison; follows the same rules as operator <.
     */
    inline bool operator > (const number_t & rhs) const {
        if(is_float != rhs.is_float) {
            return as_double() > rhs.as_double();
        }

        if(is_float) {
            return floatval > rhs.floatval;
        }

        return intval > rhs.intval;
    }

private:
    /** Returns the active member widened to double (used for mixed-kind comparisons only). */
    inline double as_double() const {
        return is_float ? static_cast<double>(floatval) : static_cast<double>(intval);
    }
};
|
||||
|
||||
struct KV {
|
||||
uint16_t query_index;
|
||||
uint64_t key;
|
||||
uint64_t match_score;
|
||||
int64_t primary_attr;
|
||||
int64_t secondary_attr;
|
||||
number_t primary_attr;
|
||||
number_t secondary_attr;
|
||||
} data[MAX_SIZE];
|
||||
|
||||
uint32_t size;
|
||||
@ -34,8 +78,8 @@ struct Topster {
|
||||
b = c;
|
||||
}
|
||||
|
||||
void add(const uint64_t &key, const uint16_t &query_index, const uint64_t &match_score, const int64_t &primary_attr,
|
||||
const int64_t &secondary_attr) {
|
||||
void add(const uint64_t &key, const uint16_t &query_index, const uint64_t &match_score, const number_t &primary_attr,
|
||||
const number_t &secondary_attr) {
|
||||
if (size >= MAX_SIZE) {
|
||||
if(!is_greater(data[0], match_score, primary_attr, secondary_attr)) {
|
||||
// when incoming value is less than the smallest in the heap, ignore
|
||||
@ -55,9 +99,9 @@ struct Topster {
|
||||
data[0].match_score = match_score;
|
||||
data[0].primary_attr = primary_attr;
|
||||
data[0].secondary_attr = secondary_attr;
|
||||
uint32_t i = 0;
|
||||
|
||||
// sift to maintain heap property
|
||||
uint32_t i = 0;
|
||||
while ((2*i+1) < MAX_SIZE) {
|
||||
uint32_t next = (uint32_t) (2 * i + 1);
|
||||
if (next+1 < MAX_SIZE && is_greater_kv(data[next], data[next+1])) {
|
||||
@ -85,8 +129,8 @@ struct Topster {
|
||||
data[size].match_score = match_score;
|
||||
data[size].primary_attr = primary_attr;
|
||||
data[size].secondary_attr = secondary_attr;
|
||||
size++;
|
||||
|
||||
size++;
|
||||
for (uint32_t i = size - 1; i > 0;) {
|
||||
uint32_t parent = (i-1)/2;
|
||||
if (is_greater_kv(data[parent], data[i])) {
|
||||
@ -99,7 +143,7 @@ struct Topster {
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_greater(const struct KV& i, uint64_t match_score, int64_t primary_attr, int64_t secondary_attr) {
|
||||
static bool is_greater(const struct KV& i, uint64_t match_score, number_t primary_attr, number_t secondary_attr) {
|
||||
return std::tie (match_score, primary_attr, secondary_attr) >
|
||||
std::tie (i.match_score, i.primary_attr, i.secondary_attr);
|
||||
}
|
||||
|
@ -747,11 +747,8 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
|
||||
// All fields are sorted descending
|
||||
std::sort(field_order_kvs.begin(), field_order_kvs.end(),
|
||||
[](const std::pair<int, Topster<100>::KV> & a, const std::pair<int, Topster<100>::KV> & b) {
|
||||
if(a.second.match_score != b.second.match_score) return a.second.match_score > b.second.match_score;
|
||||
if(a.second.primary_attr != b.second.primary_attr) return a.second.primary_attr > b.second.primary_attr;
|
||||
if(a.second.secondary_attr != b.second.secondary_attr) return a.second.secondary_attr > b.second.secondary_attr;
|
||||
if(a.first != b.first) return a.first > b.first; // field position
|
||||
return a.second.key > b.second.key;
|
||||
return std::tie(a.second.match_score, a.second.primary_attr, a.second.secondary_attr, a.first, a.second.key) >
|
||||
std::tie(b.second.match_score, b.second.primary_attr, b.second.secondary_attr, b.first, b.second.key);
|
||||
});
|
||||
|
||||
result["hits"] = nlohmann::json::array();
|
||||
|
@ -2,7 +2,7 @@
|
||||
#include "topster.h"
|
||||
#include "match_score.h"
|
||||
|
||||
TEST(TopsterTest, StoreMaxValuesWithoutRepetition) {
|
||||
TEST(TopsterTest, StoreMaxIntValuesWithoutRepetition) {
|
||||
Topster<5> topster;
|
||||
|
||||
struct {
|
||||
@ -11,20 +11,62 @@ TEST(TopsterTest, StoreMaxValuesWithoutRepetition) {
|
||||
uint64_t match_score;
|
||||
int64_t primary_attr;
|
||||
int64_t secondary_attr;
|
||||
} data[10] = {
|
||||
} data[14] = {
|
||||
{0, 1, 10, 20, 30},
|
||||
{0, 1, 10, 20, 30},
|
||||
{0, 2, 4, 20, 30},
|
||||
{2, 3, 7, 20, 30},
|
||||
{0, 4, 11, 20, 30},
|
||||
{1, 5, 9, 20, 30},
|
||||
{1, 5, 9, 20, 30},
|
||||
{1, 5, 9, 20, 30},
|
||||
{0, 6, 6, 20, 30},
|
||||
{2, 7, 6, 22, 30},
|
||||
{2, 7, 6, 22, 30},
|
||||
{1, 8, 9, 20, 30},
|
||||
{0, 9, 8, 20, 30},
|
||||
{3, 10, 5, 20, 30},
|
||||
};
|
||||
|
||||
for(int i = 0; i < 10; i++) {
|
||||
for(int i = 0; i < 14; i++) {
|
||||
topster.add(data[i].key, data[i].query_index, data[i].match_score, data[i].primary_attr,
|
||||
data[i].secondary_attr);
|
||||
}
|
||||
|
||||
topster.sort();
|
||||
|
||||
std::vector<uint64_t> ids = {4, 1, 5, 8, 9};
|
||||
|
||||
for(int i = 0; i < topster.size; i++) {
|
||||
EXPECT_EQ(ids[i], topster.getKeyAt(i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(TopsterTest, StoreMaxFloatValuesWithoutRepetition) {
|
||||
Topster<5> topster;
|
||||
|
||||
struct {
|
||||
uint16_t query_index;
|
||||
uint64_t key;
|
||||
uint64_t match_score;
|
||||
float primary_attr;
|
||||
int64_t secondary_attr;
|
||||
} data[12] = {
|
||||
{0, 1, 11, 20.04, 30},
|
||||
{0, 2, 4, 20, 30},
|
||||
{2, 3, 7, 20, 30},
|
||||
{0, 4, 11, 20.05, 30},
|
||||
{0, 4, 11, 20.05, 30},
|
||||
{1, 5, 9, 24.50, 34},
|
||||
{0, 6, 6, 20, 30},
|
||||
{2, 7, 6, 22, 30},
|
||||
{1, 8, 9, 24.50, 30},
|
||||
{1, 8, 9, 24.50, 30},
|
||||
{0, 9, 8, 24.50, 30},
|
||||
{3, 10, 5, 20, 30},
|
||||
};
|
||||
|
||||
for(int i = 0; i < 12; i++) {
|
||||
topster.add(data[i].key, data[i].query_index, data[i].match_score, data[i].primary_attr,
|
||||
data[i].secondary_attr);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user