diff --git a/TODO.md b/TODO.md index 61e7f95b..2c60a92e 100644 --- a/TODO.md +++ b/TODO.md @@ -56,6 +56,8 @@ - ~~When prefix=true, use token_ranking_field for token ordering only for last word~~ - ~~only last token should be prefix searched~~ - ~~Prefix-search strings should not be null terminated~~ +- sort results by float field +- test for string filter comparison: title < "foo" - test for token ranking on float field - test for float int field deletion during doc deletion - > INT32_MAX validation for float field diff --git a/include/topster.h b/include/topster.h index d2245c32..36f6df3b 100644 --- a/include/topster.h +++ b/include/topster.h @@ -12,12 +12,56 @@ */ template struct Topster { + struct number_t { + bool is_float; + union { + float floatval; + int64_t intval; + }; + + number_t(): intval(0), is_float(false) { + + } + + number_t(float val): floatval(val), is_float(true) { + + } + + number_t(int64_t val): intval(val), is_float(false) { + + } + + inline void operator = (const float & val) { + floatval = val; + is_float = true; + } + + inline void operator = (const int64_t & val) { + intval = val; + is_float = false; + } + + inline bool operator < (const number_t & rhs) const { + if(is_float) { + return floatval < rhs.floatval; + } + return intval < rhs.intval; + } + + inline bool operator > (const number_t & rhs) const { + if(is_float) { + return floatval > rhs.floatval; + } + return intval > rhs.intval; + } + }; + struct KV { uint16_t query_index; uint64_t key; uint64_t match_score; - int64_t primary_attr; - int64_t secondary_attr; + number_t primary_attr; + number_t secondary_attr; } data[MAX_SIZE]; uint32_t size; @@ -34,8 +78,8 @@ struct Topster { b = c; } - void add(const uint64_t &key, const uint16_t &query_index, const uint64_t &match_score, const int64_t &primary_attr, - const int64_t &secondary_attr) { + void add(const uint64_t &key, const uint16_t &query_index, const uint64_t &match_score, const number_t &primary_attr, + const number_t &secondary_attr) { if (size >= MAX_SIZE) { if(!is_greater(data[0], match_score, primary_attr, secondary_attr)) { // when incoming value is less than the smallest in the heap, ignore @@ -55,9 +99,9 @@ struct Topster { data[0].match_score = match_score; data[0].primary_attr = primary_attr; data[0].secondary_attr = secondary_attr; - uint32_t i = 0; // sift to maintain heap property + uint32_t i = 0; while ((2*i+1) < MAX_SIZE) { uint32_t next = (uint32_t) (2 * i + 1); if (next+1 < MAX_SIZE && is_greater_kv(data[next], data[next+1])) { @@ -85,8 +129,8 @@ struct Topster { data[size].match_score = match_score; data[size].primary_attr = primary_attr; data[size].secondary_attr = secondary_attr; - size++; + size++; for (uint32_t i = size - 1; i > 0;) { uint32_t parent = (i-1)/2; if (is_greater_kv(data[parent], data[i])) { @@ -99,7 +143,7 @@ struct Topster { } } - static bool is_greater(const struct KV& i, uint64_t match_score, int64_t primary_attr, int64_t secondary_attr) { + static bool is_greater(const struct KV& i, uint64_t match_score, number_t primary_attr, number_t secondary_attr) { return std::tie (match_score, primary_attr, secondary_attr) > std::tie (i.match_score, i.primary_attr, i.secondary_attr); } diff --git a/src/collection.cpp b/src/collection.cpp index 0a3c2498..1d6a5840 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -747,11 +747,8 @@ Option Collection::search(std::string query, const std::vector::KV> & a, const std::pair::KV> & b) { - if(a.second.match_score != b.second.match_score) return a.second.match_score > b.second.match_score; - if(a.second.primary_attr != b.second.primary_attr) return a.second.primary_attr > b.second.primary_attr; - if(a.second.secondary_attr != b.second.secondary_attr) return a.second.secondary_attr > b.second.secondary_attr; - if(a.first != b.first) return a.first > b.first; // field position - return a.second.key > b.second.key; + return std::tie(a.second.match_score, a.second.primary_attr, a.second.secondary_attr, a.first, a.second.key) > + std::tie(b.second.match_score, b.second.primary_attr, b.second.secondary_attr, b.first, b.second.key); }); result["hits"] = nlohmann::json::array(); diff --git a/test/topster_test.cpp b/test/topster_test.cpp index 8fbaaaf5..169d4ba5 100644 --- a/test/topster_test.cpp +++ b/test/topster_test.cpp @@ -2,7 +2,7 @@ #include "topster.h" #include "match_score.h" -TEST(TopsterTest, StoreMaxValuesWithoutRepetition) { +TEST(TopsterTest, StoreMaxIntValuesWithoutRepetition) { Topster<5> topster; struct { @@ -11,20 +11,62 @@ TEST(TopsterTest, StoreMaxValuesWithoutRepetition) { uint64_t match_score; int64_t primary_attr; int64_t secondary_attr; - } data[10] = { + } data[14] = { + {0, 1, 10, 20, 30}, {0, 1, 10, 20, 30}, {0, 2, 4, 20, 30}, {2, 3, 7, 20, 30}, {0, 4, 11, 20, 30}, {1, 5, 9, 20, 30}, + {1, 5, 9, 20, 30}, + {1, 5, 9, 20, 30}, {0, 6, 6, 20, 30}, {2, 7, 6, 22, 30}, + {2, 7, 6, 22, 30}, {1, 8, 9, 20, 30}, {0, 9, 8, 20, 30}, {3, 10, 5, 20, 30}, }; - for(int i = 0; i < 10; i++) { + for(int i = 0; i < 14; i++) { + topster.add(data[i].key, data[i].query_index, data[i].match_score, data[i].primary_attr, + data[i].secondary_attr); + } + + topster.sort(); + + std::vector ids = {4, 1, 5, 8, 9}; + + for(int i = 0; i < topster.size; i++) { + EXPECT_EQ(ids[i], topster.getKeyAt(i)); + } +} + +TEST(TopsterTest, StoreMaxFloatValuesWithoutRepetition) { + Topster<5> topster; + + struct { + uint16_t query_index; + uint64_t key; + uint64_t match_score; + float primary_attr; + int64_t secondary_attr; + } data[12] = { + {0, 1, 11, 20.04, 30}, + {0, 2, 4, 20, 30}, + {2, 3, 7, 20, 30}, + {0, 4, 11, 20.05, 30}, + {0, 4, 11, 20.05, 30}, + {1, 5, 9, 24.50, 34}, + {0, 6, 6, 20, 30}, + {2, 7, 6, 22, 30}, + {1, 8, 9, 24.50, 30}, + {1, 8, 9, 24.50, 30}, + {0, 9, 8, 24.50, 30}, + {3, 10, 5, 20, 30}, + }; + + for(int i = 0; i < 12; i++) { topster.add(data[i].key, data[i].query_index, data[i].match_score, data[i].primary_attr, data[i].secondary_attr); }