Search across multiple fields.

Need to write more tests.
Kishore Nallan 2017-01-01 18:47:23 +05:30
parent 54a60398ab
commit 2b6293650e
6 changed files with 165 additions and 89 deletions

TODO.md

@ -6,7 +6,7 @@
- ~~Proper JSON as input~~
- ~~Storing raw JSON input to RocksDB~~
- ART for every indexed field
- ~~ART for every indexed field~~
- UTF-8 support for fuzzy search
- Facets
- Filters
@ -19,10 +19,13 @@
- only last token should be prefix searched
- art int search should support signed ints
- storage key prefix should include collection name
- use art for indexing score as well
- ISX what (score based on typo matches)
- Mininum results should be a variable instead of blindly going with max_results
- Minimum results should be a variable instead of blindly going with max_results
- Benchmark with -ffast-math
- Space sensitivity
- Use bitmap index instead of forarray for doc list
- ~~Search across multiple fields~~
- Multi field search tests
- Throw errors when schema is broken
**API**


@ -43,8 +43,13 @@ private:
long long int n);
void log_leaves(const int cost, const std::string &token, const std::vector<art_leaf *> &leaves) const;
void search_candidates(std::vector<std::vector<art_leaf*>> & token_leaves, std::vector<Topster<100>::KV> & result_kvs,
spp::sparse_hash_set<uint64_t> & dedup_seq_ids, size_t & total_results, const size_t & max_results);
std::vector<Topster<100>::KV> search(std::string & query, const std::string & field, const int num_typos, const size_t num_results,
std::vector<Topster<100>::KV> & result_kvs, spp::sparse_hash_set<uint64_t> & result_set,
const token_ordering token_order = FREQUENCY, const bool prefix = false);
void search_candidates(int & token_rank, std::vector<std::vector<art_leaf*>> & token_leaves,
std::vector<Topster<100>::KV> & result_kvs, spp::sparse_hash_set<uint64_t> & dedup_seq_ids,
size_t & total_results, const size_t & max_results);
void index_string_field(const std::string &field_name, art_tree *t, const nlohmann::json &document, uint32_t seq_id) const;
@ -56,12 +61,12 @@ public:
const std::vector<std::string> rank_fields);
~Collection();
std::string add(std::string json_str);
std::vector<nlohmann::json> search(std::string query, const int num_typos, const size_t num_results,
const token_ordering token_order = FREQUENCY, const bool prefix = false);
std::vector<nlohmann::json> search(std::string query, const std::vector<std::string> fields, const int num_typos,
const size_t num_results, const token_ordering token_order = FREQUENCY,
const bool prefix = false);
void remove(std::string id);
void score_results(Topster<100> &topster, const std::vector<art_leaf *> &query_suggestion,
const uint32_t *result_ids,
const size_t result_size) const;
void score_results(Topster<100> &topster, const int & token_rank, const std::vector<art_leaf *> &query_suggestion,
const uint32_t *result_ids, const size_t result_size) const;
enum {MAX_SEARCH_TOKENS = 20};
enum {MAX_RESULTS = 100};


@ -143,7 +143,7 @@ void Collection::index_string_field(const std::string &field_name, art_tree *t,
}
}
void Collection::search_candidates(std::vector<std::vector<art_leaf*>> & token_leaves,
void Collection::search_candidates(int & token_rank, std::vector<std::vector<art_leaf*>> & token_leaves,
std::vector<Topster<100>::KV> & result_kvs, spp::sparse_hash_set<uint64_t> & dedup_seq_ids,
size_t & total_results, const size_t & max_results) {
const size_t combination_limit = 10;
@ -153,6 +153,7 @@ void Collection::search_candidates(std::vector<std::vector<art_leaf*>> & token_l
for(long long n=0; n<N && n<combination_limit; ++n) {
// every element in `query_suggestion` contains a token and its associated hits
std::vector<art_leaf *> query_suggestion = next_suggestion(token_leaves, n);
token_rank++;
/*std::cout << "\nSuggestion: ";
for(auto suggestion_leaf: query_suggestion) {
@ -178,7 +179,7 @@ void Collection::search_candidates(std::vector<std::vector<art_leaf*>> & token_l
// go through each matching document id and calculate match score
Topster<100> topster;
score_results(topster, query_suggestion, result_ids, result_size);
score_results(topster, token_rank, query_suggestion, result_ids, result_size);
delete[] result_ids;
topster.sort();
@ -191,10 +192,58 @@ void Collection::search_candidates(std::vector<std::vector<art_leaf*>> & token_l
}
}
if(total_results >= max_results) break;
if(total_results >= max_results) {
break;
}
}
}
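The Topster<100> used above is a bounded top-K collector: candidate documents are added with their scores and only the best 100 survive, so memory stays constant no matter how many documents a suggestion matches. A minimal stand-in (a sketch simplified to a single score, without the real class's per-key handling):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct MiniTopster {
    struct KV { uint64_t key; uint64_t match_score; };
    static const std::size_t MAX_SIZE = 100;
    std::vector<KV> data;

    void add(uint64_t key, uint64_t match_score) {
        data.push_back({key, match_score});
        if(data.size() > MAX_SIZE) {
            // partition so the best MAX_SIZE entries come first, then drop the rest
            std::nth_element(data.begin(), data.begin() + MAX_SIZE - 1, data.end(),
                             [](const KV & a, const KV & b) { return a.match_score > b.match_score; });
            data.resize(MAX_SIZE);
        }
    }

    void sort() {
        std::sort(data.begin(), data.end(),
                  [](const KV & a, const KV & b) { return a.match_score > b.match_score; });
    }
};

int main() {
    MiniTopster topster;
    for(uint64_t doc_id = 0; doc_id < 500; doc_id++) {
        topster.add(doc_id, doc_id % 137);   // arbitrary scores
    }
    topster.sort();
    std::cout << "kept " << topster.data.size() << " hits, best score: "
              << topster.data[0].match_score << std::endl;   // kept 100 hits, best score: 136
}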
std::vector<nlohmann::json> Collection::search(std::string query, const std::vector<std::string> fields,
const int num_typos, const size_t num_results,
const token_ordering token_order, const bool prefix) {
// The order of `fields` is used to rank results
auto begin = std::chrono::high_resolution_clock::now();
std::vector<std::pair<int, Topster<100>::KV>> field_order_kvs;
for(int i = 0; i < fields.size(); i++) {
const std::string & field = fields[i];
// Container for holding the results
std::vector<Topster<100>::KV> result_kvs;
// To prevent duplicate results, while preserving order of result vector
spp::sparse_hash_set<uint64_t> result_set;
search(query, field, num_typos, num_results, result_kvs, result_set, token_order, prefix);
for(auto result_kv: result_kvs) {
field_order_kvs.push_back(std::make_pair(fields.size() - i, result_kv));
}
}
std::sort(field_order_kvs.begin(), field_order_kvs.end(),
[](const std::pair<int, Topster<100>::KV> & a, const std::pair<int, Topster<100>::KV> & b) {
if(a.second.match_score != b.second.match_score) return a.second.match_score > b.second.match_score;
if(a.second.primary_attr != b.second.primary_attr) return a.second.primary_attr > b.second.primary_attr;
if(a.second.secondary_attr != b.second.secondary_attr) return a.second.secondary_attr > b.second.secondary_attr;
if(a.first != b.first) return a.first > b.first;
return a.second.key > b.second.key;
});
std::vector<nlohmann::json> results;
for(auto field_order_kv: field_order_kvs) {
std::string value;
store->get(get_seq_id_key((uint32_t) field_order_kv.second.key), value);
nlohmann::json document = nlohmann::json::parse(value);
results.push_back(document);
}
long long int timeMicros = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
std::cout << "Time taken for result calc: " << timeMicros << "us" << std::endl;
store->print_memory_usage();
return results;
}
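To make the ordering above concrete, here is a self-contained sketch with made-up scores and a stand-in KV struct (standing in for Topster<100>::KV): the field weight, fields.size() - i, only decides ties once match_score and the two rank attributes are equal.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

struct KV {   // stand-in for Topster<100>::KV
    uint64_t key;
    uint64_t match_score;
    int64_t primary_attr;
    int64_t secondary_attr;
};

int main() {
    // e.g. two fields: weight 2 for hits from the first field, 1 for the second
    std::vector<std::pair<int, KV>> field_order_kvs = {
        {1, {42, 50, 7, 0}},   // doc 42 matched in the lower-priority field
        {2, {42, 50, 7, 0}},   // same doc and scores, matched in the higher-priority field
        {1, {10, 80, 1, 0}},   // doc 10: a higher match score beats any field weight
    };

    std::sort(field_order_kvs.begin(), field_order_kvs.end(),
        [](const std::pair<int, KV> & a, const std::pair<int, KV> & b) {
            if(a.second.match_score != b.second.match_score) return a.second.match_score > b.second.match_score;
            if(a.second.primary_attr != b.second.primary_attr) return a.second.primary_attr > b.second.primary_attr;
            if(a.second.secondary_attr != b.second.secondary_attr) return a.second.secondary_attr > b.second.secondary_attr;
            if(a.first != b.first) return a.first > b.first;
            return a.second.key > b.second.key;
        });

    for(auto & fkv : field_order_kvs) {
        std::cout << "doc " << fkv.second.key << " (field weight " << fkv.first << ")" << std::endl;
    }
    // order: doc 10, then doc 42's higher-priority hit, then its lower-priority duplicate
}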
/*
1. Split the query into tokens
2. Outer loop will generate bounded cartesian product with costs for each token
@ -204,25 +253,22 @@ void Collection::search_candidates(std::vector<std::vector<art_leaf*>> & token_l
4. Intersect the lists to find docs that match each phrase
5. Sort the docs based on some ranking criteria
*/
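To illustrate step 2 in isolation (with hypothetical per-token costs, not the project's own data), the bounded cartesian product can be enumerated by decoding a single counter n into one cost per token, which mirrors what next_suggestion() does with candidate leaves:

#include <iostream>
#include <numeric>
#include <vector>

int main() {
    // hypothetical example: two tokens, each searchable at costs {0, 1, 2}
    std::vector<std::vector<int>> token_to_costs = {{0, 1, 2}, {0, 1, 2}};

    auto product = [](long long a, std::vector<int> & b) { return a * (long long) b.size(); };
    long long N = std::accumulate(token_to_costs.begin(), token_to_costs.end(), 1LL, product);

    const long long combination_limit = 10;   // same bound used by search()
    for(long long n = 0; n < N && n < combination_limit; n++) {
        // decode combination index `n` into one cost per token (last token varies fastest)
        long long q = n;
        std::vector<int> costs(token_to_costs.size());
        for(int i = (int) token_to_costs.size() - 1; i >= 0; i--) {
            costs[i] = token_to_costs[i][q % (long long) token_to_costs[i].size()];
            q /= (long long) token_to_costs[i].size();
        }
        for(int cost : costs) std::cout << cost << " ";
        std::cout << std::endl;
    }
    // prints the 9 combinations 0 0, 0 1, 0 2, 1 0, ... capped at combination_limit
}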
std::vector<nlohmann::json> Collection::search(std::string query, const int num_typos, const size_t num_results,
const token_ordering token_order, const bool prefix) {
auto begin = std::chrono::high_resolution_clock::now();
std::vector<Topster<100>::KV> Collection::search(std::string & query, const std::string & field,
const int num_typos, const size_t num_results,
std::vector<Topster<100>::KV> & result_kvs,
spp::sparse_hash_set<uint64_t> & result_set,
const token_ordering token_order, const bool prefix) {
std::vector<std::string> tokens;
StringUtils::tokenize(query, tokens, " ", true);
const int max_cost = (num_typos < 0 || num_typos > 2) ? 2 : num_typos;
const size_t max_results = std::min(num_results, (size_t) Collection::MAX_RESULTS);
size_t total_results = 0;
std::vector<Topster<100>::KV> result_kvs;
size_t total_results = result_kvs.size();
// To prevent us from doing ART search repeatedly as we iterate through possible corrections
spp::sparse_hash_map<std::string, std::vector<art_leaf*>> token_cost_cache;
// To prevent duplicate results, while preserving order of result vector
spp::sparse_hash_set<uint64_t> result_set;
// Used to drop the least occurring token(s) for partial searches
spp::sparse_hash_map<std::string, uint32_t> token_to_count;
@ -239,8 +285,10 @@ std::vector<nlohmann::json> Collection::search(std::string query, const int num_
}
std::vector<std::vector<art_leaf*>> token_leaves;
const size_t combination_limit = 10;
auto product = []( long long a, std::vector<int>& b ) { return a*b.size(); };
int token_rank = 0;
long long n = 0;
long long int N = std::accumulate(token_to_costs.begin(), token_to_costs.end(), 1LL, product);
@ -256,7 +304,6 @@ std::vector<nlohmann::json> Collection::search(std::string query, const int num_
token_leaves.clear();
int token_index = 0;
bool retry_with_larger_cost = false;
while(token_index < tokens.size()) {
// For each token, look up the generated cost for this iteration and search using that cost
@ -264,13 +311,14 @@ std::vector<nlohmann::json> Collection::search(std::string query, const int num_
const std::string token_cost_hash = token + std::to_string(costs[token_index]);
std::vector<art_leaf*> leaves;
//std::cout << "\nSearching for: " << token << " - cost: " << costs[token_index] << std::endl;
/*std::cout << "\nSearching for: " << token << " - cost: " << costs[token_index] << ", token_rank: "
<< token_rank << std::endl;*/
if(token_cost_cache.count(token_cost_hash) != 0) {
leaves = token_cost_cache[token_cost_hash];
} else {
int token_len = prefix ? (int) token.length() : (int) token.length() + 1;
art_fuzzy_search(index_map.at("title"), (const unsigned char *) token.c_str(), token_len,
art_fuzzy_search(index_map.at(field), (const unsigned char *) token.c_str(), token_len,
costs[token_index], costs[token_index], 3, token_order, prefix, leaves);
if(!leaves.empty()) {
token_cost_cache.emplace(token_cost_hash, leaves);
@ -298,22 +346,16 @@ std::vector<nlohmann::json> Collection::search(std::string query, const int num_
n = -1;
N = std::accumulate(token_to_costs.begin(), token_to_costs.end(), 1LL, product);
// Don't look at remaining tokens if
// a) We've run out of tokens, or b) We're not at max_cost for this token
// since we would see them again in a future iteration when we retry with a larger cost
if(token_index == -1 || costs[token_index] != max_cost) {
retry_with_larger_cost = true;
break;
}
break;
}
token_index++;
}
if(token_leaves.size() != 0 && !retry_with_larger_cost) {
if(token_leaves.size() != 0 && token_leaves.size() == tokens.size()) {
// If a) all tokens were found, or b) Some were skipped because they don't exist within max_cost,
// go ahead and search for candidates with what we have so far
search_candidates(token_leaves, result_kvs, result_set, total_results, max_results);
search_candidates(token_rank, token_leaves, result_kvs, result_set, total_results, max_results);
if (total_results >= max_results) {
// If we don't find enough results, we continue outerloop (looking at tokens with greater cost)
@ -324,7 +366,8 @@ std::vector<nlohmann::json> Collection::search(std::string query, const int num_
n++;
}
if(result_kvs.size() == 0 && token_to_count.size() != 0) {
// When there are not enough overall results and at least one token has results
if(result_kvs.size() < max_results && token_to_count.size() > 1) {
// Drop certain token with least hits and try searching again
std::string truncated_query;
@ -340,27 +383,15 @@ std::vector<nlohmann::json> Collection::search(std::string query, const int num_
);
for(uint32_t i = 0; i < token_count_pairs.size()-1; i++) {
if(token_to_count.count(tokens[i]) != 0) {
if(token_to_count.count(token_count_pairs[i].first) != 0) {
truncated_query += " " + token_count_pairs.at(i).first;
}
}
return search(truncated_query, num_typos, num_results);
return search(truncated_query, field, num_typos, num_results, result_kvs, result_set, token_order, prefix);
}
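The fallback above is easier to see in isolation. In this sketch (with made-up hit counts), tokens are ordered by how many documents they hit, the rarest token is dropped, and the query is rebuilt from the rest:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
    // hypothetical hit counts collected while searching "rocket research launch"
    std::vector<std::pair<std::string, uint32_t>> token_count_pairs = {
        {"rocket", 120}, {"research", 5}, {"launch", 40}
    };

    // most frequent tokens first
    std::sort(token_count_pairs.begin(), token_count_pairs.end(),
              [](const auto & a, const auto & b) { return a.second > b.second; });

    // drop the least frequent token and rebuild the query
    std::string truncated_query;
    for(size_t i = 0; i < token_count_pairs.size() - 1; i++) {
        truncated_query += " " + token_count_pairs[i].first;
    }

    std::cout << truncated_query << std::endl;   // " rocket launch" (leading space, as in the code above)
}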
std::vector<nlohmann::json> results;
for(auto result_kv: result_kvs) {
std::string value;
store->get(get_seq_id_key((uint32_t) result_kv.key), value);
nlohmann::json document = nlohmann::json::parse(value);
results.push_back(document);
}
long long int timeMicros = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
std::cout << "Time taken for result calc: " << timeMicros << "us" << std::endl;
store->print_memory_usage();
return results;
return result_kvs;
}
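The token_cost_cache idiom from the function above, in isolation. This is a sketch: std::unordered_map stands in for spp::sparse_hash_map, and a placeholder function stands in for art_fuzzy_search():

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

std::vector<int> expensive_fuzzy_lookup(const std::string & token, int cost) {
    std::cout << "searching: " << token << " @ cost " << cost << std::endl;
    return {1, 2, 3};   // placeholder for the leaves returned by art_fuzzy_search()
}

int main() {
    std::unordered_map<std::string, std::vector<int>> token_cost_cache;
    const std::string token = "rocket";
    const int cost = 1;

    for(int iteration = 0; iteration < 2; iteration++) {
        // the cache key combines the token with the cost being attempted
        const std::string token_cost_hash = token + std::to_string(cost);
        if(token_cost_cache.count(token_cost_hash) == 0) {
            token_cost_cache.emplace(token_cost_hash, expensive_fuzzy_lookup(token, cost));
        }
        std::vector<int> & leaves = token_cost_cache[token_cost_hash];
        std::cout << "got " << leaves.size() << " leaves" << std::endl;
    }
    // "searching" is printed only once: the second iteration hits the cache
}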
void Collection::log_leaves(const int cost, const std::string &token, const std::vector<art_leaf *> &leaves) const {
@ -374,8 +405,12 @@ void Collection::log_leaves(const int cost, const std::string &token, const std:
}
}
void Collection::score_results(Topster<100> &topster, const std::vector<art_leaf *> &query_suggestion,
const uint32_t *result_ids, const size_t result_size) const {
void Collection::score_results(Topster<100> &topster, const int & token_rank,
const std::vector<art_leaf *> &query_suggestion, const uint32_t *result_ids,
const size_t result_size) const {
const int max_token_rank = 250;
for(auto i=0; i<result_size; i++) {
uint32_t doc_id = result_ids[i];
std::vector<std::vector<uint16_t>> token_positions;
@ -405,18 +440,23 @@ void Collection::score_results(Topster<100> &topster, const std::vector<art_leaf
mscore = MatchScore::match_score(doc_id, token_positions);
}
const uint64_t match_score = (uint64_t)(mscore.words_present * 32 + (MAX_SEARCH_TOKENS - mscore.distance));
int token_rank_score = max_token_rank - token_rank;
// Construct a single match_score from individual components (for multi-field sort)
const uint64_t match_score = (token_rank_score << 16) +
((uint64_t)(mscore.words_present) << 8) +
(MAX_SEARCH_TOKENS - mscore.distance);
int64_t primary_rank_score = primary_rank_scores.count(doc_id) > 0 ? primary_rank_scores.at(doc_id) : 0;
int64_t secondary_rank_score = secondary_rank_scores.count(doc_id) > 0 ? secondary_rank_scores.at(doc_id) : 0;
topster.add(doc_id, match_score, primary_rank_score, secondary_rank_score);
/*std::cout << "mscore.distance: " << (int) mscore.distance << ", match_score: "
/*std::cout << "token_rank_score: " << token_rank_score << ", match_score: "
<< match_score << ", primary_rank_score: " << primary_rank_score << ", doc_id: " << doc_id << std::endl;*/
}
}
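The packed score can be checked by hand. A small standalone example (component values chosen arbitrarily) shows how the three fields combine, and why the suggestion's rank dominates the other two components:

#include <cstdint>
#include <iostream>

int main() {
    const int max_token_rank = 250;
    const int MAX_SEARCH_TOKENS = 20;

    int token_rank = 3;       // third suggestion tried for this query
    int words_present = 2;    // both query tokens were found in the document
    int distance = 4;         // positional spread between the matched tokens

    int token_rank_score = max_token_rank - token_rank;   // 247
    const uint64_t match_score = ((uint64_t) token_rank_score << 16) +
                                 ((uint64_t) words_present << 8) +
                                 (MAX_SEARCH_TOKENS - distance);

    std::cout << match_score << std::endl;   // 247*65536 + 2*256 + 16 = 16187920

    // unpack the components: token_rank_score occupies the highest bits, so it
    // dominates; words_present and the distance component only break ties
    // among suggestions of the same rank
    std::cout << (match_score >> 16) << " "              // 247
              << ((match_score >> 8) & 0xFF) << " "      // 2
              << (match_score & 0xFF) << std::endl;      // 16
}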
inline std::vector<art_leaf *> Collection::next_suggestion(
const std::vector<std::vector<art_leaf *>> &token_leaves,
long long int n) {
inline std::vector<art_leaf *> Collection::next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
long long int n) {
std::vector<art_leaf*> query_suggestion(token_leaves.size());
// generate the next combination from `token_leaves` and store it in `query_suggestion`


@ -56,7 +56,8 @@ int main() {
collection->remove("foo");
auto begin = std::chrono::high_resolution_clock::now();
collection->search("the", 1, 100);
std::vector<std::string> search_fields = {"title"};
collection->search("the", search_fields, 1, 100);
long long int timeMicros = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
cout << "Time taken: " << timeMicros << "us" << endl;
delete collection;


@ -84,7 +84,10 @@ static int get_search(h2o_handler_t *self, h2o_req_t *req) {
printf("Query: %s\n", query_map["q"].c_str());
auto begin = std::chrono::high_resolution_clock::now();
std::vector<nlohmann::json> results = collection->search(query_map["q"], std::stoi(query_map[NUM_TYPOS]), 100, token_order, false);
std::vector<std::string> search_fields = {"title"};
std::vector<nlohmann::json> results = collection->search(query_map["q"], search_fields,
std::stoi(query_map[NUM_TYPOS]), 100, token_order, false);
nlohmann::json json_array = nlohmann::json::array();
for(nlohmann::json& result: results) {
json_array.push_back(result);


@ -7,11 +7,13 @@
class CollectionTest : public ::testing::Test {
protected:
Collection *collection;
std::vector<std::string> search_fields;
virtual void SetUp() {
std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl");
std::vector<field> fields = {field("title", field_type::STRING)};
std::vector<std::string> rank_fields = {"points"};
search_fields = {"title"};
collection = new Collection("/tmp/typesense_test/collection", "collection", fields, rank_fields);
std::string json_line;
@ -29,7 +31,7 @@ protected:
};
TEST_F(CollectionTest, ExactSearchShouldBeStable) {
std::vector<nlohmann::json> results = collection->search("the", 0, 10);
std::vector<nlohmann::json> results = collection->search("the", search_fields, 0, 10);
ASSERT_EQ(7, results.size());
// For two documents of the same score, the larger doc_id appears first
@ -44,8 +46,8 @@ TEST_F(CollectionTest, ExactSearchShouldBeStable) {
}
TEST_F(CollectionTest, ExactPhraseSearch) {
std::vector<nlohmann::json> results = collection->search("rocket launch", 0, 10);
ASSERT_EQ(4, results.size());
std::vector<nlohmann::json> results = collection->search("rocket launch", search_fields, 0, 10);
ASSERT_EQ(5, results.size());
/*
Sort by (match, diff, score)
@ -53,9 +55,10 @@ TEST_F(CollectionTest, ExactPhraseSearch) {
1: score: 15, diff: 4
17: score: 8, diff: 4
16: score: 10, diff: 5
13: score: 12 (single word match)
*/
std::vector<std::string> ids = {"8", "1", "17", "16"};
std::vector<std::string> ids = {"8", "1", "17", "16", "13"};
for(size_t i = 0; i < results.size(); i++) {
nlohmann::json result = results.at(i);
@ -65,7 +68,7 @@ TEST_F(CollectionTest, ExactPhraseSearch) {
}
// Check pagination
results = collection->search("rocket launch", 0, 3);
results = collection->search("rocket launch", search_fields, 0, 3);
ASSERT_EQ(3, results.size());
for(size_t i = 0; i < 3; i++) {
nlohmann::json result = results.at(i);
@ -77,7 +80,7 @@ TEST_F(CollectionTest, ExactPhraseSearch) {
TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
// Tokens that are not found in the index should be skipped
std::vector<nlohmann::json> results = collection->search("DoesNotExist from", 0, 10);
std::vector<nlohmann::json> results = collection->search("DoesNotExist from", search_fields, 0, 10);
ASSERT_EQ(2, results.size());
std::vector<std::string> ids = {"2", "17"};
@ -90,7 +93,7 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
}
// with non-zero cost
results = collection->search("DoesNotExist from", 1, 10);
results = collection->search("DoesNotExist from", search_fields, 1, 10);
ASSERT_EQ(2, results.size());
for(size_t i = 0; i < results.size(); i++) {
@ -101,7 +104,7 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
}
// with 2 indexed words
results = collection->search("from DoesNotExist insTruments", 1, 10);
results = collection->search("from DoesNotExist insTruments", search_fields, 1, 10);
ASSERT_EQ(2, results.size());
ids = {"2", "17"};
@ -113,16 +116,16 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
}
results.clear();
results = collection->search("DoesNotExist1 DoesNotExist2", 0, 10);
results = collection->search("DoesNotExist1 DoesNotExist2", search_fields, 0, 10);
ASSERT_EQ(0, results.size());
results.clear();
results = collection->search("DoesNotExist1 DoesNotExist2", 2, 10);
results = collection->search("DoesNotExist1 DoesNotExist2", search_fields, 2, 10);
ASSERT_EQ(0, results.size());
}
TEST_F(CollectionTest, PartialPhraseSearch) {
std::vector<nlohmann::json> results = collection->search("rocket research", 0, 10);
std::vector<nlohmann::json> results = collection->search("rocket research", search_fields, 0, 10);
ASSERT_EQ(4, results.size());
std::vector<std::string> ids = {"1", "8", "16", "17"};
@ -136,15 +139,23 @@ TEST_F(CollectionTest, PartialPhraseSearch) {
}
TEST_F(CollectionTest, QueryWithTypo) {
std::vector<nlohmann::json> results = collection->search("kind biologcal", 2, 10);
ASSERT_EQ(1, results.size());
std::vector<nlohmann::json> results = collection->search("kind biologcal", search_fields, 2, 3);
ASSERT_EQ(3, results.size());
std::string result_id = results.at(0)["id"];
ASSERT_STREQ("19", result_id.c_str());
std::vector<std::string> ids = {"19", "20", "21"};
for(size_t i = 0; i < results.size(); i++) {
nlohmann::json result = results.at(i);
std::string result_id = result["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
results.clear();
results = collection->search("fer thx", 1, 10);
std::vector<std::string> ids = {"1", "10", "13"};
results = collection->search("fer thx", search_fields, 1, 3);
ids = {"1", "10", "13"};
ASSERT_EQ(3, results.size());
for(size_t i = 0; i < results.size(); i++) {
nlohmann::json result = results.at(i);
@ -155,7 +166,7 @@ TEST_F(CollectionTest, QueryWithTypo) {
}
TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
std::vector<nlohmann::json> results = collection->search("loox", 1, 2, MAX_SCORE, false);
std::vector<nlohmann::json> results = collection->search("loox", search_fields, 1, 2, MAX_SCORE, false);
ASSERT_EQ(2, results.size());
std::vector<std::string> ids = {"22", "23"};
@ -166,7 +177,7 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
results = collection->search("loox", 1, 3, FREQUENCY, false);
results = collection->search("loox", search_fields, 1, 3, FREQUENCY, false);
ASSERT_EQ(3, results.size());
ids = {"3", "12", "24"};
@ -178,17 +189,17 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
}
// Check pagination
results = collection->search("loox", 1, 1, FREQUENCY, false);
results = collection->search("loox", search_fields, 1, 1, FREQUENCY, false);
ASSERT_EQ(1, results.size());
std::string solo_id = results.at(0)["id"];
ASSERT_STREQ("3", solo_id.c_str());
results = collection->search("loox", 1, 2, FREQUENCY, false);
results = collection->search("loox", search_fields, 1, 2, FREQUENCY, false);
ASSERT_EQ(2, results.size());
// Check total ordering
results = collection->search("loox", 1, 10, FREQUENCY, false);
results = collection->search("loox", search_fields, 1, 10, FREQUENCY, false);
ASSERT_EQ(5, results.size());
ids = {"3", "12", "24", "22", "23"};
@ -199,7 +210,7 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
results = collection->search("loox", 1, 10, MAX_SCORE, false);
results = collection->search("loox", search_fields, 1, 10, MAX_SCORE, false);
ASSERT_EQ(5, results.size());
ids = {"22", "23", "3", "12", "24"};
@ -213,10 +224,23 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
TEST_F(CollectionTest, TextContainingAnActualTypo) {
// A line contains "ISX" but not "what" - need to ensure that correction to "ISS what" happens
std::vector<nlohmann::json> results = collection->search("ISX what", 1, 10, FREQUENCY, false);
ASSERT_EQ(5, results.size());
std::vector<nlohmann::json> results = collection->search("ISX what", search_fields, 1, 4, FREQUENCY, false);
ASSERT_EQ(4, results.size());
std::vector<std::string> ids = {"20", "19", "6", "21", "8"};
std::vector<std::string> ids = {"19", "6", "21", "8"};
for(size_t i = 0; i < results.size(); i++) {
nlohmann::json result = results.at(i);
std::string result_id = result["id"];
std::string id = ids.at(i);
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
// Record containing exact token match should appear first
results = collection->search("ISX", search_fields, 1, 10, FREQUENCY, false);
ASSERT_EQ(8, results.size());
ids = {"20", "19", "6", "3", "21", "4", "10", "8"};
for(size_t i = 0; i < results.size(); i++) {
nlohmann::json result = results.at(i);
@ -227,7 +251,7 @@ TEST_F(CollectionTest, TextContainingAnActualTypo) {
}
TEST_F(CollectionTest, PrefixSearching) {
std::vector<nlohmann::json> results = collection->search("ex", 0, 10, FREQUENCY, true);
std::vector<nlohmann::json> results = collection->search("ex", search_fields, 0, 10, FREQUENCY, true);
ASSERT_EQ(2, results.size());
std::vector<std::string> ids = {"12", "6"};
@ -238,7 +262,7 @@ TEST_F(CollectionTest, PrefixSearching) {
ASSERT_STREQ(id.c_str(), result_id.c_str());
}
results = collection->search("ex", 0, 10, MAX_SCORE, true);
results = collection->search("ex", search_fields, 0, 10, MAX_SCORE, true);
ASSERT_EQ(2, results.size());
ids = {"6", "12"};