Merge branch '0.22.0-rc' into postings-refactor-integration

# Conflicts:
#	src/collection.cpp
#	src/index.cpp
#	test/collection_specific_test.cpp

commit 261536d0f4
include/field.h

@@ -5,6 +5,7 @@
 #include "art.h"
 #include "option.h"
 #include "string_utils.h"
+#include "logger.h"
 #include "json.hpp"
 
 namespace field_types {
@@ -192,6 +193,10 @@ struct field {
     bool found_default_sorting_field = false;
 
     for(const field & field: fields) {
+        if(field.name == "id") {
+            continue;
+        }
+
         nlohmann::json field_val;
         field_val[fields::name] = field.name;
         field_val[fields::type] = field.type;
@@ -263,6 +268,13 @@ struct field {
     size_t num_auto_detect_fields = 0;
 
     for(nlohmann::json & field_json: fields_json) {
+        if(field_json["name"] == "id") {
+            // No field should exist with the name "id" as it is reserved for internal use
+            // We cannot throw an error here anymore since that will break backward compatibility!
+            LOG(WARNING) << "Collection schema cannot contain a field with name `id`. Ignoring field.";
+            continue;
+        }
+
         if(!field_json.is_object() ||
            field_json.count(fields::name) == 0 || field_json.count(fields::type) == 0 ||
            !field_json.at(fields::name).is_string() || !field_json.at(fields::type).is_string()) {
@@ -463,6 +475,7 @@ namespace sort_field_const {
     static const std::string seq_id = "_seq_id";
 
     static const std::string exclude_radius = "exclude_radius";
+    static const std::string precision = "precision";
 }
 
 struct sort_by {
@@ -472,14 +485,16 @@ struct sort_by {
     // geo related fields
     int64_t geopoint;
     uint32_t exclude_radius;
+    uint32_t geo_precision;
 
     sort_by(const std::string & name, const std::string & order):
-            name(name), order(order), geopoint(0), exclude_radius(0) {
+            name(name), order(order), geopoint(0), exclude_radius(0), geo_precision(0) {
 
     }
 
-    sort_by(const std::string &name, const std::string &order, int64_t geopoint, uint32_t exclude_radius) :
-            name(name), order(order), geopoint(geopoint), exclude_radius(exclude_radius) {
+    sort_by(const std::string &name, const std::string &order, int64_t geopoint,
+            uint32_t exclude_radius, uint32_t geo_precision) :
+            name(name), order(order), geopoint(geopoint), exclude_radius(exclude_radius), geo_precision(geo_precision) {
 
     }
 
@@ -488,6 +503,7 @@ struct sort_by {
         order = other.order;
         geopoint = other.geopoint;
         exclude_radius = other.exclude_radius;
+        geo_precision = other.geo_precision;
         return *this;
     }
 };
include/topster.h

@@ -16,6 +16,9 @@ struct KV {
     uint64_t distinct_key{};
     int64_t scores[3]{}; // match score + 2 custom attributes
 
+    // to be used only in final aggregation
+    uint64_t* query_indices = nullptr;
+
     KV(uint8_t field_id, uint16_t queryIndex, uint32_t token_bits, uint64_t key, uint64_t distinct_key,
        uint8_t match_score_index, const int64_t *scores):
             field_id(field_id), match_score_index(match_score_index),
@@ -27,6 +30,69 @@ struct KV {
     }
 
     KV() = default;
 
+    KV(KV& kv) = default;
+
+    KV(KV&& kv) noexcept : field_id(kv.field_id), match_score_index(kv.match_score_index),
+                           query_index(kv.query_index), array_index(kv.array_index), token_bits(kv.token_bits),
+                           key(kv.key), distinct_key(kv.distinct_key) {
+
+        scores[0] = kv.scores[0];
+        scores[1] = kv.scores[1];
+        scores[2] = kv.scores[2];
+
+        query_indices = kv.query_indices;
+        kv.query_indices = nullptr;
+    }
+
+    KV& operator=(KV&& kv) noexcept {
+        if (this != &kv) {
+            field_id = kv.field_id;
+            match_score_index = kv.match_score_index;
+            query_index = kv.query_index;
+            array_index = kv.array_index;
+            token_bits = kv.token_bits;
+            key = kv.key;
+            distinct_key = kv.distinct_key;
+
+            scores[0] = kv.scores[0];
+            scores[1] = kv.scores[1];
+            scores[2] = kv.scores[2];
+
+            delete[] query_indices;
+            query_indices = kv.query_indices;
+            kv.query_indices = nullptr;
+        }
+
+        return *this;
+    }
+
+    KV& operator=(KV& kv) noexcept {
+        if (this != &kv) {
+            field_id = kv.field_id;
+            match_score_index = kv.match_score_index;
+            query_index = kv.query_index;
+            array_index = kv.array_index;
+            token_bits = kv.token_bits;
+            key = kv.key;
+            distinct_key = kv.distinct_key;
+
+            scores[0] = kv.scores[0];
+            scores[1] = kv.scores[1];
+            scores[2] = kv.scores[2];
+
+            delete[] query_indices;
+            query_indices = kv.query_indices;
+            kv.query_indices = nullptr;
+        }
+
+        return *this;
+    }
+
+    ~KV() {
+        delete [] query_indices;
+        query_indices = nullptr;
+    }
 };
 
 /*
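Reviewer note on the ownership rules above: `query_indices` is a raw heap array owned by each KV, so every transfer has to null the source pointer, otherwise the array would be freed twice. Note also that the lvalue `operator=(KV&)` steals the pointer despite its copy-looking signature, so an assigned-from KV loses its indices. The snippet below is a minimal standalone model of the same pattern (`Owner` is a hypothetical stand-in, not code from this diff):

    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Minimal model of KV's ownership pattern: transfers steal the raw array
    // and null it in the source, so delete[] runs exactly once per allocation.
    struct Owner {
        uint64_t* query_indices = nullptr;

        Owner() = default;
        Owner(Owner&& o) noexcept : query_indices(o.query_indices) {
            o.query_indices = nullptr;   // source gives up ownership
        }
        Owner& operator=(Owner&& o) noexcept {
            if(this != &o) {
                delete[] query_indices;  // free whatever we already held
                query_indices = o.query_indices;
                o.query_indices = nullptr;
            }
            return *this;
        }
        ~Owner() { delete[] query_indices; }
    };

    int main() {
        Owner a;
        a.query_indices = new uint64_t[3]{2, 7, 9};
        Owner b = std::move(a);          // b owns the array now, a holds nullptr
        printf("a=%p b[0]=%llu\n", (void*)a.query_indices,
               (unsigned long long) b.query_indices[0]);
        return 0;
    }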
src/collection.cpp

@@ -338,7 +338,8 @@ void Collection::batch_index(std::vector<std::vector<index_record>> &index_batch
                 res["code"] = index_record.indexed.code();
             }
 
-            json_out[index_record.position] = res.dump();
+            json_out[index_record.position] = res.dump(-1, ' ', false,
+                                                       nlohmann::detail::error_handler_t::ignore);
         }
     }
 }
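Why the `dump()` change above matters (a sketch, assuming the project's vendored nlohmann::json): the default `error_handler_t::strict` throws when a string field holds invalid UTF-8, so a single bad document could abort serialization of the whole import response. `error_handler_t::ignore` lets `dump()` finish instead; the `HandleBadCharactersInStringGracefully` test further down exercises this path.

    #include <iostream>
    #include "json.hpp" // nlohmann::json single header, vendored by the project

    int main() {
        nlohmann::json res;
        res["error"] = "bad utf-8 ahead: \xC3\x28"; // invalid two-byte sequence

        // res.dump() with the default strict handler throws json::type_error 316
        // here; passing error_handler_t::ignore serializes without throwing.
        std::cout << res.dump(-1, ' ', false,
                              nlohmann::detail::error_handler_t::ignore)
                  << std::endl;
        return 0;
    }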
@@ -753,36 +754,54 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
 
             if(geo_parts.size() == 3) {
                 // try to parse the exclude radius option
-                if(!StringUtils::begins_with(geo_parts[2], sort_field_const::exclude_radius)) {
-                    return Option<nlohmann::json>(400, error);
-                }
+                bool is_exclude_option = false;
 
-                std::vector<std::string> exclude_parts;
-                StringUtils::split(geo_parts[2], exclude_parts, ":");
-
-                if(exclude_parts.size() != 2) {
-                    return Option<nlohmann::json>(400, error);
-                }
-
-                std::vector<std::string> exclude_value_parts;
-                StringUtils::split(exclude_parts[1], exclude_value_parts, " ");
-
-                if(exclude_value_parts.size() != 2) {
-                    return Option<nlohmann::json>(400, error);
-                }
-
-                if(!StringUtils::is_float(exclude_value_parts[0])) {
-                    return Option<nlohmann::json>(400, error);
-                }
-
-                if(exclude_value_parts[1] == "km") {
-                    sort_field_std.exclude_radius = std::stof(exclude_value_parts[0]) * 1000;
-                } else if(exclude_value_parts[1] == "mi") {
-                    sort_field_std.exclude_radius = std::stof(exclude_value_parts[0]) * 1609.34;
+                if(StringUtils::begins_with(geo_parts[2], sort_field_const::exclude_radius)) {
+                    is_exclude_option = true;
+                } else if(StringUtils::begins_with(geo_parts[2], sort_field_const::precision)) {
+                    is_exclude_option = false;
                 } else {
-                    return Option<nlohmann::json>(400, "Sort field's exclude radius "
-                                                       "unit must be either `km` or `mi`.");
+                    return Option<nlohmann::json>(400, error);
                 }
+
+                std::vector<std::string> param_parts;
+                StringUtils::split(geo_parts[2], param_parts, ":");
+
+                if(param_parts.size() != 2) {
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                std::vector<std::string> param_value_parts;
+                StringUtils::split(param_parts[1], param_value_parts, " ");
+
+                if(param_value_parts.size() != 2) {
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                if(!StringUtils::is_float(param_value_parts[0])) {
+                    return Option<nlohmann::json>(400, error);
+                }
+
+                int32_t value_meters;
+
+                if(param_value_parts[1] == "km") {
+                    value_meters = std::stof(param_value_parts[0]) * 1000;
+                } else if(param_value_parts[1] == "mi") {
+                    value_meters = std::stof(param_value_parts[0]) * 1609.34;
+                } else {
+                    return Option<nlohmann::json>(400, "Sort field's parameter "
+                                                       "unit must be either `km` or `mi`.");
+                }
+
+                if(value_meters <= 0) {
+                    return Option<nlohmann::json>(400, "Sort field's parameter must be a positive number.");
+                }
+
+                if(is_exclude_option) {
+                    sort_field_std.exclude_radius = value_meters;
+                } else {
+                    sort_field_std.geo_precision = value_meters;
+                }
             }
 
             double lat = std::stod(geo_parts[0]);
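For reference, the optional third geo-sort argument this parser accepts is `exclude_radius: <n> km|mi` or `precision: <n> km|mi`: the colon is required, the magnitude must be a positive float, the unit must be `km` or `mi`, and the value is normalized to meters. A small sketch listing accepted and rejected forms, with values borrowed or adapted from the tests in collection_sorting_test.cpp below:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        // First two forms pass the parsing above; the rest hit its error branches.
        std::vector<std::string> sort_params = {
            "loc(32.24348, 77.1893, exclude_radius: 1 km)", // ok: ties within 1 km
            "loc(32.24348, 77.1893, precision: 0.9 km)",    // ok: 900 m distance buckets
            "loc(32.24348, 77.1893, precision 1 km)",       // bad syntax: missing colon
            "loc(32.24348, 77.1893, precision: 1 meter)",   // unit must be `km` or `mi`
            "loc(32.24348, 77.1893, precision: -10 km)",    // must be a positive number
        };
        for(const std::string& s : sort_params) {
            std::cout << s << std::endl;
        }
        return 0;
    }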
@@ -1387,12 +1406,22 @@ void Collection::aggregate_topster(size_t query_index, Topster& agg_topster, Top
         Topster* group_topster = group_topster_entry.second;
         for(const auto& map_kv: group_topster->kv_map) {
             map_kv.second->query_index += query_index;
+            if(map_kv.second->query_indices != nullptr) {
+                for(size_t i = 0; i < map_kv.second->query_indices[0]; i++) {
+                    map_kv.second->query_indices[i+1] += query_index;
+                }
+            }
             agg_topster.add(map_kv.second);
         }
     }
 } else {
     for(const auto& map_kv: index_topster->kv_map) {
         map_kv.second->query_index += query_index;
+        if(map_kv.second->query_indices != nullptr) {
+            for(size_t i = 0; i < map_kv.second->query_indices[0]; i++) {
+                map_kv.second->query_indices[i+1] += query_index;
+            }
+        }
         agg_topster.add(map_kv.second);
     }
 }
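The `query_indices` array being remapped here uses a length-prefixed layout: slot 0 holds the entry count, and slots 1..count hold one searched-query index per matched field (see the allocation in `Index::search` further down). A standalone sketch of the layout and of the offsetting the loops above perform:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Length-prefixed layout: [count, q0, q1, ...], as allocated in Index::search.
        const uint64_t per_field_query[] = {0, 2, 3}; // query index per matched field
        const size_t n = 3;

        uint64_t* query_indices = new uint64_t[n + 1];
        query_indices[0] = n; // count goes first
        for(size_t i = 0; i < n; i++) {
            query_indices[i + 1] = per_field_query[i];
        }

        // aggregate_topster shifts every stored index by the base query_index
        // of the batch being merged, exactly like the loops above.
        const uint64_t query_index = 10;
        for(size_t i = 0; i < query_indices[0]; i++) {
            query_indices[i + 1] += query_index;
        }

        for(size_t i = 0; i <= n; i++) {
            printf("%llu ", (unsigned long long) query_indices[i]); // 3 10 12 13
        }
        printf("\n");
        delete[] query_indices;
        return 0;
    }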
@@ -1497,20 +1526,29 @@ void Collection::highlight_result(const field &search_field,
     std::vector<art_leaf*> query_suggestion;
     std::set<std::string> query_suggestion_tokens;
 
-    for (const art_leaf* token_leaf : searched_queries[field_order_kv->query_index]) {
-        // Must search for the token string fresh on that field for the given document since `token_leaf`
-        // is from the best matched field and need not be present in other fields of a document.
-        Index* index = indices[field_order_kv->key % num_memory_shards];
-        art_leaf *actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len);
+    size_t qindex = 0;
 
-        //LOG(INFO) << "field: " << search_field.name << ", key: " << token_leaf->key;
-        if(actual_leaf != nullptr) {
-            query_suggestion.push_back(actual_leaf);
-            std::string token(reinterpret_cast<char*>(actual_leaf->key), actual_leaf->key_len-1);
-            query_suggestion_tokens.insert(token);
+    do {
+        auto searched_query =
+                (field_order_kv->query_indices == nullptr) ? searched_queries[field_order_kv->query_index] :
+                searched_queries[field_order_kv->query_indices[qindex + 1]];
+
+        for (art_leaf* token_leaf : searched_query) {
+            // Must search for the token string fresh on that field for the given document since `token_leaf`
+            // is from the best matched field and need not be present in other fields of a document.
+            Index* index = indices[field_order_kv->key % num_memory_shards];
+            art_leaf* actual_leaf = index->get_token_leaf(search_field.name, &token_leaf->key[0], token_leaf->key_len);
+
+            if(actual_leaf != nullptr) {
+                query_suggestion.push_back(actual_leaf);
+                std::string token(reinterpret_cast<char*>(actual_leaf->key), actual_leaf->key_len - 1);
+                //LOG(INFO) << "field: " << search_field.name << ", key: " << token;
+                query_suggestion_tokens.insert(token);
+            }
         }
-    }
+
+        qindex++;
+    } while(field_order_kv->query_indices != nullptr && qindex < field_order_kv->query_indices[0]);
 
     if(query_suggestion.size() != q_tokens.size()) {
         // can happen for compound query matched across 2 fields when some tokens are dropped
@@ -1525,6 +1563,7 @@ void Collection::highlight_result(const field &search_field,
                                               q_token.size() + 1);
         if(actual_leaf != nullptr) {
             query_suggestion.push_back(actual_leaf);
+            query_suggestion_tokens.insert(q_token);
         }
     }
 }
@@ -1614,9 +1653,10 @@ void Collection::highlight_result(const field &search_field,
 
     highlight.matched_tokens.emplace_back();
     std::vector<std::string>& matched_tokens = highlight.matched_tokens.back();
+    bool found_first_match = false;
 
     while(tokenizer.next(raw_token, raw_token_index, tok_start, tok_end)) {
-        if(token_offsets.empty()) {
+        if(!found_first_match) {
             if(snippet_start_window.size() == highlight_affix_num_tokens + 1) {
                 snippet_start_window.pop_front();
             }
@@ -1624,7 +1664,10 @@ void Collection::highlight_result(const field &search_field,
             snippet_start_window.push_back(tok_start);
         }
 
-        if (token_hits.count(raw_token) != 0 ||
+        bool token_already_found = token_hits.count(raw_token) != 0;
+
+        // ensures that the `snippet_start_offset` is always from a matched token, and not from query suggestion
+        if ((found_first_match && token_already_found) ||
             (match_offset_index < match.offsets.size() &&
              match.offsets[match_offset_index].offset == raw_token_index)) {
 
@@ -1637,9 +1680,15 @@ void Collection::highlight_result(const field &search_field,
             } while(match_offset_index < match.offsets.size() &&
                     match.offsets[match_offset_index - 1].offset == match.offsets[match_offset_index].offset);
 
-            if(token_offsets.size() == 1) {
+            if(!found_first_match) {
                 snippet_start_offset = snippet_start_window.front();
             }
+
+            found_first_match = true;
+
+        } else if(query_suggestion_tokens.find(raw_token) != query_suggestion_tokens.end()) {
+            token_offsets.emplace(tok_start, tok_end);
+            token_hits.insert(raw_token);
         }
 
         if(raw_token_index == last_valid_offset + highlight_affix_num_tokens) {
@@ -1669,6 +1718,11 @@ void Collection::highlight_result(const field &search_field,
     auto offset_it = token_offsets.begin();
     std::stringstream highlighted_text;
 
+    // tokens from query might occur before actual snippet start offset: we skip that
+    while(offset_it != token_offsets.end() && offset_it->first < snippet_start_offset) {
+        offset_it++;
+    }
+
     for(size_t i = snippet_start_offset; i <= snippet_end_offset; i++) {
         if(offset_it != token_offsets.end()) {
             if (i == offset_it->first) {
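The do/while introduced in `highlight_result` above visits a single query when `query_indices` is null (the common case) and every per-field query otherwise, so highlighting can collect tokens from each field's best matching query. A minimal standalone sketch of that iteration pattern:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint64_t storage[] = {2, 4, 7};          // count = 2, then two query indices
        const uint64_t* query_indices = storage; // set to nullptr for the single-query case
        const uint64_t query_index = 4;          // fallback used when query_indices is null

        size_t qindex = 0;
        do {
            uint64_t q = (query_indices == nullptr) ? query_index
                                                    : query_indices[qindex + 1];
            printf("highlight using searched query %llu\n", (unsigned long long) q);
            qindex++;
        } while(query_indices != nullptr && qindex < query_indices[0]);
        return 0; // prints queries 4 and 7
    }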
src/index.cpp

@@ -1649,13 +1649,18 @@ void Index::search(const std::vector<query_tokens_t>& field_query_tokens,
             auto& kvs = seq_id_kvs.second; // each `kv` can be from a different field
 
             std::sort(kvs.begin(), kvs.end(), Topster::is_greater);
+            kvs[0]->query_indices = new uint64_t[kvs.size() + 1];
+            kvs[0]->query_indices[0] = kvs.size();
 
-            // LOG(INFO) << "DOC ID: " << seq_id << ", score: " << kvs[0]->scores[kvs[0]->match_score_index];
+            //LOG(INFO) << "DOC ID: " << seq_id << ", score: " << kvs[0]->scores[kvs[0]->match_score_index];
 
             // to calculate existing aggregate scores across best matching fields
             spp::sparse_hash_map<uint8_t, KV*> existing_field_kvs;
-            for(const auto kv: kvs) {
-                existing_field_kvs.emplace(kv->field_id, kv);
+            for(size_t kv_i = 0; kv_i < kvs.size(); kv_i++) {
+                existing_field_kvs.emplace(kvs[kv_i]->field_id, kvs[kv_i]);
+                kvs[0]->query_indices[kv_i+1] = kvs[kv_i]->query_index;
+                /*LOG(INFO) << "kv_i: " << kv_i << ", kvs[kv_i]->query_index: " << kvs[kv_i]->query_index << ", "
+                          << "searched_query: " << searched_queries[kvs[kv_i]->query_index][0];*/
             }
 
             uint32_t token_bits = (uint32_t(1) << 31); // topmost bit set to guarantee at least 1 bit set
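A side note on the trailing context line: pre-setting bit 31 of `token_bits` guarantees the value is non-zero before any token bit is OR-ed in; assuming the scoring code derives a matched-token count from it with a popcount-style operation, that count can then never be zero. Sketch of the invariant:

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint32_t token_bits = (uint32_t(1) << 31);      // sentinel: at least 1 bit set
        printf("%d\n", __builtin_popcount(token_bits)); // 1, never 0
        token_bits |= (1u << 3) | (1u << 5);            // two matched tokens recorded
        printf("%d\n", __builtin_popcount(token_bits)); // 3
        return 0;
    }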
@@ -2098,6 +2103,11 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const uint16
                         dist = 0;
                     }
 
+                    if(sort_fields[i].geo_precision > 0) {
+                        dist = dist + sort_fields[i].geo_precision - 1 -
+                               (dist + sort_fields[i].geo_precision - 1) % sort_fields[i].geo_precision;
+                    }
+
                     geopoint_distances[i].emplace(seq_id, dist);
                 }
 
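The expression added above rounds each distance up to the next multiple of `geo_precision` (an integer ceiling), so documents that land in the same bucket compare equal on distance and fall through to the next sort field. A standalone check of the arithmetic:

    #include <cassert>
    #include <cstdint>

    // dist + p - 1 - (dist + p - 1) % p == ceil(dist / p) * p for p > 0
    static int64_t bucket(int64_t dist, int64_t p) {
        return dist + p - 1 - (dist + p - 1) % p;
    }

    int main() {
        assert(bucket(1534, 900) == 1800); // 1534 m rounds up into the 1800 m bucket
        assert(bucket(900, 900) == 900);   // exact multiples are unchanged
        assert(bucket(1, 900) == 900);
        return 0;
    }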
test/collection_locale_test.cpp

@@ -263,7 +263,7 @@ TEST_F(CollectionLocaleTest, SearchAgainstThaiTextExactMatch) {
     ASSERT_EQ("ติดกับดัก<mark>ราย</mark><mark>ได้</mark>ปานกลาง",
               results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
 
-    ASSERT_EQ("ข้อมูลรายคนหรือ<mark>ราย</mark>บริษัทในการเชื่อมโยงส่วน<mark>ได้</mark>ส่วนเสีย",
+    ASSERT_EQ("ข้อมูล<mark>ราย</mark>คนหรือ<mark>ราย</mark>บริษัทในการเชื่อมโยงส่วน<mark>ได้</mark>ส่วนเสีย",
               results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
 }
test/collection_manager_test.cpp

@@ -232,7 +232,6 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
     std::vector<std::string> facets;
 
     nlohmann::json results = collection1->search("thomas", search_fields, "", facets, sort_fields, {0}, 10, 1, FREQUENCY, {false}).get();
-    LOG(INFO) << results;
     ASSERT_EQ(4, results["hits"].size());
 
     std::unordered_map<std::string, field> schema = collection1->get_schema();
test/collection_sorting_test.cpp

@@ -756,7 +756,101 @@ TEST_F(CollectionSortingTest, GeoPointSortingWithExcludeRadius) {
                            {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
 
     ASSERT_FALSE(res_op.ok());
-    ASSERT_EQ("Sort field's exclude radius unit must be either `km` or `mi`.", res_op.error());
+    ASSERT_EQ("Sort field's parameter unit must be either `km` or `mi`.", res_op.error());
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, exclude_radius: -10 km)", "ASC") };
+    res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                           {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Sort field's parameter must be a positive number.", res_op.error());
 
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSortingTest, GeoPointSortingWithPrecision) {
+    Collection* coll1;
+
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("loc", field_types::GEOPOINT, false),
+                                 field("points", field_types::INT32, false),};
+
+    coll1 = collectionManager.get_collection("coll1").get();
+    if (coll1 == nullptr) {
+        coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+    }
+
+    std::vector<std::vector<std::string>> records = {
+        {"Tibetan Colony", "32.24678, 77.19239"},
+        {"Civil Hospital", "32.23959, 77.18763"},
+        {"Johnson Lodge", "32.24751, 77.18814"},
+
+        {"Lion King Rock", "32.24493, 77.17038"},
+        {"Jai Durga Handloom", "32.25749, 77.17583"},
+        {"Panduropa", "32.26059, 77.21798"},
+
+        {"Police Station", "32.23743, 77.18639"},
+        {"Panduropa Post", "32.26263, 77.2196"},
+    };
+
+    for(size_t i = 0; i < records.size(); i++) {
+        nlohmann::json doc;
+
+        std::vector<std::string> lat_lng;
+        StringUtils::split(records[i][1], lat_lng, ", ");
+
+        double lat = std::stod(lat_lng[0]);
+        double lng = std::stod(lat_lng[1]);
+
+        doc["id"] = std::to_string(i);
+        doc["title"] = records[i][0];
+        doc["loc"] = {lat, lng};
+        doc["points"] = i;
+
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    std::vector<sort_by> geo_sort_fields = {
+        sort_by("loc(32.24348, 77.1893, precision: 0.9 km)", "ASC"),
+        sort_by("points", "DESC"),
+    };
+
+    auto results = coll1->search("*",
+                                 {}, "loc: (32.24348, 77.1893, 20 km)",
+                                 {}, geo_sort_fields, {0}, 10, 1, FREQUENCY).get();
+
+    ASSERT_EQ(8, results["found"].get<size_t>());
+
+    std::vector<std::string> expected_ids = {
+        "6", "2", "1", "0", "3", "4", "7", "5"
+    };
+
+    for(size_t i = 0; i < expected_ids.size(); i++) {
+        ASSERT_STREQ(expected_ids[i].c_str(), results["hits"][i]["document"]["id"].get<std::string>().c_str());
+    }
+
+    // badly formatted precision
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, precision 1 km)", "ASC") };
+    auto res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                                {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Bad syntax for geopoint sorting field `loc`", res_op.error());
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, precision: 1 meter)", "ASC") };
+    res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                           {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Sort field's parameter unit must be either `km` or `mi`.", res_op.error());
+
+    geo_sort_fields = { sort_by("loc(32.24348, 77.1893, precision: -10 km)", "ASC") };
+    res_op = coll1->search("*", {}, "loc: (32.24348, 77.1893, 20 km)",
+                           {}, geo_sort_fields, {0}, 10, 1, FREQUENCY);
+
+    ASSERT_FALSE(res_op.ok());
+    ASSERT_EQ("Sort field's parameter must be a positive number.", res_op.error());
+
+    collectionManager.drop_collection("coll1");
+}
test/collection_specific_test.cpp

@@ -606,10 +606,117 @@ TEST_F(CollectionSpecificTest, TokensSpreadAcrossFields) {
                                  spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
                                  "<mark>", "</mark>", {4, 1}).get();
 
-    LOG(INFO) << results;
-
     ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
     ASSERT_EQ("1", results["hits"][1]["document"]["id"].get<std::string>());
 
     collectionManager.drop_collection("coll1");
 }
+
+TEST_F(CollectionSpecificTest, GuardAgainstIdFieldInSchema) {
+    // The "id" field, if defined in the schema should be ignored
+
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("id", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    nlohmann::json schema;
+    schema["name"] = "books";
+    schema["fields"] = nlohmann::json::array();
+    schema["fields"][0]["name"] = "title";
+    schema["fields"][0]["type"] = "string";
+    schema["fields"][1]["name"] = "id";
+    schema["fields"][1]["type"] = "string";
+    schema["fields"][2]["name"] = "points";
+    schema["fields"][2]["type"] = "int32";
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    ASSERT_EQ(0, coll1->get_schema().count("id"));
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionSpecificTest, HandleBadCharactersInStringGracefully) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+    std::string doc_str = "不推荐。\",\"price\":10.12,\"ratings\":5}";
+
+    auto add_op = coll1->add(doc_str);
+    ASSERT_FALSE(add_op.ok());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionSpecificTest, HighlightSecondaryFieldWithPrefixMatch) {
+    std::vector<field> fields = {field("title", field_types::STRING, false),
+                                 field("description", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["title"] = "Functions and Equations";
+    doc1["description"] = "Use a function to solve an equation.";
+    doc1["points"] = 100;
+
+    nlohmann::json doc2;
+    doc2["id"] = "1";
+    doc2["title"] = "Function of effort";
+    doc2["description"] = "Learn all about it.";
+    doc2["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump()).ok());
+
+    auto results = coll1->search("function", {"title", "description"}, "", {}, {}, {0}, 10,
+                                 1, FREQUENCY, {true, true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 40, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {1, 1}).get();
+
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+
+    ASSERT_EQ("<mark>Functions</mark> and Equations",
+              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    ASSERT_EQ("Use a <mark>function</mark> to solve an equation.",
+              results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
+
+TEST_F(CollectionSpecificTest, HighlightWithDropTokens) {
+    std::vector<field> fields = {field("description", field_types::STRING, false),
+                                 field("points", field_types::INT32, false),};
+
+    Collection* coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
+
+    nlohmann::json doc1;
+    doc1["id"] = "0";
+    doc1["description"] = "HPE Aruba AP-575 802.11ax Wireless Access Point - TAA Compliant - 2.40 GHz, "
+                          "5 GHz - MIMO Technology - 1 x Network (RJ-45) - Gigabit Ethernet - Bluetooth 5";
+    doc1["points"] = 100;
+
+    ASSERT_TRUE(coll1->add(doc1.dump()).ok());
+
+    auto results = coll1->search("HPE Aruba AP-575 Technology Gigabit Bluetooth 5", {"description"}, "", {}, {}, {0}, 10,
+                                 1, FREQUENCY, {true},
+                                 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "description", 40, {}, {}, {}, 0,
+                                 "<mark>", "</mark>").get();
+
+    ASSERT_EQ(1, results["hits"][0]["highlights"].size());
+    ASSERT_EQ("0", results["hits"][0]["document"]["id"].get<std::string>());
+
+    ASSERT_EQ("<mark>HPE</mark> <mark>Aruba</mark> <mark>AP-575</mark> 802.11ax Wireless Access Point - "
+              "TAA Compliant - 2.40 GHz, <mark>5</mark> GHz - MIMO <mark>Technology</mark> - 1 x Network (RJ-45) - "
+              "<mark>Gigabit</mark> Ethernet - <mark>Bluetooth</mark> <mark>5</mark>",
+              results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+
+    collectionManager.drop_collection("coll1");
+}
test/collection_test.cpp

@@ -746,23 +746,23 @@ TEST_F(CollectionTest, ArrayStringFieldHighlight) {
         ASSERT_STREQ(id.c_str(), result_id.c_str());
     }
 
-    ASSERT_EQ(4, results["hits"][0]["highlights"][0].size());
-    ASSERT_STREQ(results["hits"][0]["highlights"][0]["field"].get<std::string>().c_str(), "tags");
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["snippets"].size());
-    ASSERT_STREQ("<mark>truth</mark>", results["hits"][0]["highlights"][0]["snippets"][0].get<std::string>().c_str());
-    ASSERT_STREQ("plain <mark>truth</mark>", results["hits"][0]["highlights"][0]["snippets"][1].get<std::string>().c_str());
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["matched_tokens"].size());
-    ASSERT_STREQ("truth", results["hits"][0]["highlights"][0]["matched_tokens"][0][0].get<std::string>().c_str());
-    ASSERT_STREQ("truth", results["hits"][0]["highlights"][0]["matched_tokens"][1][0].get<std::string>().c_str());
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["indices"].size());
-    ASSERT_EQ(1, results["hits"][0]["highlights"][0]["indices"][0]);
-    ASSERT_EQ(2, results["hits"][0]["highlights"][0]["indices"][1]);
+    ASSERT_EQ(3, results["hits"][0]["highlights"][0].size());
+    ASSERT_STREQ("title", results["hits"][0]["highlights"][0]["field"].get<std::string>().c_str());
+    ASSERT_STREQ("Plain <mark>Truth</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
+    ASSERT_EQ(1, results["hits"][0]["highlights"][0]["matched_tokens"].size());
+    ASSERT_STREQ("Truth", results["hits"][0]["highlights"][0]["matched_tokens"][0].get<std::string>().c_str());
 
-    ASSERT_EQ(3, results["hits"][0]["highlights"][1].size());
-    ASSERT_STREQ("title", results["hits"][0]["highlights"][1]["field"].get<std::string>().c_str());
-    ASSERT_STREQ("Plain <mark>Truth</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>().c_str());
-    ASSERT_EQ(1, results["hits"][0]["highlights"][1]["matched_tokens"].size());
-    ASSERT_STREQ("Truth", results["hits"][0]["highlights"][1]["matched_tokens"][0].get<std::string>().c_str());
+    ASSERT_EQ(4, results["hits"][0]["highlights"][1].size());
+    ASSERT_STREQ(results["hits"][0]["highlights"][1]["field"].get<std::string>().c_str(), "tags");
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["snippets"].size());
+    ASSERT_STREQ("<mark>truth</mark>", results["hits"][0]["highlights"][1]["snippets"][0].get<std::string>().c_str());
+    ASSERT_STREQ("plain <mark>truth</mark>", results["hits"][0]["highlights"][1]["snippets"][1].get<std::string>().c_str());
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["matched_tokens"].size());
+    ASSERT_STREQ("truth", results["hits"][0]["highlights"][1]["matched_tokens"][0][0].get<std::string>().c_str());
+    ASSERT_STREQ("truth", results["hits"][0]["highlights"][1]["matched_tokens"][1][0].get<std::string>().c_str());
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["indices"].size());
+    ASSERT_EQ(1, results["hits"][0]["highlights"][1]["indices"][0]);
+    ASSERT_EQ(2, results["hits"][0]["highlights"][1]["indices"][1]);
 
     ASSERT_EQ(3, results["hits"][1]["highlights"][0].size());
     ASSERT_STREQ("title", results["hits"][1]["highlights"][0]["field"].get<std::string>().c_str());
@@ -2500,15 +2500,15 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {
                                   spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title, tags").get();
 
     ASSERT_EQ(2, res["hits"][0]["highlights"].size());
-    ASSERT_EQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][0]["values"][0].get<std::string>());
     ASSERT_EQ("The quick brown fox jumped over the <mark>lazy</mark> dog and ran straight to the forest to sleep.",
-              res["hits"][0]["highlights"][1]["value"].get<std::string>());
+              res["hits"][0]["highlights"][0]["value"].get<std::string>());
+    ASSERT_EQ(1, res["hits"][0]["highlights"][0]["matched_tokens"].size());
+    ASSERT_STREQ("lazy", res["hits"][0]["highlights"][0]["matched_tokens"][0].get<std::string>().c_str());
 
-    ASSERT_EQ(1, res["hits"][0]["highlights"][1]["matched_tokens"].size());
-    ASSERT_STREQ("lazy", res["hits"][0]["highlights"][1]["matched_tokens"][0].get<std::string>().c_str());
-
-    ASSERT_EQ(1, res["hits"][0]["highlights"][0]["values"][0].size());
-    ASSERT_STREQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][0]["values"][0].get<std::string>().c_str());
+    ASSERT_EQ(1, res["hits"][0]["highlights"][1]["values"].size());
+    ASSERT_EQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][1]["values"][0].get<std::string>());
+    ASSERT_EQ(1, res["hits"][0]["highlights"][1]["snippets"].size());
+    ASSERT_EQ("<mark>LAZY</mark>", res["hits"][0]["highlights"][1]["snippets"][0].get<std::string>());
 
     // excluded fields should not be returned in highlights section
     spp::sparse_hash_set<std::string> excluded_fields = {"tags"};
@@ -3190,13 +3190,17 @@ TEST_F(CollectionTest, MultiFieldRelevance5) {
     ASSERT_STREQ("1", results["hits"][1]["document"]["id"].get<std::string>().c_str());
     ASSERT_STREQ("2", results["hits"][2]["document"]["id"].get<std::string>().c_str());
 
-    ASSERT_EQ(1, results["hits"][0]["highlights"].size());
-    ASSERT_EQ("country", results["hits"][0]["highlights"][0]["field"].get<std::string>());
-    ASSERT_EQ("<mark>Canada</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_EQ("field_a", results["hits"][0]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canadia</mark>", results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ("country", results["hits"][0]["highlights"][1]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canada</mark>", results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
-    ASSERT_EQ(1, results["hits"][1]["highlights"].size());
-    ASSERT_EQ("company_name", results["hits"][1]["highlights"][0]["field"].get<std::string>());
-    ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ(2, results["hits"][1]["highlights"].size());
+    ASSERT_EQ("field_a", results["hits"][1]["highlights"][0]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canadoo</mark>", results["hits"][1]["highlights"][0]["snippet"].get<std::string>());
+    ASSERT_EQ("company_name", results["hits"][1]["highlights"][1]["field"].get<std::string>());
+    ASSERT_EQ("<mark>Canaida</mark> Corp", results["hits"][1]["highlights"][1]["snippet"].get<std::string>());
 
     ASSERT_EQ(1, results["hits"][2]["highlights"].size());
     ASSERT_EQ("field_a", results["hits"][2]["highlights"][0]["field"].get<std::string>());
@@ -3361,7 +3365,7 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
 
     ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
 
-    ASSERT_EQ(2, results["hits"][0]["highlights"].size());
+    ASSERT_EQ(3, results["hits"][0]["highlights"].size());
     ASSERT_EQ("name", results["hits"][0]["highlights"][0]["field"].get<std::string>());
     ASSERT_EQ("Best Wireless Vehicle <mark>Charger</mark>",
               results["hits"][0]["highlights"][0]["snippet"].get<std::string>());
@@ -3370,6 +3374,9 @@ TEST_F(CollectionTest, MultiFieldHighlighting) {
     ASSERT_EQ("Easily replenish your cell phone with this wireless <mark>charger.</mark>",
               results["hits"][0]["highlights"][1]["snippet"].get<std::string>());
 
+    ASSERT_EQ("categories", results["hits"][0]["highlights"][2]["field"].get<std::string>());
+    ASSERT_EQ("Car <mark>Chargers</mark>", results["hits"][0]["highlights"][2]["snippets"][0].get<std::string>());
+
     results = coll1->search("John With Denver",
                             {"description"}, "", {}, {}, {0}, 10, 1, FREQUENCY,
                             {true}, 1, spp::sparse_hash_set<std::string>(),