Mirror of https://github.com/typesense/typesense.git (synced 2025-05-19 21:22:25 +08:00)
Merge branch 'v0.25-join' into v0.26-facets
This commit is contained in: commit bb5720955b
@@ -507,7 +507,7 @@ private:
     static void handle_doc_ops(const tsl::htrie_map<char, field>& search_schema,
                                nlohmann::json& update_doc, const nlohmann::json& old_doc);

-    static void get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& search_schema,
+    static void get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& embedding_fields,
                                 nlohmann::json &update_doc, const nlohmann::json &old_doc, nlohmann::json &new_doc,
                                 nlohmann::json &del_doc);

@@ -53,12 +53,6 @@ Collection::Collection(const std::string& name, const uint32_t collection_id, co
                        symbols_to_index(to_char_array(symbols_to_index)), token_separators(to_char_array(token_separators)),
                        index(init_index()) {

-    for (auto const& field: fields) {
-        if (field.embed.count(fields::from) != 0) {
-            embedding_fields.emplace(field.name, field);
-        }
-    }
-
     this->num_documents = 0;
 }

@@ -919,7 +919,9 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
         nlohmann::json preset;
         const auto& preset_op = CollectionManager::get_instance().get_preset(preset_it->second, preset);

-        if(preset_op.ok()) {
+        // NOTE: we merge only single preset configuration because multi ("searches") preset value replaces
+        // the request body directly before we reach this single search request function.
+        if(preset_op.ok() && !preset.contains("searches")) {
             if(!preset.is_object()) {
                 return Option<bool>(400, "Search preset is not an object.");
             }
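
The new condition merges only a flat, single-search preset into this request; a preset that carries a "searches" array has already replaced the multi-search body before this function runs. Below is a minimal sketch of that merge rule, assuming illustrative names and assuming that explicitly passed request params take precedence (this is not the actual do_search() code):

// Hedged sketch: fold a flat preset object into the request's param map.
// merge_single_preset, req_params and the precedence shown are assumptions.
#include <map>
#include <string>
#include <nlohmann/json.hpp>   // Typesense vendors this header as "json.hpp"

static void merge_single_preset(const nlohmann::json& preset,
                                std::map<std::string, std::string>& req_params) {
    if(!preset.is_object() || preset.contains("searches")) {
        return;  // multi-search presets are applied to the whole body elsewhere
    }
    for(auto it = preset.begin(); it != preset.end(); ++it) {
        if(req_params.count(it.key()) == 0) {
            req_params[it.key()] = it.value().is_string() ? it.value().get<std::string>()
                                                          : it.value().dump();
        }
    }
}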
@@ -449,7 +449,7 @@ void Index::validate_and_preprocess(Index *index,

             if(index_rec.is_update) {
                 // scrub string fields to reduce delete ops
-                get_doc_changes(index_rec.operation, search_schema, index_rec.doc, index_rec.old_doc,
+                get_doc_changes(index_rec.operation, embedding_fields, index_rec.doc, index_rec.old_doc,
                                 index_rec.new_doc, index_rec.del_doc);

                 if(generate_embeddings) {
@@ -2689,8 +2689,8 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons

             for(size_t res_index = 0; res_index < vec_results.size(); res_index++) {
                 auto& vec_result = vec_results[res_index];
-                auto doc_id = vec_result.first;
-                auto result_it = topster->kv_map.find(doc_id);
+                auto seq_id = vec_result.first;
+                auto result_it = topster->kv_map.find(seq_id);

                 if(result_it != topster->kv_map.end()) {
                     if(result_it->second->match_score_index < 0 || result_it->second->match_score_index > 2) {
@@ -2699,30 +2699,42 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons

                     // result overlaps with keyword search: we have to combine the scores

-                    auto result = result_it->second;
+                    KV* kv = result_it->second;
                     // old_score + (1 / rank_of_document) * WEIGHT)
-                    result->vector_distance = vec_result.second;
-                    result->text_match_score = result->scores[result->match_score_index];
+                    kv->vector_distance = vec_result.second;
+                    kv->text_match_score = kv->scores[kv->match_score_index];
                     int64_t match_score = float_to_int64_t(
-                            (int64_t_to_float(result->scores[result->match_score_index])) +
+                            (int64_t_to_float(kv->scores[kv->match_score_index])) +
                             ((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT));
                     int64_t match_score_index = -1;
                     int64_t scores[3] = {0};

-                    compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second);
+                    compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, seq_id, 0,
+                                        match_score, scores, match_score_index, vec_result.second);

                     for(int i = 0; i < 3; i++) {
-                        result->scores[i] = scores[i];
+                        kv->scores[i] = scores[i];
                     }
-                    result->match_score_index = match_score_index;
+                    kv->match_score_index = match_score_index;

                 } else {
                     // Result has been found only in vector search: we have to add it to both KV and result_ids
                     // (1 / rank_of_document) * WEIGHT)
                     int64_t scores[3] = {0};
                     int64_t match_score = float_to_int64_t((1.0 / (res_index + 1)) * VECTOR_SEARCH_WEIGHT);
                     int64_t match_score_index = -1;
-                    compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, doc_id, 0, match_score, scores, match_score_index, vec_result.second);
-                    KV kv(searched_queries.size(), doc_id, doc_id, match_score_index, scores);
+                    compute_sort_scores(sort_fields_std, sort_order, field_values, geopoint_indices, seq_id, 0, match_score, scores, match_score_index, vec_result.second);
+
+                    uint64_t distinct_id = seq_id;
+                    if (group_limit != 0) {
+                        distinct_id = get_distinct_id(group_by_fields, seq_id);
+                        if(excluded_group_ids.count(distinct_id) != 0) {
+                            continue;
+                        }
+                    }
+
+                    KV kv(searched_queries.size(), seq_id, distinct_id, match_score_index, scores);
                     kv.text_match_score = 0;
                     kv.vector_distance = vec_result.second;

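In the overlap branch the fused score follows the in-line comment, old_score + (1 / rank_of_document) * WEIGHT; a document found only by vector search starts from zero, and the new code additionally skips it when its group has been excluded. A standalone sketch of the blend in plain doubles (the weight value is a placeholder and the int64 packing done by float_to_int64_t is omitted):

// Hedged sketch of the reciprocal-rank blend between keyword and vector results.
#include <cstdio>

int main() {
    const double VECTOR_SEARCH_WEIGHT = 0.7;   // assumed placeholder, not the real constant
    const double text_match_score = 1.25e6;    // illustrative keyword score

    for (int vec_rank = 0; vec_rank < 3; vec_rank++) {
        // document present in both result sets: boost the keyword score by rank
        double fused = text_match_score + (1.0 / (vec_rank + 1)) * VECTOR_SEARCH_WEIGHT;
        // document present only in the vector results: rank contribution alone
        double vector_only = (1.0 / (vec_rank + 1)) * VECTOR_SEARCH_WEIGHT;
        std::printf("rank %d: fused=%.6f vector_only=%.6f\n", vec_rank, fused, vector_only);
    }
    return 0;
}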
@@ -2735,7 +2747,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                     }

                     topster->add(&kv);
-                    vec_search_ids.push_back(doc_id);
+                    vec_search_ids.push_back(seq_id);
                 }
             }

@@ -5804,7 +5816,7 @@ void Index::handle_doc_ops(const tsl::htrie_map<char, field>& search_schema,
     }
 }

-void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& search_schema,
+void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<char, field>& embedding_fields,
                             nlohmann::json& update_doc, const nlohmann::json& old_doc, nlohmann::json& new_doc,
                             nlohmann::json& del_doc) {

@@ -5817,7 +5829,12 @@ void Index::get_doc_changes(const index_operation_t op, const tsl::htrie_map<cha
             }

             if(!update_doc.contains(it.key())) {
-                del_doc[it.key()] = it.value();
+                // embedding field won't be part of upsert doc so populate new doc with the value from old doc
+                if(embedding_fields.count(it.key()) != 0) {
+                    new_doc[it.key()] = it.value();
+                } else {
+                    del_doc[it.key()] = it.value();
+                }
             }
         }
     } else {
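
The new branch keeps a stored embedding alive across an upsert that omits it: keys present only in the old document normally land in del_doc, but embedding fields are copied into new_doc instead, so the vector is neither dropped nor re-generated. A minimal sketch of that diffing rule with illustrative documents (not the project's actual helper):

// Hedged sketch of the upsert diffing rule for embedding fields.
#include <iostream>
#include <set>
#include <string>
#include <nlohmann/json.hpp>

int main() {
    using nlohmann::json;
    std::set<std::string> embedding_fields = {"embedding"};   // assumed schema

    json old_doc = {{"title", "Title"}, {"points", 100}, {"embedding", {0.1, 0.2, 0.3}}};
    json update_doc = {{"title", "Title"}, {"points", 100}};  // upsert body omits the embedding

    json new_doc = update_doc;
    json del_doc = json::object();

    for (auto it = old_doc.begin(); it != old_doc.end(); ++it) {
        if (!update_doc.contains(it.key())) {
            if (embedding_fields.count(it.key()) != 0) {
                new_doc[it.key()] = it.value();   // keep the stored embedding as-is
            } else {
                del_doc[it.key()] = it.value();   // schedule the stale value for removal
            }
        }
    }
    std::cout << "new_doc: " << new_doc.dump() << "\ndel_doc: " << del_doc.dump() << "\n";
    return 0;
}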
@@ -221,6 +221,7 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
     }

+    nlohmann::json res_json;

    try {
        res_json = nlohmann::json::parse(res);
    } catch (const std::exception& e) {
@@ -232,8 +233,21 @@ std::vector<embedding_res_t> OpenAIEmbedder::batch_embed(const std::vector<std::
        }
        return outputs;
    }

+    if(res_json.count("data") == 0 || !res_json["data"].is_array() || res_json["data"].size() != inputs.size()) {
+        std::vector<embedding_res_t> outputs;
+        for(size_t i = 0; i < inputs.size(); i++) {
+            outputs.push_back(embedding_res_t(500, "Got malformed response from OpenAI API."));
+        }
+        return outputs;
+    }
+
    std::vector<embedding_res_t> outputs;
    for(auto& data : res_json["data"]) {
+        if(data.count("embedding") == 0 || !data["embedding"].is_array() || data["embedding"].size() == 0) {
+            outputs.push_back(embedding_res_t(500, "Got malformed response from OpenAI API."));
+            continue;
+        }
        outputs.push_back(embedding_res_t(data["embedding"].get<std::vector<float>>()));
    }

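Both new checks turn a single malformed HTTP response into one error entry per input, so the output vector stays positionally aligned with the input batch. A hedged sketch of that fan-out, with an illustrative struct standing in for embedding_res_t:

// Hedged sketch: fan a malformed-response error out to every input in the batch.
#include <iostream>
#include <string>
#include <vector>

struct result_t {                  // illustrative stand-in for embedding_res_t
    bool success;
    int status_code;
    std::string error;
    std::vector<float> embedding;
};

static std::vector<result_t> fail_all(size_t num_inputs, const std::string& msg) {
    std::vector<result_t> outputs;
    for (size_t i = 0; i < num_inputs; i++) {
        outputs.push_back(result_t{false, 500, msg, {}});
    }
    return outputs;
}

int main() {
    auto outputs = fail_all(3, "Got malformed response from OpenAI API.");
    std::cout << outputs.size() << " error entries, one per input\n";
    return 0;
}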
@@ -577,7 +591,20 @@ std::vector<embedding_res_t> GCPEmbedder::batch_embed(const std::vector<std::str
        return outputs;
    }
    std::vector<embedding_res_t> outputs;

+    if(res_json.count("predictions") == 0 || !res_json["predictions"].is_array() || res_json["predictions"].size() != inputs.size()) {
+        std::vector<embedding_res_t> outputs;
+        for(size_t i = 0; i < inputs.size(); i++) {
+            outputs.push_back(embedding_res_t(500, "Got malformed response from GCP API."));
+        }
+        return outputs;
+    }
+
    for(const auto& prediction : res_json["predictions"]) {
+        if(prediction.count("embeddings") == 0 || !prediction["embeddings"].is_object() || prediction["embeddings"].count("values") == 0 || !prediction["embeddings"]["values"].is_array() || prediction["embeddings"]["values"].size() == 0) {
+            outputs.push_back(embedding_res_t(500, "Got malformed response from GCP API."));
+            continue;
+        }
        outputs.push_back(embedding_res_t(prediction["embeddings"]["values"].get<std::vector<float>>()));
    }

@@ -119,7 +119,7 @@ int init_root_logger(Config & config, const std::string & server_version) {

    if(log_dir.empty()) {
        // use console logger if log dir is not specified
-       FLAGS_logtostdout = true;
+       FLAGS_logtostderr = true;
    } else {
        if(!directory_exists(log_dir)) {
            std::cerr << "Typesense failed to start. " << "Log directory " << log_dir << " does not exist.";
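
The comment states the intent: without a configured log directory, logs go to the console, and FLAGS_logtostderr is the long-standing glog switch for that (FLAGS_logtostdout is a newer glog addition). A minimal standalone sketch, with an illustrative message:

// Hedged sketch: console-only glog setup when no log directory is supplied.
#include <glog/logging.h>

int main(int argc, char** argv) {
    FLAGS_logtostderr = true;              // route all severities to stderr
    google::InitGoogleLogging(argv[0]);
    LOG(INFO) << "no log dir specified, logging to console";
    return 0;
}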
@@ -224,6 +224,55 @@ TEST_F(CollectionVectorTest, BasicVectorQuerying) {
     collectionManager.drop_collection("coll1");
 }

+TEST_F(CollectionVectorTest, VectorUnchangedUpsert) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string"},
+            {"name": "points", "type": "int32"},
+            {"name": "vec", "type": "float[]", "num_dim": 3}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    std::vector<float> vec = {0.12, 0.45, 0.64};
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "Title";
+    doc["points"] = 100;
+    doc["vec"] = vec;
+
+    auto add_op = coll1->add(doc.dump());
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                                 "", 10, {}, {}, {}, 0,
+                                 "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                                 4, {off}, 32767, 32767, 2,
+                                 false, true, "vec:([0.12, 0.44, 0.55])").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+
+    // upsert unchanged doc
+    add_op = coll1->add(doc.dump(), index_operation_t::UPSERT);
+    ASSERT_TRUE(add_op.ok());
+
+    results = coll1->search("*", {}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                            spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                            "", 10, {}, {}, {}, 0,
+                            "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                            4, {off}, 32767, 32767, 2,
+                            false, true, "vec:([0.12, 0.44, 0.55])").get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+}
+
 TEST_F(CollectionVectorTest, NumVectorGreaterThanNumDim) {
     nlohmann::json schema = R"({
         "name": "coll1",
@@ -692,6 +741,88 @@ TEST_F(CollectionVectorTest, VectorWithNullValue) {
               nlohmann::json::parse(json_lines[1])["error"].get<std::string>());
 }

+TEST_F(CollectionVectorTest, EmbeddedVectorUnchangedUpsert) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string"},
+            {"name": "points", "type": "int32"},
+            {"name": "embedding", "type":"float[]", "embed":{"from": ["title"],
+                "model_config": {"model_name": "ts/e5-small"}}}
+        ]
+    })"_json;
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    nlohmann::json doc;
+    doc["id"] = "0";
+    doc["title"] = "Title";
+    doc["points"] = 100;
+
+    auto add_op = coll1->add(doc.dump());
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                                 spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>()).get();
+
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    auto embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_EQ(384, embedding.size());
+
+    // upsert unchanged doc
+    doc.clear();
+    doc["id"] = "0";
+    doc["title"] = "Title";
+    doc["points"] = 100;
+
+    add_op = coll1->add(doc.dump(), index_operation_t::UPSERT);
+    ASSERT_TRUE(add_op.ok());
+
+    results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                            spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>()).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_EQ(384, embedding.size());
+
+    // update
+    doc.clear();
+    doc["id"] = "0";
+    doc["title"] = "Title";
+    doc["points"] = 100;
+
+    add_op = coll1->add(doc.dump(), index_operation_t::UPDATE);
+    ASSERT_TRUE(add_op.ok());
+
+    results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                            spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>()).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_EQ(384, embedding.size());
+
+    // emplace
+    doc.clear();
+    doc["id"] = "0";
+    doc["title"] = "Title";
+    doc["points"] = 100;
+
+    add_op = coll1->add(doc.dump(), index_operation_t::EMPLACE);
+    ASSERT_TRUE(add_op.ok());
+
+    results = coll1->search("title", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                            spp::sparse_hash_set<std::string>(),
+                            spp::sparse_hash_set<std::string>()).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_EQ(384, embedding.size());
+}
+
 TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
     nlohmann::json schema = R"({
         "name": "objects",
@@ -1099,7 +1230,67 @@ TEST_F(CollectionVectorTest, HideCredential) {
     ASSERT_EQ("***********", coll_summary["fields"][1]["embed"]["model_config"]["project_id"].get<std::string>());
 }

-TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
+TEST_F(CollectionVectorTest, UpdateOfFieldReferencedByEmbedding) {
+    nlohmann::json schema = R"({
+        "name": "objects",
+        "fields": [
+            {"name": "name", "type": "string"},
+            {"name": "embedding", "type":"float[]", "embed":{"from": ["name"],
+                "model_config": {"model_name": "ts/e5-small"}}}
+        ]
+    })"_json;
+
+    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll = op.get();
+
+    nlohmann::json object;
+    object["id"] = "0";
+    object["name"] = "butter";
+
+    auto add_op = coll->add(object.dump(), CREATE);
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    auto original_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+
+    nlohmann::json update_object;
+    update_object["id"] = "0";
+    update_object["name"] = "ghee";
+    auto update_op = coll->add(update_object.dump(), EMPLACE);
+    ASSERT_TRUE(update_op.ok());
+
+    results = coll->search("ghee", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    auto updated_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_NE(original_embedding, updated_embedding);
+
+    // action = update
+    update_object["name"] = "milk";
+    update_op = coll->add(update_object.dump(), UPDATE);
+    ASSERT_TRUE(update_op.ok());
+
+    results = coll->search("milk", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    updated_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_NE(original_embedding, updated_embedding);
+
+    // action = upsert
+    update_object["name"] = "cheese";
+    update_op = coll->add(update_object.dump(), UPSERT);
+    ASSERT_TRUE(update_op.ok());
+
+    results = coll->search("cheese", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+    updated_embedding = results["hits"][0]["document"]["embedding"].get<std::vector<float>>();
+    ASSERT_NE(original_embedding, updated_embedding);
+}
+
+TEST_F(CollectionVectorTest, UpdateOfFieldNotReferencedByEmbedding) {
+    // test updates to a field that's not referred by an embedding field
     nlohmann::json schema = R"({
         "name": "objects",
         "fields": [
@@ -1123,16 +1314,34 @@ TEST_F(CollectionVectorTest, UpdateOfCollWithNonOptionalEmbeddingField) {
     auto add_op = coll->add(object.dump(), CREATE);
     ASSERT_TRUE(add_op.ok());

     auto results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
     ASSERT_EQ(1, results["found"].get<size_t>());

     nlohmann::json update_object;
     update_object["id"] = "0";
     update_object["about"] = "something about butter";
     auto update_op = coll->add(update_object.dump(), EMPLACE);
     ASSERT_TRUE(update_op.ok());

+    results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+
+    // action = update
+    update_object["about"] = "something about butter 2";
+    update_op = coll->add(update_object.dump(), UPDATE);
+    ASSERT_TRUE(update_op.ok());
+
+    results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
+
+    // action = upsert
+    update_object["name"] = "butter";
+    update_object["about"] = "something about butter 3";
+    update_op = coll->add(update_object.dump(), UPSERT);
+    ASSERT_TRUE(update_op.ok());
+
+    results = coll->search("butter", {"embedding"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}).get();
+    ASSERT_EQ(1, results["found"].get<size_t>());
 }

 TEST_F(CollectionVectorTest, FreshEmplaceWithOptionalEmbeddingReferencedField) {
@@ -1327,6 +1536,58 @@ TEST_F(CollectionVectorTest, KeywordSearchReturnOnlyTextMatchInfo) {
     ASSERT_EQ(1, results["hits"][0].count("text_match_info"));
 }

+TEST_F(CollectionVectorTest, GroupByWithVectorSearch) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "fields": [
+            {"name": "title", "type": "string"},
+            {"name": "group", "type": "string", "facet": true},
+            {"name": "vec", "type": "float[]", "num_dim": 4}
+        ]
+    })"_json;
+
+    Collection* coll1 = collectionManager.create_collection(schema).get();
+
+    std::vector<std::vector<float>> values = {
+        {0.851758, 0.909671, 0.823431, 0.372063},
+        {0.97826, 0.933157, 0.39557, 0.306488},
+        {0.230606, 0.634397, 0.514009, 0.399594}
+    };
+
+    for (size_t i = 0; i < values.size(); i++) {
+        nlohmann::json doc;
+        doc["id"] = std::to_string(i);
+        doc["title"] = std::to_string(i) + " title";
+        doc["group"] = "0";
+        doc["vec"] = values[i];
+        ASSERT_TRUE(coll1->add(doc.dump()).ok());
+    }
+
+    auto res = coll1->search("title", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                             spp::sparse_hash_set<std::string>(),
+                             spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                             "", 10, {}, {}, {"group"}, 1,
+                             "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                             4, {off}, 32767, 32767, 2,
+                             false, true, "vec:([0.96826, 0.94, 0.39557, 0.306488])").get();
+
+    ASSERT_EQ(1, res["grouped_hits"].size());
+    ASSERT_EQ(1, res["grouped_hits"][0]["hits"].size());
+    ASSERT_EQ(1, res["grouped_hits"][0]["hits"][0].count("vector_distance"));
+
+    res = coll1->search("*", {"title"}, "", {}, {}, {0}, 10, 1, FREQUENCY, {true}, Index::DROP_TOKENS_THRESHOLD,
+                        spp::sparse_hash_set<std::string>(),
+                        spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
+                        "", 10, {}, {}, {"group"}, 1,
+                        "<mark>", "</mark>", {}, 1000, true, false, true, "", false, 6000 * 1000, 4, 7, fallback,
+                        4, {off}, 32767, 32767, 2,
+                        false, true, "vec:([0.96826, 0.94, 0.39557, 0.306488])").get();
+
+    ASSERT_EQ(1, res["grouped_hits"].size());
+    ASSERT_EQ(1, res["grouped_hits"][0]["hits"].size());
+    ASSERT_EQ(1, res["grouped_hits"][0]["hits"][0].count("vector_distance"));
+}
+
 TEST_F(CollectionVectorTest, HybridSearchReturnAllInfo) {
     auto schema_json =
         R"({
@@ -611,7 +611,7 @@ TEST_F(CoreAPIUtilsTest, MultiSearchWithPresetShouldUsePresetForAuth) {
     ASSERT_EQ(2, embedded_params_vec.size());
 }

-TEST_F(CoreAPIUtilsTest, PresetSingleSearch) {
+TEST_F(CoreAPIUtilsTest, PresetMultiSearch) {
     nlohmann::json schema = R"({
         "name": "coll1",
         "fields": [
@@ -636,7 +636,7 @@ TEST_F(CoreAPIUtilsTest, PresetSingleSearch) {

     auto search_body = R"(
         {"searches":[
-            {"collection":"coll1","q":"apple", "query_by": "title", "preset": "single_preset"}
+            {"collection":"coll1","q":"apple", "query_by": "name", "preset": "single_preset"}
         ]}
     )";

@@ -646,8 +646,40 @@ TEST_F(CoreAPIUtilsTest, PresetSingleSearch) {

     post_multi_search(req, res);

-    ASSERT_EQ("12", req->params["per_page"]);
-    ASSERT_EQ("coll1", req->params["collection"]);
+    auto res_json = nlohmann::json::parse(res->body);
+    ASSERT_EQ(1, res_json["results"].size());
+    ASSERT_EQ(0, res_json["results"][0]["found"].get<size_t>());
+
+    // with multiple "searches" preset configuration
+    preset_value = R"(
+        {"searches":[
+            {"collection":"coll1", "q": "*", "per_page": "8"},
+            {"collection":"coll1", "q": "*", "per_page": "11"}
+        ]}
+    )"_json;
+
+    collectionManager.upsert_preset("multi_preset", preset_value);
+    embedded_params.clear();
+    req->params.clear();
+    req->params["preset"] = "multi_preset";
+    req->embedded_params_vec.clear();
+    req->embedded_params_vec.push_back(embedded_params);
+    req->embedded_params_vec.push_back(embedded_params);
+
+    // "preset": "multi_preset"
+    search_body = R"(
+        {"searches":[
+            {"collection":"coll1","q":"apple", "query_by": "title"}
+        ]}
+    )";
+
+    req->body = search_body;
+
+    post_multi_search(req, res);
+    res_json = nlohmann::json::parse(res->body);
+    ASSERT_EQ(2, res_json["results"].size());
+    ASSERT_EQ(0, res_json["results"][0]["found"].get<size_t>());
+    ASSERT_EQ(0, res_json["results"][1]["found"].get<size_t>());

     collectionManager.drop_collection("coll1");
 }