mirror of https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
Merge branch 'event_anaylytics' of https://github.com/krunal1313/typesense into event_anaylytics
This commit is contained in:
commit 471dccc42e
4  .github/workflows/tests.yml  vendored
@@ -38,15 +38,17 @@ jobs:
       uses: bazelbuild/setup-bazelisk@v2

     - name: Download bazel cache
-      uses: dawidd6/action-download-artifact@v2
+      uses: jasonbosco/action-download-artifact@709b71d3729e8980f52a5a2a9ec04261060945c1
       with:
         name: bazel-cache
         search_artifacts: true
         workflow_conclusion: ""
         if_no_artifact_found: warn
+        skip_unpack: true

     - name: Uncompress bazel cache
       run: |
+        unzip bazel-cache.zip
         mkdir -p ~/.cache/bazel
         tar_file="bazel-cache.tar.gz" && \
         [ -f "$tar_file" ] && \
@@ -276,12 +276,26 @@ struct index_record {
 class VectorFilterFunctor: public hnswlib::BaseFilterFunctor {
     filter_result_iterator_t* const filter_result_iterator;

+    const uint32_t* excluded_ids = nullptr;
+    const uint32_t excluded_ids_length = 0;
+
 public:
-    explicit VectorFilterFunctor(filter_result_iterator_t* const filter_result_iterator) :
-            filter_result_iterator(filter_result_iterator) {}
+    explicit VectorFilterFunctor(filter_result_iterator_t* const filter_result_iterator,
+                                 const uint32_t* excluded_ids = nullptr, const uint32_t excluded_ids_length = 0) :
+            filter_result_iterator(filter_result_iterator),
+            excluded_ids(excluded_ids), excluded_ids_length(excluded_ids_length) {}

     bool operator()(hnswlib::labeltype id) override {
-        if (filter_result_iterator->approx_filter_ids_length == 0) {
+        if (filter_result_iterator->approx_filter_ids_length == 0 && excluded_ids_length == 0) {
             return true;
         }

+        if(excluded_ids_length > 0 && excluded_ids && std::binary_search(excluded_ids, excluded_ids + excluded_ids_length, id)) {
+            return false;
+        }
+
+        if(filter_result_iterator->approx_filter_ids_length == 0) {
+            return true;
+        }
+
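Note on the exclusion check above: std::binary_search only gives correct answers when excluded_ids is sorted in ascending order, so callers must hand the functor a sorted array. A minimal standalone sketch of the same gating logic, with the Typesense/hnswlib types (filter_result_iterator_t, hnswlib::labeltype) replaced by stand-ins:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for the hnswlib label type used by the real functor.
using labeltype = std::size_t;

// Returns false for ids on the (sorted) exclusion list, true otherwise.
// Mirrors the early-exit structure of VectorFilterFunctor::operator().
bool allowed(labeltype id, const std::vector<uint32_t>& excluded_ids) {
    if (excluded_ids.empty()) {
        return true; // nothing excluded, every candidate passes
    }
    // binary_search requires excluded_ids to stay sorted ascending
    return !std::binary_search(excluded_ids.begin(), excluded_ids.end(),
                               static_cast<uint32_t>(id));
}

int main() {
    std::vector<uint32_t> excluded = {3, 7, 42}; // must be sorted
    std::cout << allowed(7, excluded) << "\n";   // 0: excluded
    std::cout << allowed(8, excluded) << "\n";   // 1: allowed
}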
@@ -10,7 +10,9 @@ embedding_res_t CLIPImageEmbedder::embed(const std::string& encoded_image) {
     auto processed_image_op = image_processor_.process_image(encoded_image);

     if (!processed_image_op.ok()) {
-        return embedding_res_t(processed_image_op.code(), processed_image_op.error());
+        nlohmann::json error_json;
+        error_json["error"] = processed_image_op.error();
+        return embedding_res_t(processed_image_op.code(), error_json);
     }

     auto processed_image = processed_image_op.get();
@@ -58,7 +60,9 @@ std::vector<embedding_res_t> CLIPImageEmbedder::batch_embed(const std::vector<st
         auto processed_image_op = image_processor_.process_image(input);

         if (!processed_image_op.ok()) {
-            results[i] = embedding_res_t(processed_image_op.code(), processed_image_op.error());
+            nlohmann::json error_json;
+            error_json["error"] = processed_image_op.error();
+            results[i] = embedding_res_t(processed_image_op.code(), error_json);
             i++;
             continue;
         }
@@ -67,6 +71,17 @@ std::vector<embedding_res_t> CLIPImageEmbedder::batch_embed(const std::vector<st
             i++;
         }

+        // no valid images
+        if (processed_images.empty()) {
+            std::vector<embedding_res_t> result_vector(inputs.size());
+            for (int i = 0; i < inputs.size(); i++) {
+                result_vector[i] = results[i];
+            }
+
+            return result_vector;
+        }
+
         // create input tensor
         std::vector<int64_t> input_shape = {static_cast<int64_t>(processed_images.size()), 3, 224, 224};
         std::vector<const char*> input_names = {"input_ids", "pixel_values", "attention_mask"};
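Both embed() and batch_embed() now wrap the processor's error string in a JSON object under an "error" key before constructing embedding_res_t, so every error carries the same JSON shape. A small sketch of that shape, assuming only nlohmann::json:

#include <nlohmann/json.hpp>
#include <iostream>

int main() {
    // The error path now returns a JSON object rather than a raw string,
    // so callers can treat all embedding_res_t errors uniformly.
    nlohmann::json error_json;
    error_json["error"] = "Error while processing image";
    std::cout << error_json.dump() << "\n"; // {"error":"Error while processing image"}
}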
@@ -36,8 +36,7 @@ Option<processed_image_t> CLIPImageProcessor::process_image(const std::string& i
     LOG(INFO) << "Running image processor";
     try {
         output_tensors = session_->Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), output_names.size());
-    } catch (const std::exception& e) {
-        LOG(INFO) << "Error while running image processor: " << e.what();
+    } catch (...) {
        return Option<processed_image_t>(400, "Error while processing image");
    }

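The reworked catch block drops the logging-only std::exception handler and funnels every failure from the ONNX session into a single Option error. A hedged stand-in sketch (OptionInt below is a simplified stand-in for Typesense's Option<T>, mirroring only the code/error shape):

#include <iostream>
#include <stdexcept>
#include <string>

// Simplified stand-in for Typesense's Option<T>.
struct OptionInt {
    int code;
    std::string error;
    int value;
    bool ok() const { return code == 200; }
};

OptionInt run_model(bool fail) {
    try {
        if (fail) throw std::runtime_error("onnx failure");
        return {200, "", 42};
    } catch (...) {
        // the new catch-all: any failure inside the session run is
        // reported as a 400 Option error instead of escaping the call
        return {400, "Error while processing image", 0};
    }
}

int main() {
    OptionInt res = run_model(true);
    std::cout << res.code << " " << res.error << "\n"; // 400 Error while processing image
}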
@@ -2542,6 +2542,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
                 k++;
             }

+            VectorFilterFunctor filterFunctor(filter_result_iterator, excluded_result_ids, excluded_result_ids_size);
             auto& field_vector_index = vector_index.at(vector_query.field_name);

             std::vector<std::pair<float, single_filter_result_t>> dist_results;
@@ -2946,7 +2947,7 @@ Option<bool> Index::search(std::vector<query_tokens_t>& field_query_tokens, cons
             const float VECTOR_SEARCH_WEIGHT = vector_query.alpha;
             const float TEXT_MATCH_WEIGHT = 1.0 - VECTOR_SEARCH_WEIGHT;

-            VectorFilterFunctor filterFunctor(filter_result_iterator);
+            VectorFilterFunctor filterFunctor(filter_result_iterator, excluded_result_ids, excluded_result_ids_size);
             auto& field_vector_index = vector_index.at(vector_query.field_name);

             std::vector<std::pair<float, size_t>> dist_labels;
@@ -106,7 +106,7 @@ Option<bool> OpenAIEmbedder::is_model_valid(const nlohmann::json& model_config,
         if(json_res.count("error") == 0 || json_res["error"].count("message") == 0) {
             return Option<bool>(400, "OpenAI API error: " + res);
         }
-        return Option<bool>(400, "OpenAI API error: " + nlohmann::json::parse(res)["error"]["message"].get<std::string>());
+        return Option<bool>(400, "OpenAI API error: " + json_res["error"]["message"].get<std::string>());
     }

     nlohmann::json models_json;
@@ -152,7 +152,7 @@ Option<bool> OpenAIEmbedder::is_model_valid(const nlohmann::json& model_config,
         if(json_res.count("error") == 0 || json_res["error"].count("message") == 0) {
             return Option<bool>(400, "OpenAI API error: " + embedding_res);
         }
-        return Option<bool>(400, "OpenAI API error: " + nlohmann::json::parse(res)["error"]["message"].get<std::string>());
+        return Option<bool>(400, "OpenAI API error: " + json_res["error"]["message"].get<std::string>());
     }
     std::vector<float> embedding;
     try {
@@ -337,7 +337,7 @@ Option<bool> GoogleEmbedder::is_model_valid(const nlohmann::json& model_config,
             return Option<bool>(400, "Google API error: " + res);
         }

-        return Option<bool>(400, "Google API error: " + nlohmann::json::parse(res)["error"]["message"].get<std::string>());
+        return Option<bool>(400, "Google API error: " + json_res["error"]["message"].get<std::string>());
     }

     try {
@@ -477,7 +477,7 @@ Option<bool> GCPEmbedder::is_model_valid(const nlohmann::json& model_config, siz
         if(json_res.count("error") == 0 || json_res["error"].count("message") == 0) {
             return Option<bool>(400, "GCP API error: " + res);
         }
-        return Option<bool>(400, "GCP API error: " + nlohmann::json::parse(res)["error"]["message"].get<std::string>());
+        return Option<bool>(400, "GCP API error: " + json_res["error"]["message"].get<std::string>());
     }
     nlohmann::json res_json;
     try {
@@ -680,7 +680,7 @@ Option<std::string> GCPEmbedder::generate_access_token(const std::string& refres
         if(res_code == 408) {
             return Option<std::string>(408, "GCP API timeout.");
         }
-        return Option<std::string>(400, "GCP API error: " + nlohmann::json::parse(res)["error"]["message"].get<std::string>());
+        return Option<std::string>(400, "GCP API error: " + json_res["error"]["message"].get<std::string>());
     }
     nlohmann::json res_json;
     try {
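The five embedder fixes above are all the same change: the response body was already parsed into json_res, so the error path should reuse it instead of calling nlohmann::json::parse(res) a second time, which is redundant and can throw on a malformed body that the count() guard was meant to protect against. A sketch of the parse-once pattern, using nlohmann's non-throwing parse mode:

#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

// Parse the response body once, validate the shape, then reuse the
// parsed object rather than re-parsing the raw string for the message.
std::string api_error(const std::string& res) {
    auto json_res = nlohmann::json::parse(res, nullptr, false); // no-throw parse
    if (json_res.is_discarded() || json_res.count("error") == 0 ||
        json_res["error"].count("message") == 0) {
        return "API error: " + res; // fall back to the raw body
    }
    return "API error: " + json_res["error"]["message"].get<std::string>();
}

int main() {
    std::cout << api_error(R"({"error":{"message":"invalid model"}})") << "\n";
    std::cout << api_error("not json") << "\n"; // no exception, raw fallback
}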
@@ -2987,7 +2987,6 @@ TEST_F(CollectionVectorTest, TestImageEmbedding) {

     auto coll = collection_create_op.get();

-    LOG(INFO) << "Adding image to collection";

     auto add_op = coll->add(R"({
         "name": "dog",
@@ -3027,6 +3026,83 @@ TEST_F(CollectionVectorTest, TestImageEmbedding) {
     ASSERT_EQ(results2["hits"][1]["document"]["id"], "0");
 }

+TEST_F(CollectionVectorTest, TestHybridSearchHiddenHits) {
+    nlohmann::json schema = R"({
+        "name": "test",
+        "fields": [
+            {
+                "name": "name",
+                "type": "string"
+            },
+            {
+                "name": "embedding",
+                "type": "float[]",
+                "embed": {
+                    "from": [
+                        "name"
+                    ],
+                    "model_config": {
+                        "model_name": "ts/e5-small"
+                    }
+                }
+            }
+        ]
+    })"_json;
+
+    EmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    auto coll = collection_create_op.get();
+
+    auto add_op = coll->add(R"({
+        "name": "soccer",
+        "id": "0"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "guitar",
+        "id": "1"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "typesense",
+        "id": "2"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    add_op = coll->add(R"({
+        "name": "potato",
+        "id": "3"
+    })"_json.dump());
+
+    ASSERT_TRUE(add_op.ok());
+
+    auto results = coll->search("sports", {"name", "embedding"},
+                                "", {}, {}, {2}, 10,
+                                1, FREQUENCY, {true},
+                                0, spp::sparse_hash_set<std::string>()).get();
+
+    ASSERT_EQ(4, results["hits"].size());
+    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get<std::string>().c_str());
+
+    // do hybrid search with hidden_hits
+    auto hybrid_results = coll->search("sports", {"name", "embedding"},
+                                       "", {}, {}, {2}, 10,
+                                       1, FREQUENCY, {true},
+                                       0, spp::sparse_hash_set<std::string>(), spp::sparse_hash_set<std::string>(), 10, "", 30, 4, "", 1, "", "0").get();
+
+    ASSERT_EQ(3, hybrid_results["hits"].size());
+    ASSERT_FALSE(hybrid_results["hits"][0]["document"]["id"] == 0);
+}
+
 TEST_F(CollectionVectorTest, TryAddingMultipleImageFieldToEmbedFrom) {
     auto schema_json =
         R"({
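In the long search() call above, the trailing "0" is the hidden_hits argument the test is named for: document id "0" is unconditionally dropped, which is why the hybrid query returns 3 hits instead of 4 and why the top hit is asserted to no longer be id 0. A hypothetical helper sketch (not Typesense's actual plumbing, where hidden document ids are mapped to internal sequence ids) showing how a comma-separated hidden_hits value could feed the sorted exclusion list that VectorFilterFunctor expects:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical: split "12,0,5" into a sorted id list suitable for the
// binary_search-based exclusion check shown earlier in this diff.
std::vector<uint32_t> parse_hidden_hits(const std::string& hidden_hits) {
    std::vector<uint32_t> ids;
    std::stringstream ss(hidden_hits);
    std::string token;
    while (std::getline(ss, token, ',')) {
        if (!token.empty()) {
            ids.push_back(static_cast<uint32_t>(std::stoul(token)));
        }
    }
    std::sort(ids.begin(), ids.end()); // keep sorted for binary_search
    return ids;
}

int main() {
    for (uint32_t id : parse_hidden_hits("12,0,5")) {
        std::cout << id << " "; // 0 5 12
    }
    std::cout << "\n";
}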
@@ -3045,4 +3121,34 @@ TEST_F(CollectionVectorTest, TryAddingMultipleImageFieldToEmbedFrom) {
     ASSERT_FALSE(collection_create_op.ok());

     ASSERT_EQ(collection_create_op.error(), "Only one field can be used in the `embed.from` property of an embed field when embedding from an image field.");
 }
+
+TEST_F(CollectionVectorTest, TestInvalidImage) {
+    auto schema_json =
+        R"({
+        "name": "Images",
+        "fields": [
+            {"name": "name", "type": "string"},
+            {"name": "image", "type": "image", "store": false},
+            {"name": "embedding", "type":"float[]", "embed":{"from": ["image"], "model_config": {"model_name": "ts/clip-vit-b-p32"}}}
+        ]
+    })"_json;
+
+    EmbedderManager::set_model_dir("/tmp/typesense_test/models");
+
+    auto collection_create_op = collectionManager.create_collection(schema_json);
+    ASSERT_TRUE(collection_create_op.ok());
+
+    auto coll = collection_create_op.get();
+
+    auto add_op = coll->add(R"({
+        "name": "teddy bear",
+        "image": "invalid"
+    })"_json.dump());
+
+    ASSERT_FALSE(add_op.ok());
+
+    ASSERT_EQ(add_op.error(), "Error while processing image");
+}
@@ -3,6 +3,8 @@

 TEST(FacetIndexTest, FacetValueDeletionString) {
     facet_index_t findex;
+    findex.initialize("brand");
+
     std::unordered_map<facet_value_id_t, std::vector<uint32_t>, facet_value_id_t::Hash> fvalue_to_seq_ids;
     std::unordered_map<uint32_t, std::vector<facet_value_id_t>> seq_id_to_fvalues;
@@ -18,15 +20,54 @@ TEST(FacetIndexTest, FacetValueDeletionString) {
     doc["brand"] = "nike";

     findex.insert("brand", fvalue_to_seq_ids, seq_id_to_fvalues, true);
+    ASSERT_EQ(3, findex.facet_val_num_ids("brand", "nike"));

     findex.remove(doc, brandf, 0);
     findex.remove(doc, brandf, 1);
-    findex.remove(doc, brandf, 2);
+    ASSERT_EQ(1, findex.facet_val_num_ids("brand", "nike"));
+
+    findex.remove(doc, brandf, 2);
     ASSERT_FALSE(findex.facet_value_exists("brand", "nike"));
 }

+TEST(FacetIndexTest, FacetValueDeletionOfLongString) {
+    facet_index_t findex;
+    findex.initialize("brand");
+
+    std::unordered_map<facet_value_id_t, std::vector<uint32_t>, facet_value_id_t::Hash> fvalue_to_seq_ids;
+    std::unordered_map<uint32_t, std::vector<facet_value_id_t>> seq_id_to_fvalues;
+
+    std::string longval;
+
+    for(size_t i = 0; i < 200; i++) {
+        longval += "a";
+    }
+
+    facet_value_id_t longfval(longval.substr(0, 100), 1);
+
+    fvalue_to_seq_ids[longfval] = {0, 1, 2};
+    seq_id_to_fvalues[0] = {longfval};
+    seq_id_to_fvalues[1] = {longfval};
+    seq_id_to_fvalues[2] = {longfval};
+
+    field brandf("brand", field_types::STRING, true);
+    nlohmann::json doc;
+    doc["brand"] = longval;
+
+    findex.insert("brand", fvalue_to_seq_ids, seq_id_to_fvalues, true);
+    ASSERT_EQ(3, findex.facet_val_num_ids("brand", longval.substr(0, 100)));
+
+    findex.remove(doc, brandf, 0);
+    findex.remove(doc, brandf, 1);
+    ASSERT_EQ(1, findex.facet_val_num_ids("brand", longval.substr(0, 100)));
+
+    findex.remove(doc, brandf, 2);
+    ASSERT_FALSE(findex.facet_value_exists("brand", longval.substr(0, 100)));
+}
+
 TEST(FacetIndexTest, FacetValueDeletionFloat) {
     facet_index_t findex;
+    findex.initialize("price");
     std::unordered_map<facet_value_id_t, std::vector<uint32_t>, facet_value_id_t::Hash> fvalue_to_seq_ids;
     std::unordered_map<uint32_t, std::vector<facet_value_id_t>> seq_id_to_fvalues;

@@ -39,13 +80,16 @@ TEST(FacetIndexTest, FacetValueDeletionFloat) {

     field pricef("price", field_types::FLOAT, true);
     nlohmann::json doc;
-    doc["price"] = "99.95";
+    doc["price"] = 99.95;

-    findex.insert("brand", fvalue_to_seq_ids, seq_id_to_fvalues, true);
+    findex.insert("price", fvalue_to_seq_ids, seq_id_to_fvalues, true);
+    ASSERT_EQ(3, findex.facet_val_num_ids("price", "99.95"));
+
     findex.remove(doc, pricef, 0);
     findex.remove(doc, pricef, 1);
-    findex.remove(doc, pricef, 2);
+    ASSERT_EQ(1, findex.facet_val_num_ids("price", "99.95"));

     findex.remove(doc, pricef, 2);
     ASSERT_FALSE(findex.facet_value_exists("price", "99.95"));
 }
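The three tests assert the same reference-counting contract: facet_val_num_ids drops as referencing documents are removed, and facet_value_exists turns false only once the last reference is gone; the long-string test additionally shows that facet values are keyed by their first 100 characters. A minimal sketch of that contract (a plain map, not Typesense's facet_index_t):

#include <iostream>
#include <map>
#include <string>

int main() {
    // Each facet value keeps a count of referencing documents; the
    // value itself disappears when the last document is removed.
    std::map<std::string, int> value_refs;
    value_refs["nike"] = 3; // 3 docs share the value

    auto remove_doc = [&](const std::string& v) {
        auto it = value_refs.find(v);
        if (it != value_refs.end() && --it->second == 0) {
            value_refs.erase(it); // last reference gone, drop the value
        }
    };

    remove_doc("nike");
    remove_doc("nike");
    std::cout << value_refs.count("nike") << " refs=" << value_refs["nike"] << "\n"; // 1 refs=1
    remove_doc("nike");
    std::cout << value_refs.count("nike") << "\n"; // 0: value fully deleted
}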