From 596c1c31f480f92c0169e864424dd2f0c93d0d99 Mon Sep 17 00:00:00 2001
From: ozanarmagan
Date: Tue, 1 Aug 2023 11:06:14 +0300
Subject: [PATCH 1/4] Fix HybridSearchWithExplicitVector test

---
 test/collection_vector_search_test.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/test/collection_vector_search_test.cpp b/test/collection_vector_search_test.cpp
index 7f1e7eef..84e54d03 100644
--- a/test/collection_vector_search_test.cpp
+++ b/test/collection_vector_search_test.cpp
@@ -775,7 +775,7 @@ TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
     ASSERT_EQ(2, search_res["found"].get<size_t>());
     ASSERT_EQ(2, search_res["hits"].size());
 
-    ASSERT_FLOAT_EQ(0.0462081432, search_res["hits"][0]["vector_distance"].get<float>());
+    ASSERT_FLOAT_EQ(0.046207964, search_res["hits"][0]["vector_distance"].get<float>());
     ASSERT_FLOAT_EQ(0.1213316321, search_res["hits"][1]["vector_distance"].get<float>());
 
     // to pass k param
@@ -825,10 +825,6 @@ TEST_F(CollectionVectorTest, HybridSearchWithExplicitVector) {
     ASSERT_FLOAT_EQ(2.0f, search_res["hits"][0]["vector_distance"].get<float>());
     ASSERT_FLOAT_EQ(2.0f, search_res["hits"][1]["vector_distance"].get<float>());
     ASSERT_FLOAT_EQ(2.0f, search_res["hits"][2]["vector_distance"].get<float>());
-
-    ASSERT_FLOAT_EQ(2.0f, search_res["hits"][0]["hybrid_search_info"]["vector_distance"].get<float>());
-    ASSERT_FLOAT_EQ(2.0f, search_res["hits"][1]["hybrid_search_info"]["vector_distance"].get<float>());
-    ASSERT_FLOAT_EQ(2.0f, search_res["hits"][2]["hybrid_search_info"]["vector_distance"].get<float>());
 }
 
 TEST_F(CollectionVectorTest, HybridSearchOnlyVectorMatches) {
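A note on the expectation change in PATCH 1/4: gtest's ASSERT_FLOAT_EQ compares floats to within 4 ULPs, so the assertion already tolerates rounding noise; the expected constant only needs to move when the computed distance itself changes, as it does here by roughly 1e-7. The sketch below is a hypothetical illustration of a cosine-style vector_distance, not Typesense's actual implementation (which delegates to its vector index library); it shows why such values are sensitive to the order of floating point operations.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Hypothetical sketch: cosine distance = 1 - cosine similarity.
    // Reordering these floating point operations (e.g. normalizing the
    // vectors up front vs. dividing once at the end) can shift the result
    // in the last significant digits, on the order of the change above.
    float cosine_distance(const std::vector<float>& a, const std::vector<float>& b) {
        float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;
        for(std::size_t i = 0; i < a.size(); i++) {
            dot    += a[i] * b[i];
            norm_a += a[i] * a[i];
            norm_b += b[i] * b[i];
        }
        return 1.0f - dot / (std::sqrt(norm_a) * std::sqrt(norm_b));
    }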
From 7998054900349bcc8c073c42316fbfc0714136b1 Mon Sep 17 00:00:00 2001
From: Kishore Nallan
Date: Tue, 1 Aug 2023 18:40:48 +0530
Subject: [PATCH 2/4] Exclude POST /health from batch indexer resource check.

---
 src/batched_indexer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/batched_indexer.cpp b/src/batched_indexer.cpp
index 93a19382..be3c9974 100644
--- a/src/batched_indexer.cpp
+++ b/src/batched_indexer.cpp
@@ -200,7 +200,8 @@ void BatchedIndexer::run() {
                                                            config.get_disk_used_max_percentage(),
                                                            config.get_memory_used_max_percentage());
 
-            if (resource_check != cached_resource_stat_t::OK && orig_req->http_method != "DELETE") {
+            if (resource_check != cached_resource_stat_t::OK &&
+                orig_req->http_method != "DELETE" && found_rpath->handler != post_health) {
                 const std::string& err_msg = "Rejecting write: running out of resource type: " +
                                              std::string(magic_enum::enum_name(resource_check));
                 LOG(ERROR) << err_msg;
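For context on PATCH 2/4: the batched indexer replays queued writes on a background thread, and before applying each one it checks whether disk and memory usage are within the configured limits. DELETE was already exempt from the check (deletes can free resources); this patch additionally exempts the POST /health handler, so health reporting keeps working on a node that is out of disk or memory. Below is a minimal sketch of the post-patch rule; the enum and the boolean parameter are simplified stand-ins for the real cached_resource_stat_t and route-handler comparison.

    #include <string>

    enum class resource_stat { OK, OUT_OF_DISK, OUT_OF_MEMORY };

    // Simplified sketch of the gating rule after this patch: a queued write
    // is rejected only if resources are exhausted AND it is neither a DELETE
    // (which can free space) nor the POST /health handler.
    bool should_reject_write(resource_stat resource_check,
                             const std::string& http_method,
                             bool is_post_health_handler) {
        return resource_check != resource_stat::OK &&
               http_method != "DELETE" &&
               !is_post_health_handler;
    }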
From 770c192e9762e088ba217989638674c42606d48a Mon Sep 17 00:00:00 2001
From: Kishore Nallan
Date: Wed, 2 Aug 2023 07:15:40 +0530
Subject: [PATCH 3/4] Add onnx patch for arm gpu for T4

---
 bazel/onnx.patch | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/bazel/onnx.patch b/bazel/onnx.patch
index 121a941d..0ceeb6db 100644
--- a/bazel/onnx.patch
+++ b/bazel/onnx.patch
@@ -11,6 +11,19 @@ index 88b46890b7..d090499971 100644
     __cmake_contentNameLower
     __cmake_contentName
 
+
+--- cmake/CMakeLists.txt
++++ cmake/CMakeLists.txt
+@@ -1268,6 +1268,7 @@ if (onnxruntime_USE_CUDA)
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_53,code=sm_53") # TX1, Nano
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_62,code=sm_62") # TX2
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_72,code=sm_72") # AGX Xavier, NX Xavier
++   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75") # T4
+    if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # AGX Orin, NX Orin
+    endif()
+
+
 diff --git a/.gitmodules b/.gitmodules
 index 8e4217162b..bb63b7d9c5 100644
 --- .gitmodules
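On PATCH 3/4: each -gencode=arch=compute_XX,code=sm_XX flag embeds device code for one GPU generation, and a CUDA binary built without a matching sm target cannot load kernels on that GPU (or must JIT from PTX, if any was embedded). The Jetson-oriented list being patched covers sm_53/62/72/87 but not Turing, and the T4 is compute capability 7.5, hence the added compute_75,code=sm_75 line. A small standalone check using the standard CUDA runtime API confirms what capability a machine's GPUs report:

    #include <cstdio>
    #include <cuda_runtime.h>

    // Print each visible GPU's compute capability; a T4 reports
    // major=7, minor=5, i.e. the sm_75 target added by the patch above.
    int main() {
        int count = 0;
        cudaGetDeviceCount(&count);
        for(int i = 0; i < count; i++) {
            cudaDeviceProp prop;
            cudaGetDeviceProperties(&prop, i);
            std::printf("device %d: %s, compute capability %d.%d\n",
                        i, prop.name, prop.major, prop.minor);
        }
        return 0;
    }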
From 06b64d8879a4a797f21974f5c11709511cc39e6b Mon Sep 17 00:00:00 2001
From: Kishore Nallan
Date: Wed, 2 Aug 2023 13:58:50 +0530
Subject: [PATCH 4/4] Deal with repeated facet values in arrays.

---
 src/index.cpp                     |  5 +++-
 test/collection_faceting_test.cpp | 74 ++++++++++++++++++++++++++++++-
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/src/index.cpp b/src/index.cpp
index 7192faba..6a1c99bf 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -1056,6 +1056,8 @@ void Index::tokenize_string_array_with_facets(const std::vector<std::string>& st
                                               std::unordered_map<std::string, std::vector<uint32_t>>& token_to_offsets,
                                               std::vector<uint64_t>& facet_hashes) {
+    std::set<uint64_t> facet_hash_set;  // required to deal with repeating phrases
+
     for(size_t array_index = 0; array_index < strings.size(); array_index++) {
         const std::string& str = strings[array_index];
         std::set<std::string> token_set;  // required to deal with repeating tokens
@@ -1089,8 +1091,9 @@
             }
         }
 
-        if(is_facet) {
+        if(is_facet && facet_hash_set.count(facet_hash) == 0) {
             facet_hashes.push_back(facet_hash);
+            facet_hash_set.insert(facet_hash);
         }
 
         if(token_set.empty()) {
diff --git a/test/collection_faceting_test.cpp b/test/collection_faceting_test.cpp
index ee9b798f..fec66c48 100644
--- a/test/collection_faceting_test.cpp
+++ b/test/collection_faceting_test.cpp
@@ -994,7 +994,7 @@ TEST_F(CollectionFacetingTest, FacetArrayValuesShouldBeNormalized) {
     ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
 
     // any document is chosen as representative
-    ASSERT_EQ("bu-qu", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
+    ASSERT_EQ("BUQU", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
 
     collectionManager.drop_collection("coll1");
 }
@@ -1057,6 +1057,78 @@ TEST_F(CollectionFacetingTest, FacetByNestedIntField) {
     ASSERT_EQ("companyRank", wildcard_facets[1].field_name);
 }
 
+TEST_F(CollectionFacetingTest, FacetByNestedArrayField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name": "data", "type": "object", "optional": false, "facet": true }
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc1 = R"({
+        "data": {"details": [{"name": "Foo"}, {"name": "Foo"}]}
+    })"_json;
+
+    auto doc2 = R"({
+        "data": {"details": [{"name": "Foo"}, {"name": "Foo"}]}
+    })"_json;
+
+    ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump(), CREATE).ok());
+
+    auto results = coll1->search("*", {}, "", {"data.details.name"}, {}, {0}, 10, 1,
+                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4).get();
+
+    ASSERT_EQ(2, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ("data.details.name", results["facet_counts"][0]["field_name"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+    ASSERT_EQ("Foo", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
+}
+
+TEST_F(CollectionFacetingTest, FacetByArrayField) {
+    nlohmann::json schema = R"({
+        "name": "coll1",
+        "enable_nested_fields": true,
+        "fields": [
+            {"name": "data", "type": "string[]", "optional": false, "facet": true }
+        ]
+    })"_json;
+
+    auto op = collectionManager.create_collection(schema);
+    ASSERT_TRUE(op.ok());
+    Collection* coll1 = op.get();
+
+    auto doc1 = R"({
+        "data": ["Foo", "Foo"]
+    })"_json;
+
+    auto doc2 = R"({
+        "data": ["Foo", "Foo"]
+    })"_json;
+
+    ASSERT_TRUE(coll1->add(doc1.dump(), CREATE).ok());
+    ASSERT_TRUE(coll1->add(doc2.dump(), CREATE).ok());
+
+    auto results = coll1->search("*", {}, "", {"data"}, {}, {0}, 10, 1,
+                                 token_ordering::FREQUENCY, {true}, 10, spp::sparse_hash_set<std::string>(),
+                                 spp::sparse_hash_set<std::string>(), 10, "", 30, 4).get();
+
+    ASSERT_EQ(2, results["found"].get<size_t>());
+    ASSERT_EQ(1, results["facet_counts"].size());
+    ASSERT_EQ("data", results["facet_counts"][0]["field_name"]);
+    ASSERT_EQ(1, results["facet_counts"][0]["counts"].size());
+    ASSERT_EQ(2, results["facet_counts"][0]["counts"][0]["count"].get<size_t>());
+    ASSERT_EQ("Foo", results["facet_counts"][0]["counts"][0]["value"].get<std::string>());
+}
+
 TEST_F(CollectionFacetingTest, FacetParseTest){
     std::vector<field> fields = {
         field("score", field_types::INT32, true),
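On PATCH 4/4: before this change, an array like ["Foo", "Foo"] pushed the same facet hash once per repetition, so a single document could inflate the facet count for its own repeated value; the new tests pin the expected count at 2, i.e. one per document. The index.cpp hunk fixes this with an order-preserving de-duplication: the output vector keeps first-occurrence order while a std::set guards against repeats. The same pattern in isolation (function name and element type hypothetical):

    #include <cstdint>
    #include <set>
    #include <vector>

    // Order-preserving de-duplication, as in the index.cpp hunk above:
    // the vector retains the first occurrence of each hash in input order,
    // while the set provides the membership check that skips repeats.
    std::vector<uint64_t> dedupe_hashes(const std::vector<uint64_t>& hashes) {
        std::set<uint64_t> seen;
        std::vector<uint64_t> out;
        for(uint64_t h : hashes) {
            if(seen.count(h) == 0) {
                out.push_back(h);
                seen.insert(h);
            }
        }
        return out;
    }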