mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 12:42:50 +08:00
Merge branch 'v0.25-join' into v0.26-facets
This commit is contained in:
commit
bfa744353b
@ -79,7 +79,7 @@ public:
|
||||
Option<bool> remove_rule(const std::string& name);
|
||||
|
||||
void add_suggestion(const std::string& query_collection,
|
||||
std::string& query, bool live_query, const std::string& user_id);
|
||||
const std::string& query, bool live_query, const std::string& user_id);
|
||||
|
||||
void stop();
|
||||
|
||||
|
@ -88,5 +88,5 @@ public:
|
||||
|
||||
bool should_skip_char(char c);
|
||||
|
||||
static void normalize_ascii(std::string& text);
|
||||
static std::string normalize_ascii_no_spaces(const std::string& text);
|
||||
};
|
@ -787,6 +787,10 @@ public:
|
||||
cors_domains.insert(cors_values_vec.begin(), cors_values_vec.end());
|
||||
}
|
||||
|
||||
// Setter: stores the given flag in this object's `enable_search_analytics` member.
void set_enable_search_analytics(bool enable_search_analytics) {
|
||||
// `this->` is needed here because the parameter has the same name as the member.
this->enable_search_analytics = enable_search_analytics;
|
||||
}
|
||||
|
||||
// validation
|
||||
|
||||
Option<bool> is_valid() {
|
||||
|
@ -203,7 +203,7 @@ Option<bool> AnalyticsManager::remove_popular_queries_index(const std::string &n
|
||||
return Option<bool>(true);
|
||||
}
|
||||
|
||||
void AnalyticsManager::add_suggestion(const std::string &query_collection, std::string &query,
|
||||
void AnalyticsManager::add_suggestion(const std::string &query_collection, const std::string& query,
|
||||
const bool live_query, const std::string& user_id) {
|
||||
// look up suggestion collections for the query collection
|
||||
std::unique_lock lock(mutex);
|
||||
@ -212,7 +212,6 @@ void AnalyticsManager::add_suggestion(const std::string &query_collection, std::
|
||||
for(const auto& suggestion_collection: suggestion_collections_it->second) {
|
||||
const auto& popular_queries_it = popular_queries.find(suggestion_collection);
|
||||
if(popular_queries_it != popular_queries.end()) {
|
||||
Tokenizer::normalize_ascii(query);
|
||||
popular_queries_it->second->add(query, live_query, user_id);
|
||||
}
|
||||
}
|
||||
@ -235,6 +234,8 @@ void AnalyticsManager::run(ReplicationState* raft_server) {
|
||||
}
|
||||
|
||||
persist_suggestions(raft_server, prev_persistence_s);
|
||||
prev_persistence_s = std::chrono::duration_cast<std::chrono::seconds>(
|
||||
std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
|
||||
lk.unlock();
|
||||
}
|
||||
@ -270,8 +271,6 @@ void AnalyticsManager::persist_suggestions(ReplicationState *raft_server, uint64
|
||||
continue;
|
||||
}
|
||||
|
||||
prev_persistence_s = now_ts_seconds;
|
||||
|
||||
std::string import_payload;
|
||||
popularQueries->serialize_as_docs(import_payload);
|
||||
|
||||
|
@ -5110,9 +5110,13 @@ void Collection::hide_credential(nlohmann::json& json, const std::string& creden
|
||||
}
|
||||
|
||||
Option<bool> Collection::truncate_after_top_k(const string &field_name, size_t k) {
|
||||
std::shared_lock slock(mutex);
|
||||
|
||||
std::vector<uint32_t> seq_ids;
|
||||
auto op = index->seq_ids_outside_top_k(field_name, k, seq_ids);
|
||||
|
||||
slock.unlock();
|
||||
|
||||
if(!op.ok()) {
|
||||
return op;
|
||||
}
|
||||
|
@ -1242,7 +1242,7 @@ Option<bool> CollectionManager::do_search(std::map<std::string, std::string>& re
|
||||
|
||||
if(Config::get_instance().get_enable_search_analytics()) {
|
||||
if(result.count("found") != 0 && result["found"].get<size_t>() != 0) {
|
||||
std::string analytics_query = raw_query;
|
||||
std::string analytics_query = Tokenizer::normalize_ascii_no_spaces(raw_query);
|
||||
AnalyticsManager::get_instance().add_suggestion(orig_coll_name, analytics_query,
|
||||
true, req_params["x-typesense-user-id"]);
|
||||
}
|
||||
|
@ -607,7 +607,7 @@ Option<bool> field::json_fields_to_fields(bool enable_nested_fields, nlohmann::j
|
||||
}
|
||||
|
||||
if(!the_fields.empty() && !the_fields.back().embed.empty()) {
|
||||
embed_json_field_indices.emplace_back(i, i);
|
||||
embed_json_field_indices.emplace_back(i, the_fields.size()-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5886,6 +5886,7 @@ size_t Index::num_seq_ids() const {
|
||||
|
||||
Option<bool> Index::seq_ids_outside_top_k(const std::string& field_name, size_t k,
|
||||
std::vector<uint32_t>& outside_seq_ids) {
|
||||
std::shared_lock lock(mutex);
|
||||
if (numerical_index.count(field_name) != 0) {
|
||||
auto field_it = numerical_index.find(field_name);
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <string_utils.h>
|
||||
#include "tokenizer.h"
|
||||
|
||||
Tokenizer::Tokenizer(const std::string& input, bool normalize, bool no_op, const std::string& locale,
|
||||
@ -349,10 +350,15 @@ bool Tokenizer::should_skip_char(char c) {
|
||||
return is_ascii_char(c) && get_stream_mode(c) != INDEX;
|
||||
}
|
||||
|
||||
void Tokenizer::normalize_ascii(std::string& text) {
|
||||
for(size_t i = 0; i < text.size(); i++) {
|
||||
std::string Tokenizer::normalize_ascii_no_spaces(const std::string& text) {
|
||||
std::string analytics_query = text;
|
||||
StringUtils::trim(analytics_query);
|
||||
|
||||
for(size_t i = 0; i < analytics_query.size(); i++) {
|
||||
if(is_ascii_char(text[i])) {
|
||||
text[i] = std::tolower(text[i]);
|
||||
analytics_query[i] = std::tolower(analytics_query[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return analytics_query;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <collection_manager.h>
|
||||
#include <analytics_manager.h>
|
||||
#include "string_utils.h"
|
||||
#include "collection.h"
|
||||
|
||||
@ -24,6 +25,8 @@ protected:
|
||||
collectionManager.init(store, 1.0, "auth_key", quit);
|
||||
collectionManager.load(8, 1000);
|
||||
|
||||
AnalyticsManager::get_instance().init(store);
|
||||
|
||||
schema = R"({
|
||||
"name": "collection1",
|
||||
"enable_nested_fields": true,
|
||||
@ -587,6 +590,67 @@ TEST_F(CollectionManagerTest, VerifyEmbeddedParametersOfScopedAPIKey) {
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
// Runs two searches whose `q` values carry surrounding whitespace against a collection that
// has a popular-queries analytics rule attached, then checks that the queries recorded for
// the destination collection are "tom" and "" (i.e. without the surrounding whitespace).
TEST_F(CollectionManagerTest, QuerySuggestionsShouldBeTrimmed) {
    // Source collection holding a single document that the first search will match.
    std::vector<field> schema_fields = {
        field("title", field_types::STRING, false, false, true, "", -1, 1),
        field("year", field_types::INT32, false),
        field("points", field_types::INT32, false),
    };

    Collection* coll = collectionManager.create_collection("coll1", 1, schema_fields, "points").get();

    nlohmann::json book;
    book["id"] = "0";
    book["title"] = "Tom Sawyer";
    book["year"] = 1876;
    book["points"] = 100;

    ASSERT_TRUE(coll->add(book.dump()).ok());

    // Turn search analytics on and register a rule that maps coll1 -> top_queries.
    Config::get_instance().set_enable_search_analytics(true);

    nlohmann::json analytics_rule = R"({
        "name": "top_search_queries",
        "type": "popular_queries",
        "params": {
            "limit": 100,
            "source": {
                "collections": ["coll1"]
            },
            "destination": {
                "collection": "top_queries"
            }
        }
    })"_json;

    ASSERT_TRUE(AnalyticsManager::get_instance().create_rule(analytics_rule, false, true).ok());

    nlohmann::json embedded_params;
    std::map<std::string, std::string> req_params = {
        {"collection", "coll1"},
        {"q", " tom "},
        {"query_by", "title"},
    };

    std::string json_res;
    auto now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::system_clock::now().time_since_epoch()).count();

    // First search: query padded with whitespace on both sides.
    ASSERT_TRUE(collectionManager.do_search(req_params, embedded_params, json_res, now_ts).ok());

    // Second search: whitespace-only query.
    json_res.clear();
    req_params["q"] = " ";
    ASSERT_TRUE(collectionManager.do_search(req_params, embedded_params, json_res, now_ts).ok());

    // check that suggestions have been trimmed
    auto popular_queries = AnalyticsManager::get_instance().get_popular_queries();
    auto& top_queries = popular_queries["top_queries"];
    ASSERT_EQ(2, top_queries->get_user_prefix_queries()[""].size());
    ASSERT_EQ("tom", top_queries->get_user_prefix_queries()[""][0].query);
    ASSERT_EQ("", top_queries->get_user_prefix_queries()[""][1].query);

    collectionManager.drop_collection("coll1");
}
|
||||
|
||||
TEST_F(CollectionManagerTest, RestoreAutoSchemaDocsOnRestart) {
|
||||
Collection *coll1;
|
||||
|
||||
|
@ -1161,6 +1161,27 @@ TEST_F(CollectionVectorTest, FreshEmplaceWithOptionalEmbeddingReferencedField) {
|
||||
"or make the embedding field optional.", add_op.error());
|
||||
}
|
||||
|
||||
// Creates a collection whose schema lists an explicit "id" field ahead of an auto-embedding
// field, and checks that creation succeeds, that get_fields() reports two fields, and that
// the field at index 1 has 384 dimensions.
TEST_F(CollectionVectorTest, EmbeddingFieldWithIdFieldPrecedingInSchema) {
    TextEmbedderManager::set_model_dir("/tmp/typesense_test/models");

    nlohmann::json schema = R"({
        "name": "objects",
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "name", "type": "string"},
            {"name": "embedding", "type":"float[]", "embed":{"from": ["name"], "model_config": {"model_name": "ts/e5-small"}}}
        ]
    })"_json;

    auto create_op = collectionManager.create_collection(schema);
    ASSERT_TRUE(create_op.ok());

    Collection* created = create_op.get();
    auto coll_fields = created->get_fields();

    // Three schema entries, but only two fields are expected back from the collection.
    ASSERT_EQ(2, coll_fields.size());
    ASSERT_EQ(384, coll_fields[1].num_dim);
}
|
||||
|
||||
TEST_F(CollectionVectorTest, SkipEmbeddingOpWhenValueExists) {
|
||||
nlohmann::json schema = R"({
|
||||
"name": "objects",
|
||||
|
Loading…
x
Reference in New Issue
Block a user