mirror of
https://github.com/typesense/typesense.git
synced 2025-05-18 12:42:50 +08:00
Parameterize the token ordering field.
This commit is contained in:
parent
fab27d9f5c
commit
70dda716c5
2
TODO.md
2
TODO.md
@ -28,7 +28,7 @@
|
||||
- ~~Filters~~
|
||||
- ~~Facets~~
|
||||
- ~~Schema validation during insertion (missing fields + type errors)~~
|
||||
- Proper score field for ranking tokens
|
||||
- ~~Proper score field for ranking tokens~~
|
||||
- Prevent string copy during indexing
|
||||
- clean special chars before indexing
|
||||
- Minimum results should be a variable instead of blindly going with max_results
|
||||
|
@ -33,6 +33,8 @@ private:
|
||||
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> rank_index;
|
||||
|
||||
std::string token_ordering_field;
|
||||
|
||||
std::string get_doc_id_key(std::string doc_id);
|
||||
|
||||
std::string get_seq_id_key(uint32_t seq_id);
|
||||
@ -80,7 +82,7 @@ public:
|
||||
|
||||
Collection(const std::string name, const uint32_t collection_id, const uint32_t next_seq_id, Store *store,
|
||||
const std::vector<field> & search_fields, const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields);
|
||||
const std::vector<std::string> & rank_fields, const std::string token_ordering_field);
|
||||
|
||||
~Collection();
|
||||
|
||||
@ -102,6 +104,8 @@ public:
|
||||
|
||||
spp::sparse_hash_map<std::string, field> get_schema();
|
||||
|
||||
std::string get_token_ordering_field();
|
||||
|
||||
Option<std::string> add(std::string json_str);
|
||||
|
||||
nlohmann::json search(std::string query, const std::vector<std::string> search_fields,
|
||||
|
@ -23,6 +23,7 @@ private:
|
||||
static constexpr const char* COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
|
||||
static constexpr const char* COLLECTION_FACET_FIELDS_KEY = "facet_fields";
|
||||
static constexpr const char* COLLECTION_RANK_FIELDS_KEY = "rank_fields";
|
||||
static constexpr const char* COLLECTION_TOKEN_ORDERING_FIELD_KEY = "token_ordering_field";
|
||||
|
||||
CollectionManager();
|
||||
|
||||
@ -41,7 +42,8 @@ public:
|
||||
|
||||
Collection* create_collection(std::string name, const std::vector<field> & search_fields,
|
||||
const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields);
|
||||
const std::vector<std::string> & rank_fields,
|
||||
const std::string & token_ordering_field = "");
|
||||
|
||||
Collection* get_collection(std::string collection_name);
|
||||
|
||||
|
@ -9,8 +9,9 @@
|
||||
|
||||
Collection::Collection(const std::string name, const uint32_t collection_id, const uint32_t next_seq_id, Store *store,
|
||||
const std::vector<field> &search_fields, const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields):
|
||||
name(name), collection_id(collection_id), next_seq_id(next_seq_id), store(store), rank_fields(rank_fields) {
|
||||
const std::vector<std::string> & rank_fields, const std::string token_ordering_field):
|
||||
name(name), collection_id(collection_id), next_seq_id(next_seq_id), store(store),
|
||||
rank_fields(rank_fields), token_ordering_field(token_ordering_field) {
|
||||
|
||||
for(const field& field: search_fields) {
|
||||
art_tree *t = new art_tree;
|
||||
@ -79,9 +80,22 @@ Option<std::string> Collection::add(std::string json_str) {
|
||||
}
|
||||
|
||||
Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uint32_t seq_id) {
|
||||
if(!token_ordering_field.empty() && document.count(token_ordering_field) == 0) {
|
||||
return Option<>(400, "Field `" + token_ordering_field + "` has been declared as a token ordering field, "
|
||||
"but is not found in the document.");
|
||||
}
|
||||
|
||||
if(!document[token_ordering_field].is_number()) {
|
||||
return Option<>(400, "Token ordering field `" + token_ordering_field + "` must be an INT32.");
|
||||
}
|
||||
|
||||
if(document[token_ordering_field].get<int64_t>() > INT32_MAX) {
|
||||
return Option<>(400, "Token ordering field `" + token_ordering_field + "` exceeds maximum value of INT32.");
|
||||
}
|
||||
|
||||
uint32_t points = 0;
|
||||
if(document.count("points") != 0) {
|
||||
points = document["points"];
|
||||
if(!token_ordering_field.empty()) {
|
||||
points = document[token_ordering_field];
|
||||
}
|
||||
|
||||
for(const std::pair<std::string, field> & field_pair: search_schema) {
|
||||
@ -1061,4 +1075,8 @@ std::string Collection::get_meta_key(std::string collection_name) {
|
||||
|
||||
std::string Collection::get_seq_id_collection_prefix() {
|
||||
return std::to_string(collection_id) + "_" + std::string(SEQ_ID_PREFIX);
|
||||
}
|
||||
|
||||
// Accessor for the collection's token ordering field name.
// Returns the `token_ordering_field` member by value (a copy of the string).
std::string Collection::get_token_ordering_field() {
|
||||
return token_ordering_field;
|
||||
}
|
@ -48,13 +48,16 @@ void CollectionManager::init(Store *store) {
|
||||
std::vector<std::string> collection_rank_fields =
|
||||
collection_meta[COLLECTION_RANK_FIELDS_KEY].get<std::vector<std::string>>();
|
||||
|
||||
std::string token_ordering_field = collection_meta[COLLECTION_TOKEN_ORDERING_FIELD_KEY].get<std::string>();
|
||||
|
||||
Collection* collection = new Collection(this_collection_name,
|
||||
collection_meta[COLLECTION_ID_KEY].get<uint32_t>(),
|
||||
collection_next_seq_id,
|
||||
store,
|
||||
search_fields,
|
||||
facet_fields,
|
||||
collection_rank_fields);
|
||||
collection_rank_fields,
|
||||
token_ordering_field);
|
||||
|
||||
// Fetch records from the store and re-create memory index
|
||||
std::vector<std::string> documents;
|
||||
@ -79,7 +82,8 @@ void CollectionManager::init(Store *store) {
|
||||
|
||||
Collection* CollectionManager::create_collection(std::string name, const std::vector<field> & search_fields,
|
||||
const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields) {
|
||||
const std::vector<std::string> & rank_fields,
|
||||
const std::string & token_ordering_field) {
|
||||
if(store->contains(Collection::get_meta_key(name))) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -107,8 +111,10 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
|
||||
collection_meta[COLLECTION_SEARCH_FIELDS_KEY] = search_fields_json;
|
||||
collection_meta[COLLECTION_FACET_FIELDS_KEY] = facet_fields_json;
|
||||
collection_meta[COLLECTION_RANK_FIELDS_KEY] = rank_fields;
|
||||
|
||||
Collection* new_collection = new Collection(name, next_collection_id, 0, store, search_fields, facet_fields, rank_fields);
|
||||
collection_meta[COLLECTION_TOKEN_ORDERING_FIELD_KEY] = token_ordering_field;
|
||||
|
||||
Collection* new_collection = new Collection(name, next_collection_id, 0, store, search_fields, facet_fields,
|
||||
rank_fields, token_ordering_field);
|
||||
|
||||
store->insert(Collection::get_meta_key(name), collection_meta.dump());
|
||||
store->insert(Collection::get_next_seq_id_key(name), std::to_string(0));
|
||||
|
@ -26,7 +26,8 @@ protected:
|
||||
facet_fields = {field("starring", field_types::STRING)};
|
||||
rank_fields = {"points"};
|
||||
|
||||
collection1 = collectionManager.create_collection("collection1", search_fields, facet_fields, rank_fields);
|
||||
collection1 = collectionManager.create_collection("collection1", search_fields, facet_fields,
|
||||
rank_fields, "points");
|
||||
}
|
||||
|
||||
virtual void SetUp() {
|
||||
@ -71,6 +72,7 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
ASSERT_EQ(facet_fields_expected, collection1->get_facet_fields());
|
||||
ASSERT_EQ(rank_fields, collection1->get_rank_fields());
|
||||
ASSERT_EQ(schema.size(), collection1->get_schema().size());
|
||||
ASSERT_EQ("points", collection1->get_token_ordering_field());
|
||||
|
||||
results = collection1->search("thomas", search_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
@ -31,7 +31,8 @@ protected:
|
||||
|
||||
collection = collectionManager.get_collection("collection");
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("collection", search_fields, facet_fields, rank_fields);
|
||||
collection = collectionManager.create_collection("collection", search_fields, facet_fields,
|
||||
rank_fields, "points");
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -787,23 +788,24 @@ TEST_F(CollectionTest, IndexingWithBadData) {
|
||||
// should not crash when document to-be-indexed doesn't match schema
|
||||
Collection *sample_collection;
|
||||
|
||||
std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32)};
|
||||
std::vector<field> fields = {field("name", field_types::STRING)};
|
||||
facet_fields = {field("tags", field_types::STRING_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"age", "average"};
|
||||
|
||||
sample_collection = collectionManager.get_collection("sample_collection");
|
||||
if(sample_collection == nullptr) {
|
||||
sample_collection = collectionManager.create_collection("sample_collection", fields, facet_fields, rank_fields);
|
||||
sample_collection = collectionManager.create_collection("sample_collection", fields, facet_fields,
|
||||
rank_fields, "age");
|
||||
}
|
||||
|
||||
const Option<std::string> & search_fields_missing_op1 = sample_collection->add("{\"namezz\": \"foo\"}");
|
||||
const Option<std::string> & search_fields_missing_op1 = sample_collection->add("{\"namezz\": \"foo\", \"age\": 29}");
|
||||
ASSERT_FALSE(search_fields_missing_op1.ok());
|
||||
ASSERT_STREQ("Field `name` has been declared as a search field in the schema, but is not found in the document.",
|
||||
search_fields_missing_op1.error().c_str());
|
||||
|
||||
const Option<std::string> & search_fields_missing_op2 = sample_collection->add("{\"name\": \"foo\", \"agez\": 34}");
|
||||
const Option<std::string> & search_fields_missing_op2 = sample_collection->add("{\"namez\": \"foo\", \"age\": 34}");
|
||||
ASSERT_FALSE(search_fields_missing_op2.ok());
|
||||
ASSERT_STREQ("Field `age` has been declared as a search field in the schema, but is not found in the document.",
|
||||
ASSERT_STREQ("Field `name` has been declared as a search field in the schema, but is not found in the document.",
|
||||
search_fields_missing_op2.error().c_str());
|
||||
|
||||
const Option<std::string> & facet_fields_missing_op1 = sample_collection->add("{\"name\": \"foo\", \"age\": 34}");
|
||||
@ -830,9 +832,14 @@ TEST_F(CollectionTest, IndexingWithBadData) {
|
||||
ASSERT_TRUE(empty_facet_field_op.ok());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"age\": \"34\", \"tags\": [], \"average\": 34 }";
|
||||
const Option<std::string> & bad_search_field_op = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_search_field_op.ok());
|
||||
ASSERT_STREQ("Search field `age` must be an INT32.", bad_search_field_op.error().c_str());
|
||||
const Option<std::string> & bad_token_ordering_field_op1 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ordering_field_op1.ok());
|
||||
ASSERT_STREQ("Token ordering field `age` must be an INT32.", bad_token_ordering_field_op1.error().c_str());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"age\": 343234324234233234, \"tags\": [], \"average\": 34 }";
|
||||
const Option<std::string> & bad_token_ordering_field_op2 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ordering_field_op2.ok());
|
||||
ASSERT_STREQ("Token ordering field `age` exceeds maximum value of INT32.", bad_token_ordering_field_op2.error().c_str());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"age\": 34, \"tags\": [], \"average\": \"34\"}";
|
||||
const Option<std::string> & bad_rank_field_op = sample_collection->add(doc_str);
|
||||
|
Loading…
x
Reference in New Issue
Block a user