mirror of
https://github.com/typesense/typesense.git
synced 2025-05-21 22:33:27 +08:00
Persist vector field properties and restore.
This commit is contained in:
parent
c7f879bf30
commit
49268d648e
@ -293,6 +293,16 @@ struct field {
|
||||
|
||||
field_val[fields::locale] = field.locale;
|
||||
|
||||
field_val[fields::nested] = field.nested;
|
||||
if(field.nested) {
|
||||
field_val[fields::nested_array] = field.nested_array;
|
||||
}
|
||||
|
||||
if(field.num_dim > 0) {
|
||||
field_val[fields::num_dim] = field.num_dim;
|
||||
field_val[fields::vec_dist] = field.vec_dist == ip ? "ip" : "cosine";
|
||||
}
|
||||
|
||||
fields_json.push_back(field_val);
|
||||
|
||||
if(!field.has_valid_type()) {
|
||||
|
@ -168,6 +168,15 @@ nlohmann::json Collection::get_summary_json() const {
|
||||
field_json[fields::sort] = coll_field.sort;
|
||||
field_json[fields::infix] = coll_field.infix;
|
||||
field_json[fields::locale] = coll_field.locale;
|
||||
field_json[fields::nested] = coll_field.nested;
|
||||
|
||||
if(coll_field.nested) {
|
||||
field_json[fields::nested_array] = coll_field.nested_array;
|
||||
}
|
||||
|
||||
if(coll_field.num_dim > 0) {
|
||||
field_json[fields::num_dim] = coll_field.num_dim;
|
||||
}
|
||||
|
||||
fields_arr.push_back(field_json);
|
||||
}
|
||||
|
@ -42,9 +42,31 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
|
||||
field_obj[fields::infix] = -1;
|
||||
}
|
||||
|
||||
if(field_obj.count(fields::nested) == 0) {
|
||||
field_obj[fields::nested] = false;
|
||||
}
|
||||
|
||||
if(field_obj.count(fields::nested_array) == 0) {
|
||||
field_obj[fields::nested_array] = 0;
|
||||
}
|
||||
|
||||
if(field_obj.count(fields::num_dim) == 0) {
|
||||
field_obj[fields::num_dim] = 0;
|
||||
}
|
||||
|
||||
vector_distance_type_t vec_dist_type = vector_distance_type_t::cosine;
|
||||
|
||||
if(field_obj.count(fields::vec_dist) != 0) {
|
||||
auto vec_dist_type_op = magic_enum::enum_cast<vector_distance_type_t>(fields::vec_dist);
|
||||
if(vec_dist_type_op.has_value()) {
|
||||
vec_dist_type = vec_dist_type_op.value();
|
||||
}
|
||||
}
|
||||
|
||||
field f(field_obj[fields::name], field_obj[fields::type], field_obj[fields::facet],
|
||||
field_obj[fields::optional], field_obj[fields::index], field_obj[fields::locale],
|
||||
-1, field_obj[fields::infix]);
|
||||
-1, field_obj[fields::infix], field_obj[fields::nested], field_obj[fields::nested_array],
|
||||
field_obj[fields::num_dim], vec_dist_type);
|
||||
|
||||
// value of `sort` depends on field type
|
||||
if(field_obj.count(fields::sort) == 0) {
|
||||
|
@ -2576,6 +2576,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
|
||||
scores[0] = -float_to_int64_t(dist_label.first);
|
||||
int64_t match_score_index = -1;
|
||||
|
||||
//LOG(INFO) << "SEQ_ID: " << seq_id << ", score: " << dist_label.first;
|
||||
|
||||
KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
|
||||
topster->add(&kv);
|
||||
nearest_ids.push_back(seq_id);
|
||||
|
@ -12,8 +12,8 @@ protected:
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
std::atomic<bool> quit = false;
|
||||
Collection *collection1;
|
||||
std::vector<field> search_fields;
|
||||
std::vector<sort_by> sort_fields;
|
||||
nlohmann::json schema;
|
||||
|
||||
void setupCollection() {
|
||||
std::string state_dir_path = "/tmp/typesense_test/coll_manager_test_db";
|
||||
@ -24,19 +24,27 @@ protected:
|
||||
collectionManager.init(store, 1.0, "auth_key", quit);
|
||||
collectionManager.load(8, 1000);
|
||||
|
||||
search_fields = {
|
||||
field("title", field_types::STRING, false, false, true, "en", false),
|
||||
field("starring", field_types::STRING, false, false, true, "", false, true),
|
||||
field("cast", field_types::STRING_ARRAY, true, true, true, "", false),
|
||||
field(".*_year", field_types::INT32, true, true),
|
||||
field("location", field_types::GEOPOINT, false, true, true),
|
||||
field("not_stored", field_types::STRING, false, true, false),
|
||||
field("points", field_types::INT32, false)
|
||||
};
|
||||
schema = R"({
|
||||
"name": "collection1",
|
||||
"enable_nested_fields": true,
|
||||
"fields": [
|
||||
{"name": "title", "type": "string", "locale": "en"},
|
||||
{"name": "starring", "type": "string", "infix": true},
|
||||
{"name": "cast", "type": "string[]", "facet": true, "optional": true},
|
||||
{"name": ".*_year", "type": "int32", "facet": true, "optional": true},
|
||||
{"name": "location", "type": "geopoint", "optional": true},
|
||||
{"name": "not_stored", "type": "string", "optional": true, "index": false},
|
||||
{"name": "points", "type": "int32"},
|
||||
{"name": "person", "type": "object", "optional": true},
|
||||
{"name": "vec", "type": "float[]", "num_dim": 128, "optional": true}
|
||||
],
|
||||
"default_sorting_field": "points",
|
||||
"symbols_to_index":["+"],
|
||||
"token_separators":["-"]
|
||||
})"_json;
|
||||
|
||||
sort_fields = { sort_by("points", "DESC") };
|
||||
collection1 = collectionManager.create_collection("collection1", 4, search_fields,
|
||||
"points", 12345, "", {"+"}, {"-"}).get();
|
||||
collection1 = collectionManager.create_collection(schema).get();
|
||||
}
|
||||
|
||||
virtual void SetUp() {
|
||||
@ -91,16 +99,135 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
|
||||
ASSERT_EQ(3, num_keys);
|
||||
// we already call `collection1->get_next_seq_id` above, which is side-effecting
|
||||
ASSERT_EQ(1, StringUtils::deserialize_uint32_t(next_seq_id));
|
||||
ASSERT_EQ("{\"created_at\":12345,\"default_sorting_field\":\"points\",\"enable_nested_fields\":false,\"fallback_field_type\":\"\","
|
||||
"\"fields\":[{\"facet\":false,\"index\":true,\"infix\":false,\"locale\":\"en\",\"name\":\"title\",\"optional\":false,\"sort\":false,\"type\":\"string\"},"
|
||||
"{\"facet\":false,\"index\":true,\"infix\":true,\"locale\":\"\",\"name\":\"starring\",\"optional\":false,\"sort\":false,\"type\":\"string\"},"
|
||||
"{\"facet\":true,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\"cast\",\"optional\":true,\"sort\":false,\"type\":\"string[]\"},"
|
||||
"{\"facet\":true,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\".*_year\",\"optional\":true,\"sort\":true,\"type\":\"int32\"},"
|
||||
"{\"facet\":false,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\"location\",\"optional\":true,\"sort\":true,\"type\":\"geopoint\"},"
|
||||
"{\"facet\":false,\"index\":false,\"infix\":false,\"locale\":\"\",\"name\":\"not_stored\",\"optional\":true,\"sort\":false,\"type\":\"string\"},"
|
||||
"{\"facet\":false,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\"points\",\"optional\":false,\"sort\":true,\"type\":\"int32\"}],\"id\":0,"
|
||||
"\"name\":\"collection1\",\"num_memory_shards\":4,\"symbols_to_index\":[\"+\"],\"token_separators\":[\"-\"]}",
|
||||
collection_meta_json);
|
||||
|
||||
LOG(INFO) << collection_meta_json;
|
||||
|
||||
nlohmann::json expected_meta_json = R"(
|
||||
{
|
||||
"created_at":1663234047,
|
||||
"default_sorting_field":"points",
|
||||
"enable_nested_fields":true,
|
||||
"fallback_field_type":"",
|
||||
"fields":[
|
||||
{
|
||||
"facet":false,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"en",
|
||||
"name":"title",
|
||||
"nested":false,
|
||||
"optional":false,
|
||||
"sort":false,
|
||||
"type":"string"
|
||||
},
|
||||
{
|
||||
"facet":false,
|
||||
"index":true,
|
||||
"infix":true,
|
||||
"locale":"",
|
||||
"name":"starring",
|
||||
"nested":false,
|
||||
"optional":false,
|
||||
"sort":false,
|
||||
"type":"string"
|
||||
},
|
||||
{
|
||||
"facet":true,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":"cast",
|
||||
"nested":false,
|
||||
"optional":true,
|
||||
"sort":false,
|
||||
"type":"string[]"
|
||||
},
|
||||
{
|
||||
"facet":true,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":".*_year",
|
||||
"nested":false,
|
||||
"optional":true,
|
||||
"sort":true,
|
||||
"type":"int32"
|
||||
},
|
||||
{
|
||||
"facet":false,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":"location",
|
||||
"nested":false,
|
||||
"optional":true,
|
||||
"sort":true,
|
||||
"type":"geopoint"
|
||||
},
|
||||
{
|
||||
"facet":false,
|
||||
"index":false,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":"not_stored",
|
||||
"nested":false,
|
||||
"optional":true,
|
||||
"sort":false,
|
||||
"type":"string"
|
||||
},
|
||||
{
|
||||
"facet":false,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":"points",
|
||||
"nested":false,
|
||||
"optional":false,
|
||||
"sort":true,
|
||||
"type":"int32"
|
||||
},
|
||||
{
|
||||
"facet":false,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":"person",
|
||||
"nested":true,
|
||||
"nested_array":2,
|
||||
"optional":true,
|
||||
"sort":false,
|
||||
"type":"object"
|
||||
},
|
||||
{
|
||||
"facet":false,
|
||||
"index":true,
|
||||
"infix":false,
|
||||
"locale":"",
|
||||
"name":"vec",
|
||||
"nested":false,
|
||||
"num_dim":128,
|
||||
"optional":true,
|
||||
"sort":false,
|
||||
"type":"float[]",
|
||||
"vec_dist":"cosine"
|
||||
}
|
||||
],
|
||||
"id":0,
|
||||
"name":"collection1",
|
||||
"num_memory_shards":4,
|
||||
"symbols_to_index":[
|
||||
"+"
|
||||
],
|
||||
"token_separators":[
|
||||
"-"
|
||||
]
|
||||
}
|
||||
)"_json;
|
||||
|
||||
auto actual_json = nlohmann::json::parse(collection_meta_json);
|
||||
expected_meta_json["created_at"] = actual_json["created_at"];
|
||||
|
||||
ASSERT_EQ(expected_meta_json.dump(), actual_json.dump());
|
||||
ASSERT_EQ("1", next_collection_id);
|
||||
}
|
||||
|
||||
@ -185,7 +312,15 @@ TEST_F(CollectionManagerTest, GetAllCollections) {
|
||||
ASSERT_STREQ("collection1", collection_vec[0]->get_name().c_str());
|
||||
|
||||
// try creating one more collection
|
||||
collectionManager.create_collection("collection2", 4, search_fields, "points");
|
||||
auto new_schema = R"({
|
||||
"name": "collection2",
|
||||
"fields": [
|
||||
{"name": "title", "type": "string", "locale": "en"},
|
||||
{"name": "points", "type": "int32"}
|
||||
]
|
||||
})"_json;
|
||||
|
||||
collectionManager.create_collection(new_schema);
|
||||
collection_vec = collectionManager.get_collections();
|
||||
ASSERT_EQ(2, collection_vec.size());
|
||||
|
||||
@ -323,6 +458,11 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
ASSERT_EQ(false, restored_schema.at("title").facet);
|
||||
ASSERT_EQ(false, restored_schema.at("title").optional);
|
||||
ASSERT_EQ(false, restored_schema.at("not_stored").index);
|
||||
ASSERT_TRUE(restored_schema.at("person").nested);
|
||||
ASSERT_EQ(2, restored_schema.at("person").nested_array);
|
||||
ASSERT_EQ(128, restored_schema.at("vec").num_dim);
|
||||
|
||||
ASSERT_TRUE(collection1->get_enable_nested_fields());
|
||||
|
||||
ASSERT_EQ(2, collection1->get_overrides().size());
|
||||
ASSERT_STREQ("exclude-rule", collection1->get_overrides()["exclude-rule"].id.c_str());
|
||||
|
Loading…
x
Reference in New Issue
Block a user