Persist vector field properties and restore.

This commit is contained in:
Kishore Nallan 2022-09-15 17:32:17 +05:30
parent c7f879bf30
commit 49268d648e
5 changed files with 207 additions and 24 deletions

View File

@ -293,6 +293,16 @@ struct field {
field_val[fields::locale] = field.locale;
field_val[fields::nested] = field.nested;
if(field.nested) {
field_val[fields::nested_array] = field.nested_array;
}
if(field.num_dim > 0) {
field_val[fields::num_dim] = field.num_dim;
field_val[fields::vec_dist] = field.vec_dist == ip ? "ip" : "cosine";
}
fields_json.push_back(field_val);
if(!field.has_valid_type()) {

View File

@ -168,6 +168,15 @@ nlohmann::json Collection::get_summary_json() const {
field_json[fields::sort] = coll_field.sort;
field_json[fields::infix] = coll_field.infix;
field_json[fields::locale] = coll_field.locale;
field_json[fields::nested] = coll_field.nested;
if(coll_field.nested) {
field_json[fields::nested_array] = coll_field.nested_array;
}
if(coll_field.num_dim > 0) {
field_json[fields::num_dim] = coll_field.num_dim;
}
fields_arr.push_back(field_json);
}

View File

@ -42,9 +42,31 @@ Collection* CollectionManager::init_collection(const nlohmann::json & collection
field_obj[fields::infix] = -1;
}
if(field_obj.count(fields::nested) == 0) {
field_obj[fields::nested] = false;
}
if(field_obj.count(fields::nested_array) == 0) {
field_obj[fields::nested_array] = 0;
}
if(field_obj.count(fields::num_dim) == 0) {
field_obj[fields::num_dim] = 0;
}
// Restore the persisted vector distance metric for this field.
// The metric name is stored as the string VALUE under the `fields::vec_dist`
// key, so that value is what must be parsed. Passing the key constant itself
// to enum_cast (as before) ignores what was persisted.
// Falls back to cosine when the key is absent, the value is not a string,
// or the name does not match any enumerator.
vector_distance_type_t vec_dist_type = vector_distance_type_t::cosine;

if(field_obj.count(fields::vec_dist) != 0 && field_obj[fields::vec_dist].is_string()) {
    auto vec_dist_type_op = magic_enum::enum_cast<vector_distance_type_t>(
            field_obj[fields::vec_dist].get<std::string>());

    if(vec_dist_type_op.has_value()) {
        vec_dist_type = vec_dist_type_op.value();
    }
}
field f(field_obj[fields::name], field_obj[fields::type], field_obj[fields::facet],
field_obj[fields::optional], field_obj[fields::index], field_obj[fields::locale],
-1, field_obj[fields::infix]);
-1, field_obj[fields::infix], field_obj[fields::nested], field_obj[fields::nested_array],
field_obj[fields::num_dim], vec_dist_type);
// value of `sort` depends on field type
if(field_obj.count(fields::sort) == 0) {

View File

@ -2576,6 +2576,8 @@ void Index::search(std::vector<query_tokens_t>& field_query_tokens, const std::v
scores[0] = -float_to_int64_t(dist_label.first);
int64_t match_score_index = -1;
//LOG(INFO) << "SEQ_ID: " << seq_id << ", score: " << dist_label.first;
KV kv(0, searched_queries.size(), 0, seq_id, distinct_id, match_score_index, scores);
topster->add(&kv);
nearest_ids.push_back(seq_id);

View File

@ -12,8 +12,8 @@ protected:
CollectionManager & collectionManager = CollectionManager::get_instance();
std::atomic<bool> quit = false;
Collection *collection1;
std::vector<field> search_fields;
std::vector<sort_by> sort_fields;
nlohmann::json schema;
void setupCollection() {
std::string state_dir_path = "/tmp/typesense_test/coll_manager_test_db";
@ -24,19 +24,27 @@ protected:
collectionManager.init(store, 1.0, "auth_key", quit);
collectionManager.load(8, 1000);
search_fields = {
field("title", field_types::STRING, false, false, true, "en", false),
field("starring", field_types::STRING, false, false, true, "", false, true),
field("cast", field_types::STRING_ARRAY, true, true, true, "", false),
field(".*_year", field_types::INT32, true, true),
field("location", field_types::GEOPOINT, false, true, true),
field("not_stored", field_types::STRING, false, true, false),
field("points", field_types::INT32, false)
};
schema = R"({
"name": "collection1",
"enable_nested_fields": true,
"fields": [
{"name": "title", "type": "string", "locale": "en"},
{"name": "starring", "type": "string", "infix": true},
{"name": "cast", "type": "string[]", "facet": true, "optional": true},
{"name": ".*_year", "type": "int32", "facet": true, "optional": true},
{"name": "location", "type": "geopoint", "optional": true},
{"name": "not_stored", "type": "string", "optional": true, "index": false},
{"name": "points", "type": "int32"},
{"name": "person", "type": "object", "optional": true},
{"name": "vec", "type": "float[]", "num_dim": 128, "optional": true}
],
"default_sorting_field": "points",
"symbols_to_index":["+"],
"token_separators":["-"]
})"_json;
sort_fields = { sort_by("points", "DESC") };
collection1 = collectionManager.create_collection("collection1", 4, search_fields,
"points", 12345, "", {"+"}, {"-"}).get();
collection1 = collectionManager.create_collection(schema).get();
}
virtual void SetUp() {
@ -91,16 +99,135 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
ASSERT_EQ(3, num_keys);
// we already call `collection1->get_next_seq_id` above, which is side-effecting
ASSERT_EQ(1, StringUtils::deserialize_uint32_t(next_seq_id));
ASSERT_EQ("{\"created_at\":12345,\"default_sorting_field\":\"points\",\"enable_nested_fields\":false,\"fallback_field_type\":\"\","
"\"fields\":[{\"facet\":false,\"index\":true,\"infix\":false,\"locale\":\"en\",\"name\":\"title\",\"optional\":false,\"sort\":false,\"type\":\"string\"},"
"{\"facet\":false,\"index\":true,\"infix\":true,\"locale\":\"\",\"name\":\"starring\",\"optional\":false,\"sort\":false,\"type\":\"string\"},"
"{\"facet\":true,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\"cast\",\"optional\":true,\"sort\":false,\"type\":\"string[]\"},"
"{\"facet\":true,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\".*_year\",\"optional\":true,\"sort\":true,\"type\":\"int32\"},"
"{\"facet\":false,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\"location\",\"optional\":true,\"sort\":true,\"type\":\"geopoint\"},"
"{\"facet\":false,\"index\":false,\"infix\":false,\"locale\":\"\",\"name\":\"not_stored\",\"optional\":true,\"sort\":false,\"type\":\"string\"},"
"{\"facet\":false,\"index\":true,\"infix\":false,\"locale\":\"\",\"name\":\"points\",\"optional\":false,\"sort\":true,\"type\":\"int32\"}],\"id\":0,"
"\"name\":\"collection1\",\"num_memory_shards\":4,\"symbols_to_index\":[\"+\"],\"token_separators\":[\"-\"]}",
collection_meta_json);
LOG(INFO) << collection_meta_json;
nlohmann::json expected_meta_json = R"(
{
"created_at":1663234047,
"default_sorting_field":"points",
"enable_nested_fields":true,
"fallback_field_type":"",
"fields":[
{
"facet":false,
"index":true,
"infix":false,
"locale":"en",
"name":"title",
"nested":false,
"optional":false,
"sort":false,
"type":"string"
},
{
"facet":false,
"index":true,
"infix":true,
"locale":"",
"name":"starring",
"nested":false,
"optional":false,
"sort":false,
"type":"string"
},
{
"facet":true,
"index":true,
"infix":false,
"locale":"",
"name":"cast",
"nested":false,
"optional":true,
"sort":false,
"type":"string[]"
},
{
"facet":true,
"index":true,
"infix":false,
"locale":"",
"name":".*_year",
"nested":false,
"optional":true,
"sort":true,
"type":"int32"
},
{
"facet":false,
"index":true,
"infix":false,
"locale":"",
"name":"location",
"nested":false,
"optional":true,
"sort":true,
"type":"geopoint"
},
{
"facet":false,
"index":false,
"infix":false,
"locale":"",
"name":"not_stored",
"nested":false,
"optional":true,
"sort":false,
"type":"string"
},
{
"facet":false,
"index":true,
"infix":false,
"locale":"",
"name":"points",
"nested":false,
"optional":false,
"sort":true,
"type":"int32"
},
{
"facet":false,
"index":true,
"infix":false,
"locale":"",
"name":"person",
"nested":true,
"nested_array":2,
"optional":true,
"sort":false,
"type":"object"
},
{
"facet":false,
"index":true,
"infix":false,
"locale":"",
"name":"vec",
"nested":false,
"num_dim":128,
"optional":true,
"sort":false,
"type":"float[]",
"vec_dist":"cosine"
}
],
"id":0,
"name":"collection1",
"num_memory_shards":4,
"symbols_to_index":[
"+"
],
"token_separators":[
"-"
]
}
)"_json;
auto actual_json = nlohmann::json::parse(collection_meta_json);
expected_meta_json["created_at"] = actual_json["created_at"];
ASSERT_EQ(expected_meta_json.dump(), actual_json.dump());
ASSERT_EQ("1", next_collection_id);
}
@ -185,7 +312,15 @@ TEST_F(CollectionManagerTest, GetAllCollections) {
ASSERT_STREQ("collection1", collection_vec[0]->get_name().c_str());
// try creating one more collection
collectionManager.create_collection("collection2", 4, search_fields, "points");
auto new_schema = R"({
"name": "collection2",
"fields": [
{"name": "title", "type": "string", "locale": "en"},
{"name": "points", "type": "int32"}
]
})"_json;
collectionManager.create_collection(new_schema);
collection_vec = collectionManager.get_collections();
ASSERT_EQ(2, collection_vec.size());
@ -323,6 +458,11 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
ASSERT_EQ(false, restored_schema.at("title").facet);
ASSERT_EQ(false, restored_schema.at("title").optional);
ASSERT_EQ(false, restored_schema.at("not_stored").index);
ASSERT_TRUE(restored_schema.at("person").nested);
ASSERT_EQ(2, restored_schema.at("person").nested_array);
ASSERT_EQ(128, restored_schema.at("vec").num_dim);
ASSERT_TRUE(collection1->get_enable_nested_fields());
ASSERT_EQ(2, collection1->get_overrides().size());
ASSERT_STREQ("exclude-rule", collection1->get_overrides()["exclude-rule"].id.c_str());