Persist collection overrides in the store and restore them on startup.

Summary of what this patch does:
  * override_t gains to_json_str(), which serializes id, rule, includes
    and excludes to a JSON string.
  * Collection::add_override() / remove_override() move out of the header,
    return Option instead of bool, and write to / delete from the store
    under the key "$CO_<collection name>_<override id>".
  * CollectionManager::init() scans the store with that key prefix and
    re-adds every stored override while loading a collection.
  * Tests cover restoring overrides after a restart and the ordering of
    hits for the exclude / include rules.

Review notes on this copy of the patch:
  * FIX: Collection::add_override() checked overrides.count("id"), i.e. the
    literal key "id", instead of overrides.count(override.id). A duplicate
    id was therefore never rejected with 409 and silently overwrote the
    existing entry (and any add was rejected once an override literally
    named "id" existed). remove_override() already used the real id.
    Because of this one-line change the post-image blob hash on the
    "index" line of src/collection.cpp no longer describes the result.
  * NOTE(review): line breaks, indentation and blank context lines were
    reconstructed from a whitespace-collapsed copy; confirm against the
    tree before applying.
  * NOTE(review): template argument lists appear to be missing in this
    copy (e.g. on Option, std::vector, std::map, json .get()); they were
    left untouched rather than guessed - restore before applying.

diff --git a/include/collection.h b/include/collection.h
index f2164a11..853070ab 100644
--- a/include/collection.h
+++ b/include/collection.h
@@ -65,6 +65,31 @@ struct override_t {
             }
         }
     }
+
+    std::string to_json_str() const {
+        nlohmann::json override;
+        override["id"] = id;
+        override["rule"]["query"] = rule.query;
+        override["rule"]["match"] = rule.match;
+
+        override["includes"] = nlohmann::json::array();
+
+        for(const auto & add_hit: add_hits) {
+            nlohmann::json include;
+            include["id"] = add_hit.doc_id;
+            include["position"] = add_hit.position;
+            override["includes"].push_back(include);
+        }
+
+        override["excludes"] = nlohmann::json::array();
+        for(const auto & drop_hit: drop_hits) {
+            nlohmann::json exclude;
+            exclude["id"] = drop_hit.doc_id;
+            override["excludes"].push_back(exclude);
+        }
+
+        return override.dump();
+    }
 };
 
 class Collection {
@@ -142,6 +167,8 @@ public:
 
     static std::string get_meta_key(const std::string & collection_name);
 
+    static std::string get_override_key(const std::string & collection_name, const std::string & override_id);
+
     std::string get_seq_id_collection_prefix();
 
     std::string get_name();
@@ -190,24 +217,13 @@ public:
 
     Option remove(const std::string & id, const bool remove_from_store = true);
 
-    // FIXME: add persistence
-    bool add_override(override_t & override) {
-        if(overrides.count("id") != 0) {
-            return false;
-        }
+    Option add_override(const override_t & override);
 
-        overrides[override.id] = override;
-        return true;
-    }
+    Option remove_override(const std::string & id);
 
-    bool remove_override(const std::string & id) {
-        if(overrides.count("id") != 0) {
-            overrides.erase(id);
-            return true;
-        }
-
-        return false;
-    }
+    std::map get_overrides() {
+        return overrides;
+    };
 
     size_t get_num_indices();
 
@@ -232,6 +248,7 @@ public:
     // Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store
     static constexpr const char* COLLECTION_META_PREFIX = "$CM";
     static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
+    static constexpr const char* COLLECTION_OVERRIDE_PREFIX = "$CO";
     static constexpr const char* SEQ_ID_PREFIX = "$SI";
     static constexpr const char* DOC_ID_PREFIX = "$DI";
 };
diff --git a/src/collection.cpp b/src/collection.cpp
index 38d7365b..a0240816 100644
--- a/src/collection.cpp
+++ b/src/collection.cpp
@@ -927,6 +927,33 @@ Option Collection::remove(const std::string & id, const bool remove
     return Option(id);
 }
 
+Option Collection::add_override(const override_t & override) {
+    if(overrides.count(override.id) != 0) {
+        return Option(409, "There is already another entry with that `id`.");
+    }
+
+    bool inserted = store->insert(Collection::get_override_key(name, override.id), override.to_json_str());
+    if(!inserted) {
+        return Option(500, "Error while storing the override on disk.");
+    }
+
+    overrides[override.id] = override;
+    return Option(200);
+}
+
+Option Collection::remove_override(const std::string & id) {
+    if(overrides.count(id) != 0) {
+        bool removed = store->remove(Collection::get_override_key(name, id));
+        if(!removed) {
+            return Option(500, "Error while deleting the override from disk.");
+        }
+        overrides.erase(id);
+        return Option(200);
+    }
+
+    return Option(404, "Could not find that `id`.");
+}
+
 size_t Collection::get_num_indices() {
     return num_indices;
 }
@@ -1013,6 +1040,10 @@ std::string Collection::get_meta_key(const std::string & collection_name) {
     return std::string(COLLECTION_META_PREFIX) + "_" + collection_name;
 }
 
+std::string Collection::get_override_key(const std::string & collection_name, const std::string & override_id) {
+    return std::string(COLLECTION_OVERRIDE_PREFIX) + "_" + collection_name + "_" + override_id;
+}
+
 std::string Collection::get_seq_id_collection_prefix() {
     return std::to_string(collection_id) + "_" + std::string(SEQ_ID_PREFIX);
 }
diff --git a/src/collection_manager.cpp b/src/collection_manager.cpp
index 36ced335..ddfe6c10 100644
--- a/src/collection_manager.cpp
+++ b/src/collection_manager.cpp
@@ -97,6 +97,16 @@ Option CollectionManager::init(Store *store,
 
         LOG(INFO) << "Loading collection " << collection->get_name() << std::endl;
 
+        // initialize overrides
+        std::vector collection_override_jsons;
+        store->scan_fill(Collection::get_override_key(this_collection_name, ""), collection_override_jsons);
+
+        for(const auto & collection_override_json: collection_override_jsons) {
+            nlohmann::json collection_override = nlohmann::json::parse(collection_override_json);
+            override_t override(collection_override);
+            collection->add_override(override);
+        }
+
         // Fetch records from the store and re-create memory index
         std::vector documents;
         const std::string seq_id_prefix = collection->get_seq_id_collection_prefix();
diff --git a/src/index.cpp b/src/index.cpp
index ff623363..5418323a 100644
--- a/src/index.cpp
+++ b/src/index.cpp
@@ -838,11 +838,6 @@ void Index::collate_curated_ids(const std::string & query, const std::string & f
         }
     }
 
-    if(override_query.size() == 0) {
-        // happens when the curated hit's field has no overlap with query string
-        //return ;
-    }
-
     spp::sparse_hash_map leaf_to_indices;
 
     for (art_leaf *token_leaf : override_query) {
diff --git a/test/collection_manager_test.cpp b/test/collection_manager_test.cpp
index a9e2170d..6408c3e7 100644
--- a/test/collection_manager_test.cpp
+++ b/test/collection_manager_test.cpp
@@ -143,6 +143,63 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
 
     infile.close();
 
+    // add some overrides
+    nlohmann::json override_json_include = {
+        {"id", "include-rule"},
+        {
+            "rule", {
+                {"query", "in"},
+                {"match", override_t::MATCH_EXACT}
+            }
+        }
+    };
+    override_json_include["includes"] = nlohmann::json::array();
+    override_json_include["includes"][0] = nlohmann::json::object();
+    override_json_include["includes"][0]["id"] = "0";
+    override_json_include["includes"][0]["position"] = 1;
+
+    override_json_include["includes"][1] = nlohmann::json::object();
+    override_json_include["includes"][1]["id"] = "3";
+    override_json_include["includes"][1]["position"] = 2;
+
+    override_t override_include(override_json_include);
+
+    nlohmann::json override_json = {
+        {"id", "exclude-rule"},
+        {
+            "rule", {
+                {"query", "of"},
+                {"match", override_t::MATCH_EXACT}
+            }
+        }
+    };
+    override_json["excludes"] = nlohmann::json::array();
+    override_json["excludes"][0] = nlohmann::json::object();
+    override_json["excludes"][0]["id"] = "4";
+
+    override_json["excludes"][1] = nlohmann::json::object();
+    override_json["excludes"][1]["id"] = "11";
+
+    override_t override_exclude(override_json);
+
+    nlohmann::json override_json_deleted = {
+        {"id", "deleted-rule"},
+        {
+            "rule", {
+                {"query", "of"},
+                {"match", override_t::MATCH_EXACT}
+            }
+        }
+    };
+
+    override_t override_deleted(override_json_deleted);
+
+    collection1->add_override(override_include);
+    collection1->add_override(override_exclude);
+    collection1->add_override(override_deleted);
+
+    collection1->remove_override("deleted-rule");
+
     std::vector search_fields = {"starring", "title"};
     std::vector facets;
 
@@ -168,6 +225,10 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
     ASSERT_EQ(schema.size(), collection1->get_schema().size());
     ASSERT_EQ("points", collection1->get_default_sorting_field());
 
+    ASSERT_EQ(2, collection1->get_overrides().size());
+    ASSERT_STREQ("exclude-rule", collection1->get_overrides()["exclude-rule"].id.c_str());
+    ASSERT_STREQ("include-rule", collection1->get_overrides()["include-rule"].id.c_str());
+
     results = collection1->search("thomas", search_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
     ASSERT_EQ(4, results["hits"].size());
 }
diff --git a/test/collection_test.cpp b/test/collection_test.cpp
index d9470efa..3df4847c 100644
--- a/test/collection_test.cpp
+++ b/test/collection_test.cpp
@@ -838,6 +838,10 @@ TEST_F(CollectionTest, ExcludeIncludeExactQueryMatch) {
     ASSERT_EQ(3, results["found"].get());
     ASSERT_EQ(6, results["facet_counts"][0]["counts"].size());
 
+    ASSERT_STREQ("12", results["hits"][0]["document"]["id"].get().c_str());
+    ASSERT_STREQ("5", results["hits"][1]["document"]["id"].get().c_str());
+    ASSERT_STREQ("17", results["hits"][2]["document"]["id"].get().c_str());
+
     // include
     nlohmann::json override_json_include = {
         {"id", "include-rule"},
@@ -868,6 +872,10 @@ TEST_F(CollectionTest, ExcludeIncludeExactQueryMatch) {
     ASSERT_EQ(3, results["hits"].size());
     ASSERT_EQ(3, results["found"].get());
 
+    ASSERT_STREQ("0", results["hits"][0]["document"]["id"].get().c_str());
+    ASSERT_STREQ("3", results["hits"][1]["document"]["id"].get().c_str());
+    ASSERT_STREQ("13", results["hits"][2]["document"]["id"].get().c_str());
+
     coll_mul_fields->remove_override("exclude-rule");
     coll_mul_fields->remove_override("include-rule");
 