#pragma once

// NOTE(review): in this copy of the header the #include directives below carry no
// header names, and template argument lists appear to have been stripped throughout
// (e.g. on std::vector, std::map, std::unordered_map, std::pair, Option,
// spp::sparse_hash_set and nlohmann::json::get). Restore them from the canonical
// source before compiling; the comments below describe only what is visible here.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

// A search override: a rule (query + match mode) together with the documents to add to
// (add_hits) or drop from (drop_hits) the results. Convertible from and to JSON.
struct override_t {
    // Declared here, defined out of line.
    // Presumably the accepted values of rule.match - TODO confirm against the definition.
    static const std::string MATCH_EXACT;
    static const std::string MATCH_CONTAINS;

    // Read from / written to the "rule" object ("query" and "match" keys).
    struct rule_t {
        std::string query;
        std::string match;
    };

    // One entry of the "includes" array: its "id" and "position" values.
    struct add_hit_t {
        std::string doc_id;
        uint32_t position;
    };

    // One entry of the "excludes" array: its "id" value.
    struct drop_hit_t {
        std::string doc_id;
    };

    std::string id;
    rule_t rule;

    std::vector add_hits;
    std::vector drop_hits;

    // Leaves every member default-constructed.
    override_t() {}

    // Builds an override from its JSON form.
    //
    // Reads "id", "rule"."query" and "rule"."match" unconditionally; "includes" and
    // "excludes" are optional arrays and are only walked when the key is present.
    //
    // NOTE(review): the required keys are read with operator[] on a const json without a
    // presence check - callers presumably validate the payload first; confirm.
    // NOTE(review): this single-argument constructor is not `explicit`, so a
    // nlohmann::json converts to override_t implicitly.
    override_t(const nlohmann::json & override) {
        id = override["id"].get();
        rule.query = override["rule"]["query"].get();
        rule.match = override["rule"]["match"].get();

        if (override.count("includes") != 0) {
            for(const auto & include: override["includes"]) {
                add_hit_t add_hit;
                add_hit.doc_id = include["id"].get();
                add_hit.position = include["position"].get();
                add_hits.push_back(add_hit);
            }
        }

        if (override.count("excludes") != 0) {
            for(const auto & exclude: override["excludes"]) {
                drop_hit_t drop_hit;
                drop_hit.doc_id = exclude["id"].get();
                drop_hits.push_back(drop_hit);
            }
        }
    }

    // Serialises this override using the same keys the JSON constructor reads.
    // "includes" and "excludes" are always emitted as arrays, even when there are no
    // add_hits / drop_hits.
    nlohmann::json to_json() const {
        nlohmann::json override;
        override["id"] = id;
        override["rule"]["query"] = rule.query;
        override["rule"]["match"] = rule.match;

        override["includes"] = nlohmann::json::array();

        for(const auto & add_hit: add_hits) {
            nlohmann::json include;
            include["id"] = add_hit.doc_id;
            include["position"] = add_hit.position;
            override["includes"].push_back(include);
        }

        override["excludes"] = nlohmann::json::array();

        for(const auto & drop_hit: drop_hits) {
            nlohmann::json exclude;
            exclude["id"] = drop_hit.doc_id;
            override["excludes"].push_back(exclude);
        }

        return override;
    }
};

// Declaration of a collection. Only the comparators and get_overrides() are defined
// inline; every other member function is implemented outside this header.
class Collection {
private:

    // Per-field highlight data filled in by highlight_result().
    struct highlight_t {
        std::string field;
        std::vector snippets;
        std::vector indices;
        uint64_t match_score;

        // NOTE(review): match_score is not initialised by this constructor.
        highlight_t() { }

        // Reports *this as "less" when its match_score is GREATER, so an ascending sort
        // using this operator places higher scores first.
        bool operator<(const highlight_t& a) const {
            return match_score > a.match_score;
        }
    };

    // Returned (by value) from get_overrides().
    std::map overrides;

    std::string name;

    uint32_t collection_id;

    uint64_t created_at;

    size_t num_documents;

    std::vector indices;

    std::vector index_threads;

    // Auto incrementing record ID used internally for indexing - not exposed to the client
    uint32_t next_seq_id;

    Store* store;

    std::vector fields;

    std::unordered_map search_schema;

    std::map facet_schema;   // std::map guarantees order of fields

    std::unordered_map sort_schema;

    std::string default_sorting_field;

    size_t num_indices;

    std::string get_doc_id_key(const std::string & doc_id);

    std::string get_seq_id_key(uint32_t seq_id);

    // Writes its result through the `highlight` out-parameter (the only non-const
    // parameter besides string_utils).
    void highlight_result(const field &search_field, const std::vector> &searched_queries,
                          const KV &field_order_kv, const nlohmann::json &document,
                          StringUtils & string_utils, highlight_t &highlight);

    void remove_document(nlohmann::json & document, const uint32_t seq_id, bool remove_from_store);

    // id_pos_map, included_ids and excluded_ids are non-const references, i.e.
    // out-parameters; presumably filled from `overrides` for the given query - TODO confirm.
    void populate_overrides(std::string query, std::map & id_pos_map,
                            std::vector & included_ids, std::vector & excluded_ids);

    // Sort comparator: true when a's (second.count, first) tuple is lexicographically
    // greater than b's, i.e. orders by count descending, then by `first` descending on ties.
    static bool facet_count_compare(const std::pair& a, const std::pair& b) {
        return std::tie(a.second.count, a.first) > std::tie(b.second.count, b.first);
    }

    // Sort comparator: orders facet values by count, descending.
    static bool facet_count_str_compare(const facet_value_t& a, const facet_value_t& b) {
        return a.count > b.count;
    }

public:
    // A collection cannot be default-constructed.
    Collection() = delete;

    // num_indices defaults to 4 when omitted.
    Collection(const std::string name, const uint32_t collection_id, const uint64_t created_at,
               const uint32_t next_seq_id, Store *store, const std::vector & fields,
               const std::string & default_sorting_field, const size_t num_indices=4);

    ~Collection();

    static std::string get_next_seq_id_key(const std::string & collection_name);

    static std::string get_meta_key(const std::string & collection_name);

    static std::string get_override_key(const std::string & collection_name, const std::string & override_id);

    std::string get_seq_id_collection_prefix();

    std::string get_name();

    uint64_t get_created_at();

    size_t get_num_documents();

    uint32_t get_collection_id();

    uint32_t get_next_seq_id();

    void set_next_seq_id(uint32_t seq_id);

    void increment_next_seq_id_field();

    Option doc_id_to_seq_id(const std::string & doc_id);

    std::vector get_facet_fields();

    std::vector get_sort_fields();

    std::vector get_fields();

    std::unordered_map get_schema();

    std::string get_default_sorting_field();

    // `document` is a non-const reference, i.e. an out-parameter.
    Option to_doc(const std::string & json_str, nlohmann::json & document);

    Option add(const std::string & json_str);

    Option add_many(const std::string & json_str);

    // Defaults when omitted: per_page = 10, page = 1, token_order = FREQUENCY,
    // prefix = false, drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
    // include_fields / exclude_fields = empty sets, max_facet_values = 10,
    // max_hits = 500, simple_facet_query = "".
    Option search(const std::string & query, const std::vector & search_fields,
                  const std::string & simple_filter_query, const std::vector & facet_fields,
                  const std::vector & sort_fields, int num_typos,
                  size_t per_page = 10, size_t page = 1,
                  token_ordering token_order = FREQUENCY, bool prefix = false,
                  size_t drop_tokens_threshold = Index::DROP_TOKENS_THRESHOLD,
                  const spp::sparse_hash_set & include_fields = spp::sparse_hash_set(),
                  const spp::sparse_hash_set & exclude_fields = spp::sparse_hash_set(),
                  size_t max_facet_values=10, size_t max_hits=500,
                  const std::string & simple_facet_query = "");

    Option get(const std::string & id);

    // remove_from_store defaults to true.
    Option remove(const std::string & id, bool remove_from_store = true);

    Option add_override(const override_t & override);

    Option remove_override(const std::string & id);

    // Returns a copy of the overrides map (the return type is not a reference).
    std::map get_overrides() {
        return overrides;
    };

    size_t get_num_indices();

    static uint32_t get_seq_id_from_key(const std::string & key);

    // `document` is a non-const reference, i.e. an out-parameter.
    Option get_document_from_store(const std::string & seq_id_key, nlohmann::json & document);

    Option index_in_memory(const nlohmann::json & document, uint32_t seq_id);

    // `result` is a non-const reference, i.e. an out-parameter.
    void par_index_in_memory(std::vector> & iter_batch, batch_index_result & result);

    // Modifies `document` in place (taken by non-const reference).
    static void prune_document(nlohmann::json &document, const spp::sparse_hash_set & include_fields,
                               const spp::sparse_hash_set & exclude_fields);

    const std::vector &_get_indexes() const;

    // strings under this length will be fully highlighted, instead of showing a snippet of relevant portion
    enum {SNIPPET_STR_ABOVE_LEN = 30};

    enum {MAX_ARRAY_MATCHES = 5};

    // Using a $ prefix so that these meta keys stay above record entries in a
    // lexicographically ordered KV store
    static constexpr const char* COLLECTION_META_PREFIX = "$CM";
    static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS";
    static constexpr const char* COLLECTION_OVERRIDE_PREFIX = "$CO";
    static constexpr const char* SEQ_ID_PREFIX = "$SI";
    static constexpr const char* DOC_ID_PREFIX = "$DI";
};