#pragma once #include #include #include #include #include #include #include #include class Collection { private: // Using a $ prefix so that these meta keys stay above record entries in a lexicographically ordered KV store static constexpr const char* COLLECTION_META_PREFIX = "$CM"; static constexpr const char* DOC_ID_PREFIX = "$DI"; static constexpr const char* COLLECTION_NEXT_SEQ_PREFIX = "$CS"; static constexpr const char* SEQ_ID_PREFIX = "$SI"; std::string name; uint32_t collection_id; // Auto incrementing record ID used internally for indexing - not exposed to the client uint32_t next_seq_id; spp::sparse_hash_map schema; std::vector rank_fields; Store* store; spp::sparse_hash_map index_map; spp::sparse_hash_map primary_rank_scores; spp::sparse_hash_map secondary_rank_scores; std::string get_doc_id_key(std::string doc_id); std::string get_seq_id_key(uint32_t seq_id); static inline std::vector next_suggestion(const std::vector> &token_leaves, long long int n); void log_leaves(const int cost, const std::string &token, const std::vector &leaves) const; void search(std::string & query, const std::string & field, const int num_typos, const size_t num_results, Topster<100> & topster, size_t & num_found, const token_ordering token_order = FREQUENCY, const bool prefix = false); void search_candidates(int & token_rank, std::vector> & token_leaves, Topster<100> & topster, size_t & total_results, size_t & num_found, const size_t & max_results); void index_string_field(const std::string & text, const uint32_t score, art_tree *t, uint32_t seq_id) const; void index_string_array_field(const std::vector & strings, const uint32_t score, art_tree *t, uint32_t seq_id) const; void index_int32_field(const int32_t value, uint32_t score, art_tree *t, uint32_t seq_id) const; void index_int64_field(const int64_t value, uint32_t score, art_tree *t, uint32_t seq_id) const; void index_int32_array_field(const std::vector & values, const uint32_t score, art_tree *t, uint32_t seq_id) const; void index_int64_array_field(const std::vector & values, const uint32_t score, art_tree *t, uint32_t seq_id) const; public: Collection() = delete; Collection(const std::string name, const uint32_t collection_id, const uint32_t next_seq_id, Store *store, const std::vector & search_fields, const std::vector rank_fields); ~Collection(); static std::string get_next_seq_id_key(std::string collection_name); static std::string get_meta_key(std::string collection_name); std::string get_seq_id_prefix(); uint32_t get_collection_id(); uint32_t get_next_seq_id(); uint32_t doc_id_to_seq_id(std::string doc_id); std::vector get_rank_fields(); spp::sparse_hash_map get_schema(); std::string add(std::string json_str); nlohmann::json search(std::string query, const std::vector fields, const std::vector filters, const int num_typos, const size_t num_results, const token_ordering token_order = FREQUENCY, const bool prefix = false); void remove(std::string id); void score_results(Topster<100> &topster, const int & token_rank, const std::vector &query_suggestion, const uint32_t *result_ids, const size_t result_size) const; enum {MAX_SEARCH_TOKENS = 20}; enum {MAX_RESULTS = 100}; void index_in_memory(const nlohmann::json &document, uint32_t seq_id); };