Mirror of https://github.com/typesense/typesense.git (synced 2025-05-19 13:12:22 +08:00)

Merge branch 'doc-update'

commit 3d1ea448b6
@@ -52,7 +52,7 @@ Here's a quick example showcasing how you can create a collection, index a docum
Let's begin by starting the Typesense server via Docker:

```
docker run -p 8108:8108 -v/tmp/data:/data typesense/typesense:0.15.0 --data-dir /data --api-key=Hu52dwsas2AdxdE
docker run -p 8108:8108 -v/tmp/data:/data typesense/typesense:0.16.0 --data-dir /data --api-key=Hu52dwsas2AdxdE
```

We have [API Clients](#api-clients) in a couple of languages, but let's use the Python client for this example.
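The README's full Python example continues beyond this hunk. For orientation only, here is a minimal sketch of what such a session looks like, assuming the official `typesense` Python client (`pip install typesense`); the `companies` schema and field names are illustrative assumptions, not part of this commit, and method names may differ slightly across client versions:

```python
import typesense

# connect to the Docker container started above, using the same API key
client = typesense.Client({
    'nodes': [{'host': 'localhost', 'port': '8108', 'protocol': 'http'}],
    'api_key': 'Hu52dwsas2AdxdE',
    'connection_timeout_seconds': 2,
})

# create a collection, index one document, then search it
client.collections.create({
    'name': 'companies',
    'fields': [
        {'name': 'company_name', 'type': 'string'},
        {'name': 'num_employees', 'type': 'int32'},
    ],
    'default_sorting_field': 'num_employees',
})

client.collections['companies'].documents.create({
    'id': '1', 'company_name': 'Stark Industries', 'num_employees': 5215,
})

print(client.collections['companies'].documents.search({
    'q': 'stark', 'query_by': 'company_name',
}))
```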
@@ -18,6 +18,8 @@ private:
}

public:
void load(const uint32_t *sorted_array, uint32_t array_length, uint32_t m, uint32_t M);

uint32_t at(uint32_t index);

bool contains(uint32_t value);
@@ -26,5 +28,7 @@ public:

bool append(uint32_t value);

bool insert(size_t index, const uint32_t* values, size_t num_values);

void remove_index(uint32_t start_index, uint32_t end_index);
};
@@ -36,7 +36,8 @@ public:
in = nullptr;
}

uint32_t* uncompress();
// len determines length of output buffer (default: length of input)
uint32_t* uncompress(uint32_t len=0);

uint32_t getSizeInBytes();
@@ -96,9 +96,9 @@ typedef struct {
* of arbitrary size, as they include the key.
*/
typedef struct {
art_values* values;
int32_t max_score;
uint32_t key_len;
int64_t max_score;
art_values* values;
unsigned char key[];
} art_leaf;
@@ -92,6 +92,11 @@ struct override_t {
}
};

struct doc_seq_id_t {
uint32_t seq_id;
bool is_new;
};

class Collection {
private:

@@ -150,7 +155,9 @@ private:

void highlight_result(const field &search_field, const std::vector<std::vector<art_leaf *>> &searched_queries,
const KV* field_order_kv, const nlohmann::json &document,
StringUtils & string_utils, size_t snippet_threshold,
StringUtils & string_utils,
const size_t snippet_threshold,
const size_t highlight_affix_num_tokens,
bool highlighted_fully,
highlight_t &highlight);

@@ -217,13 +224,16 @@ public:

std::string get_default_sorting_field();

Option<uint32_t> to_doc(const std::string & json_str, nlohmann::json & document);
Option<doc_seq_id_t> to_doc(const std::string& json_str, nlohmann::json& document,
const index_operation_t& operation, const std::string& id="");

nlohmann::json get_summary_json();

Option<nlohmann::json> add(const std::string & json_str);
Option<nlohmann::json> add(const std::string & json_str,
const index_operation_t& operation=CREATE, const std::string& id="");

nlohmann::json add_many(std::vector<std::string>& json_lines);
nlohmann::json add_many(std::vector<std::string>& json_lines, nlohmann::json& document,
const index_operation_t& operation=CREATE, const std::string& id="");

Option<nlohmann::json> search(const std::string & query, const std::vector<std::string> & search_fields,
const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,
@@ -236,6 +246,7 @@ public:
size_t max_facet_values=10,
const std::string & simple_facet_query = "",
const size_t snippet_threshold = 30,
const size_t highlight_affix_num_tokens = 4,
const std::string & highlight_full_fields = "",
size_t typo_tokens_threshold = Index::TYPO_TOKENS_THRESHOLD,
const std::map<size_t, std::vector<std::string>>& pinned_hits={},
@@ -263,7 +274,7 @@ public:

Option<bool> get_document_from_store(const std::string & seq_id_key, nlohmann::json & document);

Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id);
Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id, bool is_update);

size_t par_index_in_memory(std::vector<std::vector<index_record>> & iter_batch, std::vector<size_t>& indexed_counts);

@@ -296,5 +307,9 @@ public:
size_t &num_indexed);

bool is_exceeding_memory_threshold() const;

void get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc,
nlohmann::json &new_doc,
nlohmann::json &del_doc);
};
@@ -23,6 +23,8 @@ bool get_export_documents(http_req& req, http_res& res);

bool post_add_document(http_req& req, http_res& res);

bool patch_update_document(http_req& req, http_res& res);

bool post_import_documents(http_req& req, http_res& res);

bool get_fetch_document(http_req& req, http_res& res);
@@ -127,6 +127,8 @@ public:

void put(const std::string & path, bool (*handler)(http_req & req, http_res & res), bool async_req=false, bool async_res=false);

void patch(const std::string & path, bool (*handler)(http_req & req, http_res & res), bool async_req=false, bool async_res=false);

void del(const std::string & path, bool (*handler)(http_req & req, http_res & res), bool async_req=false, bool async_res=false);

void on(const std::string & message, bool (*handler)(void*));
@@ -79,15 +79,29 @@ struct search_args {
};
};

enum index_operation_t {
CREATE,
UPSERT,
UPDATE,
DELETE
};

struct index_record {
size_t position; // position of record in the original request
size_t position; // position of record in the original request
uint32_t seq_id;
nlohmann::json document;

Option<bool> indexed; // indicates if the indexing operation was a success
nlohmann::json doc;
nlohmann::json old_doc;
nlohmann::json new_doc;
nlohmann::json del_doc;

index_record(size_t record_pos, uint32_t seq_id, const nlohmann::json& doc):
position(record_pos), seq_id(seq_id), document(doc), indexed(true) {
index_operation_t operation;
bool is_update;

Option<bool> indexed; // indicates if the indexing operation was a success

index_record(size_t record_pos, uint32_t seq_id, const nlohmann::json& doc, index_operation_t operation):
position(record_pos), seq_id(seq_id), doc(doc), operation(operation), is_update(false), indexed(false) {

}

@@ -95,7 +109,7 @@ struct index_record {
indexed = Option<bool>(err_code, err_msg);
}

void index_success(const index_record & record) {
void index_success() {
indexed = Option<bool>(true);
}
};
@@ -154,32 +168,32 @@ private:
size_t & all_result_ids_len,
const size_t typo_tokens_threshold);

void insert_doc(const uint32_t score, art_tree *t, uint32_t seq_id,
void insert_doc(const int64_t score, art_tree *t, uint32_t seq_id,
const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const;

void index_string_field(const std::string & text, const uint32_t score, art_tree *t, uint32_t seq_id,
void index_string_field(const std::string & text, const int64_t score, art_tree *t, uint32_t seq_id,
int facet_id, const field & a_field);

void index_string_array_field(const std::vector<std::string> & strings, const uint32_t score, art_tree *t,
void index_string_array_field(const std::vector<std::string> & strings, const int64_t score, art_tree *t,
uint32_t seq_id, int facet_id, const field & a_field);

void index_int32_field(const int32_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_int32_field(const int32_t value, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_int64_field(const int64_t value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_int64_field(const int64_t value, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_float_field(const float value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_float_field(const float value, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_bool_field(const bool value, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_bool_field(const bool value, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_int32_array_field(const std::vector<int32_t> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_int32_array_field(const std::vector<int32_t> & values, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_int64_array_field(const std::vector<int64_t> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_int64_array_field(const std::vector<int64_t> & values, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_float_array_field(const std::vector<float> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_float_array_field(const std::vector<float> & values, const int64_t score, art_tree *t, uint32_t seq_id) const;

void index_bool_array_field(const std::vector<bool> & values, const uint32_t score, art_tree *t, uint32_t seq_id) const;
void index_bool_array_field(const std::vector<bool> & values, const int64_t score, art_tree *t, uint32_t seq_id) const;

void remove_and_shift_offset_index(sorted_array &offset_index, const uint32_t *indices_sorted,
void remove_and_shift_offset_index(sorted_array& offset_index, const uint32_t* indices_sorted,
const uint32_t indices_length);

uint32_t* collate_leaf_ids(const std::vector<const art_leaf *> &leaves, size_t& result_ids_len) const;
@@ -238,21 +252,22 @@ public:
spp::sparse_hash_set<uint64_t>& groups_processed,
const uint32_t *result_ids, const size_t result_size);

static int32_t get_points_from_doc(const nlohmann::json &document, const std::string & default_sorting_field);
static int64_t get_points_from_doc(const nlohmann::json &document, const std::string & default_sorting_field);

Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id,
const std::string & default_sorting_field);
const std::string & default_sorting_field, bool is_update);

static Option<uint32_t> validate_index_in_memory(const nlohmann::json &document, uint32_t seq_id,
const std::string & default_sorting_field,
const std::unordered_map<std::string, field> & search_schema,
const std::map<std::string, field> & facet_schema);
const std::map<std::string, field> & facet_schema,
bool is_update);

static size_t batch_memory_index(Index *index,
std::vector<index_record> & iter_batch,
const std::string & default_sorting_field,
const std::unordered_map<std::string, field> & search_schema,
const std::map<std::string, field> & facet_schema);
std::vector<index_record> & iter_batch,
const std::string & default_sorting_field,
const std::unordered_map<std::string, field> & search_schema,
const std::map<std::string, field> & facet_schema);

const spp::sparse_hash_map<std::string, art_tree *> &_get_search_index() const;

@@ -291,5 +306,10 @@ public:
void eq_str_filter_plain(const uint32_t *strt_ids, size_t strt_ids_size,
const std::vector<art_leaf *> &query_suggestion,
uint32_t *exact_strt_ids, size_t& exact_strt_size) const;

void scrub_reindex_doc(nlohmann::json& update_doc, nlohmann::json& del_doc, nlohmann::json& old_doc);

void tokenize_doc_field(const nlohmann::json& document, const std::string& field_name, const field& search_field,
std::vector<std::string>& tokens);
};
@@ -8,6 +8,7 @@
#include <limits>
#include <iostream>
#include "array_base.h"
#include "logger.h"

class sorted_array: public array_base {
private:
@@ -16,7 +17,15 @@ private:
uint32_t m = std::min(min, value);
uint32_t M = std::max(max, value);
uint32_t bnew = required_bits(M - m);
return METADATA_OVERHEAD + 4 + for_compressed_size_bits(new_length, bnew);
uint32_t size_bits = for_compressed_size_bits(new_length, bnew);


/*if(new_length == 15) {
LOG(INFO) << "value: " << value << ", m: " << m << ", M: " << M << ", bnew: "
<< bnew << ", size_bits: " << size_bits;
}*/

return METADATA_OVERHEAD + 4 + size_bits;
}

uint32_t lower_bound_search_bits(const uint8_t *in, uint32_t imin, uint32_t imax, uint32_t base,
@@ -39,7 +48,11 @@ public:
void indexOf(const uint32_t *values, const size_t values_len, uint32_t* indices);

// returns false if malloc fails
bool append(uint32_t value);
size_t append(uint32_t value);

void remove_values(uint32_t *sorted_values, uint32_t values_length);
bool insert(size_t index, uint32_t value);

void remove_value(uint32_t value);

void remove_values(uint32_t *sorted_values, uint32_t sorted_values_length);
};
@@ -199,6 +199,15 @@ struct StringUtils {
return (*p == 0) && val >= std::numeric_limits<int32_t>::min() && val <= std::numeric_limits<int32_t>::max();
}

static bool is_bool(std::string &s) {
if(s.empty()) {
return false;
}

StringUtils::tolowercase(s);
return s == "true" || s == "false";
}

static void toupper(std::string& str) {
std::transform(str.begin(), str.end(), str.begin(), ::toupper);
}
@@ -41,6 +41,47 @@ bool array::append(uint32_t value) {
return true;
}

void array::load(const uint32_t *sorted_array, const uint32_t array_length, const uint32_t m, const uint32_t M) {
min = m;
max = M;

uint32_t size_required = (uint32_t) (unsorted_append_size_required(max, array_length) * FOR_GROWTH_FACTOR);
uint8_t *out = (uint8_t *) malloc(size_required * sizeof *out);
uint32_t actual_size = for_compress_unsorted(sorted_array, out, array_length);

free(in);
in = nullptr;

in = out;
length = array_length;
size_bytes = size_required;
length_bytes = actual_size;
}

bool array::insert(size_t index, const uint32_t* values, size_t num_values) {
if(index >= length) {
return false;
}

uint32_t *curr_array = uncompress(length+num_values);
memmove(&curr_array[index+num_values], &curr_array[index], sizeof(uint32_t)*(length-index));

uint32_t m = min, M = max;

for(size_t i=0; i<num_values; i++) {
uint32_t value = values[i];
if(value < m) m = value;
if(value > M) M = value;
curr_array[index+i] = value;
}

load(curr_array, length+num_values, m, M);

delete [] curr_array;

return true;
}

void array::remove_index(uint32_t start_index, uint32_t end_index) {
uint32_t *curr_array = uncompress();
@@ -1,7 +1,8 @@
#include "array_base.h"

uint32_t* array_base::uncompress() {
uint32_t *out = new uint32_t[length];
uint32_t* array_base::uncompress(uint32_t len) {
uint32_t actual_len = std::max(len, length);
uint32_t *out = new uint32_t[actual_len];
for_uncompress(in, out, length);
return out;
}
src/art.cpp (39 changed lines)
@@ -39,6 +39,8 @@ static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *
void art_int_fuzzy_recurse(art_node *n, int depth, const unsigned char* int_str, int int_str_len,
NUM_COMPARATOR comparator, std::vector<const art_leaf *> &results);

static void insert_and_shift_offset_index(sorted_array& offset_index, const uint32_t index, const uint32_t num_offsets);

bool compare_art_leaf_frequency(const art_leaf *a, const art_leaf *b) {
return a->values->ids.getLength() > b->values->ids.getLength();
}
@@ -408,15 +410,42 @@ art_leaf* art_maximum(art_tree *t) {

static void add_document_to_leaf(const art_document *document, art_leaf *leaf) {
leaf->max_score = MAX(leaf->max_score, document->score);
leaf->values->ids.append(document->id);
uint32_t curr_index = leaf->values->offsets.getLength();
leaf->values->offset_index.append(curr_index);
size_t inserted_index = leaf->values->ids.append(document->id);

for(uint32_t i=0; i<document->offsets_len; i++) {
leaf->values->offsets.append(document->offsets[i]);
if(inserted_index == leaf->values->ids.getLength()-1) {
// treat as appends
uint32_t curr_index = leaf->values->offsets.getLength();
leaf->values->offset_index.append(curr_index);
for(uint32_t i=0; i<document->offsets_len; i++) {
leaf->values->offsets.append(document->offsets[i]);
}
} else {
uint32_t existing_offset_index = leaf->values->offset_index.at(inserted_index);
insert_and_shift_offset_index(leaf->values->offset_index, inserted_index, document->offsets_len);
leaf->values->offsets.insert(existing_offset_index, document->offsets, document->offsets_len);
}
}

void insert_and_shift_offset_index(sorted_array& offset_index, const uint32_t index, const uint32_t num_offsets) {
uint32_t existing_offset_index = offset_index.at(index);
uint32_t length = offset_index.getLength();
uint32_t new_length = length + 1;
uint32_t *curr_array = offset_index.uncompress(new_length);

memmove(&curr_array[index+1], &curr_array[index], sizeof(uint32_t)*(length - index));
curr_array[index] = existing_offset_index;

uint32_t curr_index = index + 1;
while(curr_index < new_length) {
curr_array[curr_index] += num_offsets;
curr_index++;
}

offset_index.load(curr_array, new_length);

delete [] curr_array;
}

static art_leaf* make_leaf(const unsigned char *key, uint32_t key_len, art_document *document) {
art_leaf *l = (art_leaf *) malloc(sizeof(art_leaf) + key_len);
l->values = new art_values;
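The shifting logic in `insert_and_shift_offset_index()` is easier to see outside the compressed-array plumbing. Here is a small, self-contained sketch of the same idea (plain Python lists, not typesense code): when a document's postings land in the middle of the ids list instead of being appended, every offset-index entry after the insertion point must be bumped by the number of offsets that were spliced in.

```python
def insert_and_shift_offset_index(offset_index, index, num_offsets):
    """Illustrative stand-in for the C++ helper above.

    offset_index[i] holds the position in the flat offsets list where
    document i's offsets begin. Inserting a document at `index` repeats the
    existing start position for the new entry and shifts every later entry
    by the number of offsets being spliced in.
    """
    existing = offset_index[index]
    offset_index.insert(index, existing)
    for i in range(index + 1, len(offset_index)):
        offset_index[i] += num_offsets
    return offset_index

# doc0's offsets start at 0, doc1's at 3, doc2's at 5; splice a 2-offset doc at position 1
print(insert_and_shift_offset_index([0, 3, 5], 1, 2))  # [0, 3, 5, 7]
```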
@@ -5,7 +5,7 @@ constexpr const char* AuthManager::DOCUMENTS_SEARCH_ACTION;

Option<bool> AuthManager::init(Store *store) {
// This function must be idempotent, i.e. when called multiple times, must produce the same state without leaks
LOG(INFO) << "AuthManager::init()";
//LOG(INFO) << "AuthManager::init()";

this->store = store;

@@ -157,7 +157,7 @@ bool AuthManager::authenticate(const std::string& req_api_key, const std::string
}

// enrich params with values from embedded_params
for (const auto& it: embedded_params.items()){
for(auto it = embedded_params.begin(); it != embedded_params.end(); ++it) {
if(params.count(it.key()) == 0) {
params[it.key()] = it.value();
} else if(it.key() == "filter_by") {
@@ -8,7 +8,6 @@
#include <art.h>
#include <thread>
#include <future>
#include <chrono>
#include <rocksdb/write_batch.h>
#include <system_metrics.h>
#include "topster.h"
@@ -99,33 +98,75 @@ void Collection::increment_next_seq_id_field() {
next_seq_id++;
}

Option<uint32_t> Collection::to_doc(const std::string & json_str, nlohmann::json & document) {
Option<doc_seq_id_t> Collection::to_doc(const std::string & json_str, nlohmann::json& document,
const index_operation_t& operation, const std::string& id) {
try {
document = nlohmann::json::parse(json_str);
} catch(const std::exception& e) {
LOG(ERROR) << "JSON error: " << e.what();
return Option<uint32_t>(400, std::string("Bad JSON: ") + e.what());
return Option<doc_seq_id_t>(400, std::string("Bad JSON: ") + e.what());
}

if(!document.is_object()) {
return Option<uint32_t>(400, "Bad JSON: not a properly formed document.");
return Option<doc_seq_id_t>(400, "Bad JSON: not a properly formed document.");
}

uint32_t seq_id = get_next_seq_id();
std::string seq_id_str = std::to_string(seq_id);
if(document.count("id") != 0 && id != "" && document["id"] != id) {
return Option<doc_seq_id_t>(400, "The `id` of the resource does not match the `id` in the JSON body.");
}

if(document.count("id") == 0 && !id.empty()) {
// use the explicit ID (usually from a PUT request) if document body does not have it
document["id"] = id;
}

if(document.count("id") != 0 && document["id"] == "") {
return Option<doc_seq_id_t>(400, "The `id` should not be empty.");
}

if(document.count("id") == 0) {
document["id"] = seq_id_str;
} else if(!document["id"].is_string()) {
return Option<uint32_t>(400, "Document's `id` field should be a string.");
}
if(operation == UPDATE) {
return Option<doc_seq_id_t>(400, "For update, the `id` key must be provided.");
}
// for UPSERT or CREATE, if a document does not have an ID, we will treat it as a new doc
uint32_t seq_id = get_next_seq_id();
document["id"] = std::to_string(seq_id);
return Option<doc_seq_id_t>(doc_seq_id_t{seq_id, true});
} else {
if(!document["id"].is_string()) {
return Option<doc_seq_id_t>(400, "Document's `id` field should be a string.");
}

const std::string& doc_id = document["id"];
if(doc_exists(doc_id)) {
return Option<uint32_t>(409, std::string("A document with id ") + doc_id + " already exists.");
}
const std::string& doc_id = document["id"];

return Option<uint32_t>(seq_id);
// try to get the corresponding sequence id from disk if present
std::string seq_id_str;
StoreStatus seq_id_status = store->get(get_doc_id_key(doc_id), seq_id_str);

if(seq_id_status == StoreStatus::ERROR) {
return Option<doc_seq_id_t>(500, "Error fetching the sequence key for document with id: " + doc_id);
}

if(seq_id_status == StoreStatus::FOUND) {
if(operation == CREATE) {
return Option<doc_seq_id_t>(409, std::string("A document with id ") + doc_id + " already exists.");
}

// UPSERT or UPDATE
uint32_t seq_id = (uint32_t) std::stoul(seq_id_str);
return Option<doc_seq_id_t>(doc_seq_id_t{seq_id, false});

} else {
if(operation == UPDATE) {
// for UPDATE, a document with given ID must be found
return Option<doc_seq_id_t>(404, "Could not find a document with id: " + doc_id);
} else {
// for UPSERT or CREATE, if a document with given ID is not found, we will treat it as a new doc
uint32_t seq_id = get_next_seq_id();
return Option<doc_seq_id_t>(doc_seq_id_t{seq_id, true});
}
}
}
}

nlohmann::json Collection::get_summary_json() {
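The branching in the new `to_doc()` encodes a small decision table for how an incoming document's `id` interacts with the requested operation. As a plain summary (an illustrative Python sketch, not part of the commit; `seq_id_on_disk` and `next_seq_id` are hypothetical stand-ins for the RocksDB lookup and the sequence counter, and the extra `id` validations are omitted):

```python
def resolve_doc(doc, operation, seq_id_on_disk, next_seq_id):
    """Returns (seq_id, is_new), mirroring doc_seq_id_t in the diff above.

    seq_id_on_disk(doc_id) -> int | None stands in for the on-disk lookup,
    next_seq_id() for the collection's monotonically increasing counter.
    """
    if "id" not in doc:
        if operation == "update":
            raise ValueError("For update, the `id` key must be provided.")
        return next_seq_id(), True            # create/upsert without id: new doc

    existing = seq_id_on_disk(doc["id"])
    if existing is not None:
        if operation == "create":
            raise ValueError("A document with this id already exists.")
        return existing, False                # upsert/update: reuse stored seq_id

    if operation == "update":
        raise KeyError("Could not find a document with this id.")
    return next_seq_id(), True                # create/upsert with unseen id: new doc
```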
@@ -152,45 +193,48 @@ nlohmann::json Collection::get_summary_json() {
return json_response;
}

Option<nlohmann::json> Collection::add(const std::string & json_str) {
Option<nlohmann::json> Collection::add(const std::string & json_str,
const index_operation_t& operation, const std::string& id) {
nlohmann::json document;
Option<uint32_t> doc_seq_id_op = to_doc(json_str, document);
std::vector<std::string> json_lines = {json_str};
const nlohmann::json& res = add_many(json_lines, document, operation, id);

if(!doc_seq_id_op.ok()) {
return Option<nlohmann::json>(doc_seq_id_op.code(), doc_seq_id_op.error());
}
if(!res["success"].get<bool>()) {
nlohmann::json res_doc;

/*if(is_exceeding_memory_threshold()) {
return Option<nlohmann::json>(403, "Max memory ratio exceeded.");
}*/
try {
res_doc = nlohmann::json::parse(json_lines[0]);
} catch(const std::exception& e) {
LOG(ERROR) << "JSON error: " << e.what();
return Option<nlohmann::json>(400, std::string("Bad JSON: ") + e.what());
}

const uint32_t seq_id = doc_seq_id_op.get();
const std::string seq_id_str = std::to_string(seq_id);

const Option<uint32_t> & index_memory_op = index_in_memory(document, seq_id);

if(!index_memory_op.ok()) {
return Option<nlohmann::json>(index_memory_op.code(), index_memory_op.error());
}

const std::string& serialized_json = document.dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore);

rocksdb::WriteBatch batch;
batch.Put(get_doc_id_key(document["id"]), seq_id_str);
batch.Put(get_seq_id_key(seq_id), serialized_json);
bool write_ok = store->batch_write(batch);

if(!write_ok) {
remove_document(document, seq_id, false); // remove from in-memory store too
return Option<nlohmann::json>(500, "Could not write to on-disk storage.");
return Option<nlohmann::json>(res_doc["code"].get<size_t>(), res_doc["error"].get<std::string>());
}

return Option<nlohmann::json>(document);
}

nlohmann::json Collection::add_many(std::vector<std::string>& json_lines) {
//LOG(INFO) << "Memory ratio. Max = " << max_memory_ratio << ", Used = " << SystemMetrics::used_memory_ratio();
void Collection::get_doc_changes(const nlohmann::json &document, nlohmann::json &old_doc,
nlohmann::json &new_doc, nlohmann::json &del_doc) {

for(auto it = old_doc.begin(); it != old_doc.end(); ++it) {
new_doc[it.key()] = it.value();
}

for(auto it = document.begin(); it != document.end(); ++it) {
new_doc[it.key()] = it.value();
if(old_doc.count(it.key()) != 0) {
// key exists in the stored doc, so it must be reindexed
// we need to check for this because a field can be optional
del_doc[it.key()] = old_doc[it.key()];
}
}
}

nlohmann::json Collection::add_many(std::vector<std::string>& json_lines, nlohmann::json& document,
const index_operation_t& operation, const std::string& id) {
//LOG(INFO) << "Memory ratio. Max = " << max_memory_ratio << ", Used = " << SystemMetrics::used_memory_ratio();
std::vector<std::vector<index_record>> iter_batch;

for(size_t i = 0; i < num_memory_shards; i++) {
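The partial-update bookkeeping done by the new `get_doc_changes()` above is easier to see with plain dictionaries. This is an illustrative Python rendering (not part of the commit): `new_doc` is the stored document overlaid with the incoming fields, and `del_doc` collects the old values of overlapping keys so they can be removed from the in-memory index before reindexing.

```python
def get_doc_changes(incoming, old_doc):
    """Illustrative equivalent of Collection::get_doc_changes()."""
    new_doc = dict(old_doc)      # start from the stored document
    new_doc.update(incoming)     # overlay the fields from the partial update
    # old values of keys being overwritten must be un-indexed first
    del_doc = {k: old_doc[k] for k in incoming if k in old_doc}
    return new_doc, del_doc

old = {"id": "1", "title": "old title", "points": 10}
new_doc, del_doc = get_doc_changes({"id": "1", "title": "new title"}, old)
# new_doc == {"id": "1", "title": "new title", "points": 10}
# del_doc == {"id": "1", "title": "old title"}
```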
@@ -203,16 +247,23 @@ nlohmann::json Collection::add_many(std::vector<std::string>& json_lines) {

for(size_t i=0; i < json_lines.size(); i++) {
const std::string & json_line = json_lines[i];
nlohmann::json document;
Option<uint32_t> doc_seq_id_op = to_doc(json_line, document);
Option<doc_seq_id_t> doc_seq_id_op = to_doc(json_line, document, operation, id);

const uint32_t seq_id = doc_seq_id_op.ok() ? doc_seq_id_op.get() : 0;
index_record record(i, seq_id, document);
const uint32_t seq_id = doc_seq_id_op.ok() ? doc_seq_id_op.get().seq_id : 0;
index_record record(i, seq_id, document, operation);

// NOTE: we overwrite the input json_lines with result to avoid memory pressure

record.is_update = false;

if(!doc_seq_id_op.ok()) {
record.index_failure(doc_seq_id_op.code(), doc_seq_id_op.error());
} else {
record.is_update = !doc_seq_id_op.get().is_new;
if(record.is_update) {
get_document_from_store(get_seq_id_key(seq_id), record.old_doc);
get_doc_changes(document, record.old_doc, record.new_doc, record.del_doc);
}
}

/*
@@ -261,45 +312,74 @@ void Collection::batch_index(std::vector<std::vector<index_record>> &index_batch
// store only documents that were indexed in-memory successfully
for(auto& index_batch: index_batches) {
for(auto& index_record: index_batch) {
nlohmann::json res;

if(index_record.indexed.ok()) {
const std::string& seq_id_str = std::to_string(index_record.seq_id);
const std::string& serialized_json = index_record.document.dump(-1, ' ', false,
nlohmann::detail::error_handler_t::ignore);
if(index_record.is_update) {
const std::string& serialized_json = index_record.new_doc.dump(-1, ' ', false, nlohmann::detail::error_handler_t::ignore);
bool write_ok = store->insert(get_seq_id_key(index_record.seq_id), serialized_json);

rocksdb::WriteBatch batch;
batch.Put(get_doc_id_key(index_record.document["id"]), seq_id_str);
batch.Put(get_seq_id_key(index_record.seq_id), serialized_json);
bool write_ok = store->batch_write(batch);
if(!write_ok) {
// we will attempt to reindex the old doc on a best-effort basis
remove_document(index_record.new_doc, index_record.seq_id, false);
index_in_memory(index_record.old_doc, index_record.seq_id, false);
index_record.index_failure(500, "Could not write to on-disk storage.");
} else {
num_indexed++;
index_record.index_success();
}

if(!write_ok) {
index_record.indexed = Option<bool>(500, "Could not write to on-disk storage.");;
// remove from in-memory store to keep the state synced
remove_document(index_record.document, index_record.seq_id, false);
} else {
const std::string& seq_id_str = std::to_string(index_record.seq_id);
const std::string& serialized_json = index_record.doc.dump(-1, ' ', false,
nlohmann::detail::error_handler_t::ignore);

rocksdb::WriteBatch batch;
batch.Put(get_doc_id_key(index_record.doc["id"]), seq_id_str);
batch.Put(get_seq_id_key(index_record.seq_id), serialized_json);
bool write_ok = store->batch_write(batch);

if(!write_ok) {
// remove from in-memory store to keep the state synced
remove_document(index_record.doc, index_record.seq_id, false);
index_record.index_failure(500, "Could not write to on-disk storage.");
} else {
num_indexed++;
index_record.index_success();
}
}

json_out[index_record.position] = R"({"success": true})";
num_indexed++;
res["success"] = index_record.indexed.ok();
if(!index_record.indexed.ok()) {
res["document"] = json_out[index_record.position];
res["error"] = index_record.indexed.error();
res["code"] = index_record.indexed.code();
}
} else {
nlohmann::json res;
res["success"] = false;
res["error"] = index_record.indexed.error();
res["document"] = json_out[index_record.position];
json_out[index_record.position] = res.dump();
res["error"] = index_record.indexed.error();
res["code"] = index_record.indexed.code();
}

json_out[index_record.position] = res.dump();
}
}
}

Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uint32_t seq_id) {
Option<uint32_t> validation_op = Index::validate_index_in_memory(document, seq_id, default_sorting_field,
search_schema, facet_schema);
Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uint32_t seq_id, bool is_update) {
if(!is_update) {
// for update, validation should be done prior
Option<uint32_t> validation_op = Index::validate_index_in_memory(document, seq_id, default_sorting_field,
search_schema, facet_schema, is_update);

if(!validation_op.ok()) {
return validation_op;
if(!validation_op.ok()) {
return validation_op;
}
}

Index* index = indices[seq_id % num_memory_shards];
index->index_in_memory(document, seq_id, default_sorting_field);
index->index_in_memory(document, seq_id, default_sorting_field, is_update);

num_documents += 1;
return Option<>(200);
@@ -418,6 +498,7 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
const size_t max_facet_values,
const std::string & simple_facet_query,
const size_t snippet_threshold,
const size_t highlight_affix_num_tokens,
const std::string & highlight_full_fields,
size_t typo_tokens_threshold,
const std::map<size_t, std::vector<std::string>>& pinned_hits,
@@ -992,7 +1073,8 @@ Option<nlohmann::json> Collection::search(const std::string & query, const std::
bool highlighted_fully = (fields_highlighted_fully.find(field_name) != fields_highlighted_fully.end());
highlight_t highlight;
highlight_result(search_field, searched_queries, field_order_kv, document,
string_utils, snippet_threshold, highlighted_fully, highlight);
string_utils, snippet_threshold, highlight_affix_num_tokens,
highlighted_fully, highlight);

if(!highlight.snippets.empty()) {
highlights.push_back(highlight);
@@ -1238,7 +1320,9 @@ void Collection::facet_value_to_string(const facet &a_facet, const facet_count_t
void Collection::highlight_result(const field &search_field,
const std::vector<std::vector<art_leaf *>> &searched_queries,
const KV* field_order_kv, const nlohmann::json & document,
StringUtils & string_utils, size_t snippet_threshold,
StringUtils & string_utils,
const size_t snippet_threshold,
const size_t highlight_affix_num_tokens,
bool highlighted_fully,
highlight_t & highlight) {

@@ -1316,6 +1400,10 @@ void Collection::highlight_result(const field &search_field,
if(match.offsets[i].offset != MAX_DISPLACEMENT) {
size_t token_index = (size_t)(match.offsets[i].offset);
token_indices.push_back(token_index);
if(token_index >= tokens.size()) {
LOG(ERROR) << "Highlight token index " << token_index << " is greater than length of store field.";
continue;
}
std::string token = tokens[token_index];
string_utils.unicode_normalize(token);
token_hits.insert(token);
@@ -1324,12 +1412,15 @@ void Collection::highlight_result(const field &search_field,

auto minmax = std::minmax_element(token_indices.begin(), token_indices.end());

size_t prefix_length = highlight_affix_num_tokens;
size_t suffix_length = highlight_affix_num_tokens + 1;

// For longer strings, pick surrounding tokens within 4 tokens of min_index and max_index for the snippet
const size_t start_index = (tokens.size() <= snippet_threshold) ? 0 :
std::max(0, (int)(*(minmax.first) - 4));
std::max(0, (int)(*(minmax.first) - prefix_length));

const size_t end_index = (tokens.size() <= snippet_threshold) ? tokens.size() :
std::min((int)tokens.size(), (int)(*(minmax.second) + 5));
std::min((int)tokens.size(), (int)(*(minmax.second) + suffix_length));

std::stringstream snippet_stream;
for(size_t snippet_index = start_index; snippet_index < end_index; snippet_index++) {
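The snippet window arithmetic just above is compact. As a plain illustration (Python, not part of the commit), the bounds work out like this, with the prefix/suffix sizes now driven by the new `highlight_affix_num_tokens` parameter instead of the hard-coded 4 and 5:

```python
def snippet_bounds(num_tokens, match_indices, snippet_threshold, affix_num_tokens):
    """Illustrative version of the start/end index math in highlight_result()."""
    if num_tokens <= snippet_threshold:
        return 0, num_tokens                               # short field: keep everything
    lo, hi = min(match_indices), max(match_indices)
    start = max(0, lo - affix_num_tokens)                  # prefix tokens before first match
    end = min(num_tokens, hi + affix_num_tokens + 1)       # suffix tokens after last match
    return start, end

print(snippet_bounds(100, [40, 42], snippet_threshold=30, affix_num_tokens=4))  # (36, 47)
```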
@@ -1401,7 +1492,7 @@ Option<nlohmann::json> Collection::get(const std::string & id) {
return Option<nlohmann::json>(500, "Error while fetching the document.");
}

uint32_t seq_id = (uint32_t) std::stol(seq_id_str);
uint32_t seq_id = (uint32_t) std::stoul(seq_id_str);

std::string parsed_document;
StoreStatus doc_status = store->get(get_seq_id_key(seq_id), parsed_document);
@@ -1450,7 +1541,7 @@ Option<std::string> Collection::remove(const std::string & id, const bool remove
return Option<std::string>(500, "Error while fetching the document.");
}

uint32_t seq_id = (uint32_t) std::stol(seq_id_str);
uint32_t seq_id = (uint32_t) std::stoul(seq_id_str);

std::string parsed_document;
StoreStatus doc_status = store->get(get_seq_id_key(seq_id), parsed_document);
@@ -173,7 +173,7 @@ Option<bool> CollectionManager::load(const size_t init_batch_size) {
}

num_valid_docs++;
iter_batch[seq_id % collection->get_num_memory_shards()].emplace_back(index_record(0, seq_id, document));
iter_batch[seq_id % collection->get_num_memory_shards()].emplace_back(index_record(0, seq_id, document, CREATE));

// Peek and check for last record right here so that we handle batched indexing correctly
// Without doing this, the "last batch" would have to be indexed outside the loop.
@@ -195,7 +195,7 @@ Option<bool> CollectionManager::load(const size_t init_batch_size) {

if(num_indexed != num_records) {
const Option<std::string> & index_error_op = get_first_index_error(iter_batch[i]);
if(index_error_op.ok()) {
if(!index_error_op.ok()) {
return Option<bool>(false, index_error_op.get());
}
}
@@ -27,6 +27,18 @@ bool handle_authentication(std::map<std::string, std::string>& req_params, const
return collectionManager.auth_key_matches(auth_key, rpath.action, collection, req_params);
}

index_operation_t get_index_operation(const std::string& action) {
if(action == "create") {
return CREATE;
} else if(action == "update") {
return UPDATE;
} else if(action == "upsert") {
return UPSERT;
}

return CREATE;
}

bool get_collections(http_req & req, http_res & res) {
CollectionManager & collectionManager = CollectionManager::get_instance();
std::vector<Collection*> collections = collectionManager.get_collections();
@@ -254,6 +266,9 @@ bool get_search(http_req & req, http_res & res) {
// strings under this length will be fully highlighted, instead of showing a snippet of relevant portion
const char *SNIPPET_THRESHOLD = "snippet_threshold";

// the number of tokens that should surround the highlighted text
const char *HIGHLIGHT_AFFIX_NUM_TOKENS = "highlight_affix_num_tokens";

// list of fields which will be highlighted fully without snippeting
const char *HIGHLIGHT_FULL_FIELDS = "highlight_full_fields";

@@ -290,6 +305,10 @@ bool get_search(http_req & req, http_res & res) {
req.params[SNIPPET_THRESHOLD] = "30";
}

if(req.params.count(HIGHLIGHT_AFFIX_NUM_TOKENS) == 0) {
req.params[HIGHLIGHT_AFFIX_NUM_TOKENS] = "4";
}

if(req.params.count(HIGHLIGHT_FULL_FIELDS) == 0) {
req.params[HIGHLIGHT_FULL_FIELDS] = "";
}
@@ -362,6 +381,11 @@ bool get_search(http_req & req, http_res & res) {
return false;
}

if(!StringUtils::is_uint32_t(req.params[HIGHLIGHT_AFFIX_NUM_TOKENS])) {
res.set_400("Parameter `" + std::string(HIGHLIGHT_AFFIX_NUM_TOKENS) + "` must be an unsigned integer.");
return false;
}

if(!StringUtils::is_uint32_t(req.params[GROUP_LIMIT])) {
res.set_400("Parameter `" + std::string(GROUP_LIMIT) + "` must be an unsigned integer.");
return false;
@@ -474,6 +498,7 @@ bool get_search(http_req & req, http_res & res) {
static_cast<size_t>(std::stol(req.params[MAX_FACET_VALUES])),
req.params[FACET_QUERY],
static_cast<size_t>(std::stol(req.params[SNIPPET_THRESHOLD])),
static_cast<size_t>(std::stol(req.params[HIGHLIGHT_AFFIX_NUM_TOKENS])),
req.params[HIGHLIGHT_FULL_FIELDS],
typo_tokens_threshold,
pinned_hits,
@@ -579,11 +604,16 @@ bool post_import_documents(http_req& req, http_res& res) {
//LOG(INFO) << "post_import_documents";
//LOG(INFO) << "req.first_chunk=" << req.first_chunk_aggregate << ", last_chunk=" << req.last_chunk_aggregate;
const char *BATCH_SIZE = "batch_size";
const char *ACTION = "action";

if(req.params.count(BATCH_SIZE) == 0) {
req.params[BATCH_SIZE] = "40";
}

if(req.params.count(ACTION) == 0) {
req.params[ACTION] = "create";
}

if(!StringUtils::is_uint32_t(req.params[BATCH_SIZE])) {
req.last_chunk_aggregate = true;
res.final = true;
@@ -592,6 +622,14 @@ bool post_import_documents(http_req& req, http_res& res) {
return false;
}

if(req.params[ACTION] != "create" && req.params[ACTION] != "update" && req.params[ACTION] != "upsert") {
req.last_chunk_aggregate = true;
res.final = true;
res.set_400("Parameter `" + std::string(ACTION) + "` must be a create|update|upsert.");
HttpServer::stream_response(req, res);
return false;
}

const size_t IMPORT_BATCH_SIZE = std::stoi(req.params[BATCH_SIZE]);

if(IMPORT_BATCH_SIZE == 0) {
@@ -667,8 +705,11 @@ bool post_import_documents(http_req& req, http_res& res) {

//LOG(INFO) << "single_partial_record_body: " << single_partial_record_body;

const index_operation_t operation = get_index_operation(req.params[ACTION]);

if(!single_partial_record_body) {
nlohmann::json json_res = collection->add_many(json_lines);
nlohmann::json document;
nlohmann::json json_res = collection->add_many(json_lines, document, operation);
//const std::string& import_summary_json = json_res.dump();
//response_stream << import_summary_json << "\n";
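With the new `action` parameter wired through the import handler above, the bulk import endpoint can be asked to upsert or update instead of only creating documents. A minimal usage sketch (Python with the `requests` library; the `companies` collection name, port and API key are assumptions carried over from the Docker example earlier, not taken from this diff):

```python
import json
import requests

docs = [
    {"id": "1", "company_name": "Stark Industries", "num_employees": 5215},
    {"id": "2", "company_name": "Wayne Enterprises", "num_employees": 1000},
]

# the import endpoint takes JSON Lines; action can be create (default), upsert or update
r = requests.post(
    "http://localhost:8108/collections/companies/documents/import",
    params={"action": "upsert", "batch_size": 40},
    headers={"X-TYPESENSE-API-KEY": "Hu52dwsas2AdxdE"},
    data="\n".join(json.dumps(d) for d in docs),
)
print(r.text)  # one JSON result object per input line
```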
@@ -698,6 +739,16 @@ bool post_import_documents(http_req& req, http_res& res) {
}

bool post_add_document(http_req & req, http_res & res) {
const char *ACTION = "action";
if(req.params.count(ACTION) == 0) {
req.params[ACTION] = "create";
}

if(req.params[ACTION] != "create" && req.params[ACTION] != "update" && req.params[ACTION] != "upsert") {
res.set_400("Parameter `" + std::string(ACTION) + "` must be a create|update|upsert.");
return false;
}

CollectionManager & collectionManager = CollectionManager::get_instance();
Collection* collection = collectionManager.get_collection(req.params["collection"]);

@@ -706,7 +757,8 @@ bool post_add_document(http_req & req, http_res & res) {
return false;
}

Option<nlohmann::json> inserted_doc_op = collection->add(req.body);
const index_operation_t operation = get_index_operation(req.params[ACTION]);
Option<nlohmann::json> inserted_doc_op = collection->add(req.body, operation);

if(!inserted_doc_op.ok()) {
res.set(inserted_doc_op.code(), inserted_doc_op.error());
@@ -717,6 +769,28 @@ bool post_add_document(http_req & req, http_res & res) {
return true;
}

bool patch_update_document(http_req & req, http_res & res) {
std::string doc_id = req.params["id"];

CollectionManager & collectionManager = CollectionManager::get_instance();
Collection* collection = collectionManager.get_collection(req.params["collection"]);

if(collection == nullptr) {
res.set_404();
return false;
}

Option<nlohmann::json> upserted_doc_op = collection->add(req.body, index_operation_t::UPDATE, doc_id);

if(!upserted_doc_op.ok()) {
res.set(upserted_doc_op.code(), upserted_doc_op.error());
return false;
}

res.set_201(upserted_doc_op.get().dump());
return true;
}

bool get_fetch_document(http_req & req, http_res & res) {
std::string doc_id = req.params["id"];
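The new `patch_update_document` handler above performs a partial update of a single document, presumably registered as `PATCH /collections/:collection/documents/:id` via the `HttpServer::patch()` addition later in this commit (the route registration itself is not part of this excerpt). A minimal usage sketch (Python `requests`; collection name and API key are the same assumptions as before), where only the fields present in the body are changed:

```python
import requests

# partially update document "1": only company_name changes, other fields are kept
r = requests.patch(
    "http://localhost:8108/collections/companies/documents/1",
    headers={"X-TYPESENSE-API-KEY": "Hu52dwsas2AdxdE"},
    json={"company_name": "Stark Industries Inc."},
)
print(r.status_code, r.json())
```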
@@ -1044,7 +1118,7 @@ bool get_key(http_req &req, http_res &res) {
AuthManager &auth_manager = collectionManager.getAuthManager();

const std::string& key_id_str = req.params["id"];
uint32_t key_id = (uint32_t) std::stol(key_id_str);
uint32_t key_id = (uint32_t) std::stoul(key_id_str);

const Option<api_key_t>& key_op = auth_manager.get_key(key_id);

@@ -1066,7 +1140,7 @@ bool del_key(http_req &req, http_res &res) {
AuthManager &auth_manager = collectionManager.getAuthManager();

const std::string& key_id_str = req.params["id"];
uint32_t key_id = (uint32_t) std::stol(key_id_str);
uint32_t key_id = (uint32_t) std::stoul(key_id_str);

const Option<api_key_t> &del_op = auth_manager.remove_key(key_id);

@@ -129,6 +129,7 @@ int HttpServer::create_listener() {
ctx.globalconf->server_name = h2o_strdup(nullptr, "", SIZE_MAX);
ctx.globalconf->http2.active_stream_window_size = ACTIVE_STREAM_WINDOW_SIZE;
ctx.globalconf->http2.idle_timeout = REQ_TIMEOUT_MS;
ctx.globalconf->max_request_entity_size = (1024 * 1024 * 1024); // 1 GB

ctx.globalconf->http1.req_timeout = REQ_TIMEOUT_MS;
ctx.globalconf->http1.req_io_timeout = REQ_TIMEOUT_MS;
@@ -705,6 +706,13 @@ void HttpServer::put(const std::string & path, bool (*handler)(http_req &, http_
routes.emplace_back(rpath.route_hash(), rpath);
}

void HttpServer::patch(const std::string & path, bool (*handler)(http_req &, http_res &), bool async_req, bool async_res) {
std::vector<std::string> path_parts;
StringUtils::split(path, path_parts, "/");
route_path rpath("PATCH", path_parts, handler, async_req, async_res);
routes.emplace_back(rpath.route_hash(), rpath);
}

void HttpServer::del(const std::string & path, bool (*handler)(http_req &, http_res &), bool async_req, bool async_res) {
std::vector<std::string> path_parts;
StringUtils::split(path, path_parts, "/");
src/index.cpp (327 changed lines)
@@ -56,8 +56,8 @@ Index::~Index() {
sort_index.clear();
}

int32_t Index::get_points_from_doc(const nlohmann::json &document, const std::string & default_sorting_field) {
int32_t points = 0;
int64_t Index::get_points_from_doc(const nlohmann::json &document, const std::string & default_sorting_field) {
int64_t points = 0;

if(!default_sorting_field.empty()) {
if(document[default_sorting_field].is_number_float()) {
@@ -85,8 +85,15 @@ int64_t Index::float_to_in64_t(float f) {
}

Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t seq_id,
const std::string & default_sorting_field) {
int32_t points = get_points_from_doc(document, default_sorting_field);
const std::string & default_sorting_field, bool is_update) {

int64_t points = 0;

if(is_update && document.count(default_sorting_field) == 0) {
points = sort_index[default_sorting_field]->at(seq_id);
} else {
points = get_points_from_doc(document, default_sorting_field);
}

std::unordered_map<std::string, size_t> facet_to_id;
size_t i_facet = 0;
@@ -104,7 +111,7 @@ Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t
for(const std::pair<std::string, field> & field_pair: search_schema) {
const std::string & field_name = field_pair.first;

if(field_pair.second.optional && document.count(field_name) == 0) {
if((field_pair.second.optional || is_update) && document.count(field_name) == 0) {
continue;
}

@@ -212,17 +219,22 @@ Option<uint32_t> Index::index_in_memory(const nlohmann::json &document, uint32_t
Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document, uint32_t seq_id,
const std::string & default_sorting_field,
const std::unordered_map<std::string, field> & search_schema,
const std::map<std::string, field> & facet_schema) {
if(document.count(default_sorting_field) == 0) {
const std::map<std::string, field> & facet_schema,
bool is_update) {

bool has_default_sort_field = (document.count(default_sorting_field) != 0);

if(!has_default_sort_field && !is_update) {
return Option<>(400, "Field `" + default_sorting_field + "` has been declared as a default sorting field, "
"but is not found in the document.");
}

if(!document[default_sorting_field].is_number_integer() && !document[default_sorting_field].is_number_float()) {
if(has_default_sort_field &&
!document[default_sorting_field].is_number_integer() && !document[default_sorting_field].is_number_float()) {
return Option<>(400, "Default sorting field `" + default_sorting_field + "` must be a single valued numerical field.");
}

if(search_schema.at(default_sorting_field).is_single_float() &&
if(has_default_sort_field && search_schema.at(default_sorting_field).is_single_float() &&
document[default_sorting_field].get<float>() > std::numeric_limits<float>::max()) {
return Option<>(400, "Default sorting field `" + default_sorting_field + "` exceeds maximum value of a float.");
}
@@ -230,7 +242,7 @@ Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document,
for(const std::pair<std::string, field> & field_pair: search_schema) {
const std::string & field_name = field_pair.first;

if(field_pair.second.optional && document.count(field_name) == 0) {
if((field_pair.second.optional || is_update) && document.count(field_name) == 0) {
continue;
}
@@ -309,6 +321,48 @@ Option<uint32_t> Index::validate_index_in_memory(const nlohmann::json &document,
return Option<>(200);
}

void Index::scrub_reindex_doc(nlohmann::json& update_doc, nlohmann::json& del_doc, nlohmann::json& old_doc) {
auto it = del_doc.cbegin();
while(it != del_doc.cend()) {
const std::string& field_name = it.key();
const auto& search_field_it = search_schema.find(field_name);
if(search_field_it == search_schema.end()) {
++it;
continue;
}

const auto& search_field = search_field_it->second;

// Go through all the field names and find the keys+values so that they can be removed from in-memory index
std::vector<std::string> reindex_tokens;
std::vector<std::string> old_tokens;
tokenize_doc_field(update_doc, field_name, search_field, reindex_tokens);
tokenize_doc_field(old_doc, field_name, search_field, old_tokens);

if(old_tokens.size() != reindex_tokens.size()) {
++it;
continue;
}

bool exact_match = true;

for(size_t i=0; i<reindex_tokens.size(); i++) {
const std::string& reindex_val = reindex_tokens[i];
const std::string& old_val = old_tokens[i];
if(reindex_val != old_val) {
exact_match = false;
break;
}
}

if(exact_match) {
it = del_doc.erase(it);
} else {
++it;
}
}
}

size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_batch,
const std::string & default_sorting_field,
const std::unordered_map<std::string, field> & search_schema,
@@ -322,29 +376,42 @@ size_t Index::batch_memory_index(Index *index, std::vector<index_record> & iter_
continue;
}

Option<uint32_t> validation_op = validate_index_in_memory(index_rec.document, index_rec.seq_id,
default_sorting_field,
search_schema, facet_schema);
if(index_rec.operation != DELETE) {
Option<uint32_t> validation_op = validate_index_in_memory(index_rec.doc, index_rec.seq_id,
default_sorting_field,
search_schema, facet_schema, index_rec.is_update);

if(!validation_op.ok()) {
index_rec.index_failure(validation_op.code(), validation_op.error());
continue;
if(!validation_op.ok()) {
index_rec.index_failure(validation_op.code(), validation_op.error());
continue;
}

if(index_rec.is_update) {
// scrub string fields to reduce delete ops
index->scrub_reindex_doc(index_rec.doc, index_rec.del_doc, index_rec.old_doc);
index->remove(index_rec.seq_id, index_rec.del_doc);
}

Option<uint32_t> index_mem_op = index->index_in_memory(index_rec.doc, index_rec.seq_id,
default_sorting_field, index_rec.is_update);
if(!index_mem_op.ok()) {
index->index_in_memory(index_rec.del_doc, index_rec.seq_id, default_sorting_field, true);
index_rec.index_failure(index_mem_op.code(), index_mem_op.error());
continue;
}

index_rec.index_success();

if(!index_rec.is_update) {
num_indexed++;
}
}

Option<uint32_t> index_mem_op = index->index_in_memory(index_rec.document, index_rec.seq_id, default_sorting_field);
if(!index_mem_op.ok()) {
index_rec.index_failure(index_mem_op.code(), index_mem_op.error());
continue;
}

index_rec.index_success(index_rec);
num_indexed++;
}

return num_indexed;
}

void Index::insert_doc(const uint32_t score, art_tree *t, uint32_t seq_id,
void Index::insert_doc(const int64_t score, art_tree *t, uint32_t seq_id,
const std::unordered_map<std::string, std::vector<uint32_t>> &token_to_offsets) const {
for(auto & kv: token_to_offsets) {
art_document art_doc;
@@ -369,13 +436,14 @@ void Index::insert_doc(const uint32_t score, art_tree *t, uint32_t seq_id,
art_doc.offsets[i] = kv.second[i];
}

//LOG(INFO) << "key: " << key << ", art_doc.id: " << art_doc.id;
art_insert(t, key, key_len, &art_doc, num_hits);
delete [] art_doc.offsets;
art_doc.offsets = nullptr;
}
}

void Index::index_int32_field(const int32_t value, uint32_t score, art_tree *t, uint32_t seq_id) const {
void Index::index_int32_field(const int32_t value, int64_t score, art_tree *t, uint32_t seq_id) const {
const int KEY_LEN = 8;
unsigned char key[KEY_LEN];

@@ -398,7 +466,7 @@ void Index::index_int32_field(const int32_t value, uint32_t score, art_tree *t,
art_insert(t, key, KEY_LEN, &art_doc, num_hits);
}

void Index::index_int64_field(const int64_t value, uint32_t score, art_tree *t, uint32_t seq_id) const {
void Index::index_int64_field(const int64_t value, int64_t score, art_tree *t, uint32_t seq_id) const {
const int KEY_LEN = 8;
unsigned char key[KEY_LEN];

@@ -421,7 +489,7 @@ void Index::index_int64_field(const int64_t value, uint32_t score, art_tree *t,
art_insert(t, key, KEY_LEN, &art_doc, num_hits);
}

void Index::index_bool_field(const bool value, const uint32_t score, art_tree *t, uint32_t seq_id) const {
void Index::index_bool_field(const bool value, const int64_t score, art_tree *t, uint32_t seq_id) const {
const int KEY_LEN = 1;
unsigned char key[KEY_LEN];
key[0] = value ? '1' : '0';
@@ -443,7 +511,7 @@ void Index::index_bool_field(const bool value, const uint32_t score, art_tree *t
art_insert(t, key, KEY_LEN, &art_doc, num_hits);
}

void Index::index_float_field(const float value, uint32_t score, art_tree *t, uint32_t seq_id) const {
void Index::index_float_field(const float value, int64_t score, art_tree *t, uint32_t seq_id) const {
const int KEY_LEN = 8;
unsigned char key[KEY_LEN];

@@ -484,7 +552,7 @@ uint64_t Index::facet_token_hash(const field & a_field, const std::string &token
return hash;
}

void Index::index_string_field(const std::string & text, const uint32_t score, art_tree *t,
void Index::index_string_field(const std::string & text, const int64_t score, art_tree *t,
uint32_t seq_id, int facet_id, const field & a_field) {
std::vector<std::string> tokens;
StringUtils::split(text, tokens, " ");
@@ -506,6 +574,10 @@ void Index::index_string_field(const std::string & text, const uint32_t score, a
token_to_offsets[token].push_back(i);
}

/*if(seq_id == 0) {
LOG(INFO) << "field name: " << a_field.name;
}*/

insert_doc(score, t, seq_id, token_to_offsets);

if(facet_id >= 0) {
@@ -513,7 +585,7 @@ void Index::index_string_field(const std::string & text, const uint32_t score, a
}
}

void Index::index_string_array_field(const std::vector<std::string> & strings, const uint32_t score, art_tree *t,
void Index::index_string_array_field(const std::vector<std::string> & strings, const int64_t score, art_tree *t,
uint32_t seq_id, int facet_id, const field & a_field) {
std::unordered_map<std::string, std::vector<uint32_t>> token_positions;

@@ -565,28 +637,28 @@ void Index::index_string_array_field(const std::vector<std::string> & strings, c
insert_doc(score, t, seq_id, token_positions);
}

void Index::index_int32_array_field(const std::vector<int32_t> & values, const uint32_t score, art_tree *t,
void Index::index_int32_array_field(const std::vector<int32_t> & values, const int64_t score, art_tree *t,
uint32_t seq_id) const {
for(const int32_t value: values) {
index_int32_field(value, score, t, seq_id);
}
}

void Index::index_int64_array_field(const std::vector<int64_t> & values, const uint32_t score, art_tree *t,
void Index::index_int64_array_field(const std::vector<int64_t> & values, const int64_t score, art_tree *t,
uint32_t seq_id) const {
for(const int64_t value: values) {
index_int64_field(value, score, t, seq_id);
}
}

void Index::index_bool_array_field(const std::vector<bool> & values, const uint32_t score, art_tree *t,
void Index::index_bool_array_field(const std::vector<bool> & values, const int64_t score, art_tree *t,
uint32_t seq_id) const {
for(const bool value: values) {
index_bool_field(value, score, t, seq_id);
}
}

void Index::index_float_array_field(const std::vector<float> & values, const uint32_t score, art_tree *t,
void Index::index_float_array_field(const std::vector<float> & values, const int64_t score, art_tree *t,
uint32_t seq_id) const {
for(const float value: values) {
index_float_field(value, score, t, seq_id);
@@ -996,7 +1068,7 @@ Option<uint32_t> Index::do_filtering(uint32_t** filter_ids_out, const std::vecto
bool found_filter = false;

if(!f.is_array()) {
found_filter = (str_tokens.size() == fvalues.size());
found_filter = (query_suggestion.size() == fvalues.size());
} else {
uint64_t filter_hash = 1;

@@ -1712,6 +1784,11 @@ void Index::populate_token_positions(const std::vector<art_leaf *>& query_sugges
// a) last element is array_index b) second and third last elements will be largest offset
|
||||
// (last element is repeated to indicate end of offsets for a given array index)
|
||||
|
||||
/*uint32_t* offsets = token_leaf->values->offsets.uncompress();
|
||||
for(size_t ii=0; ii < token_leaf->values->offsets.getLength(); ii++) {
|
||||
LOG(INFO) << "offset: " << offsets[ii];
|
||||
}*/
|
||||
|
||||
uint32_t start_offset = token_leaf->values->offset_index.at(doc_index);
|
||||
uint32_t end_offset = (doc_index == token_leaf->values->ids.getLength() - 1) ?
|
||||
token_leaf->values->offsets.getLength() :
|
||||
@ -1767,8 +1844,8 @@ inline std::vector<art_leaf *> Index::next_suggestion(const std::vector<token_ca
|
||||
return query_suggestion;
|
||||
}
|
||||
|
||||
void Index::remove_and_shift_offset_index(sorted_array &offset_index, const uint32_t *indices_sorted,
|
||||
const uint32_t indices_length) {
|
||||
void Index::remove_and_shift_offset_index(sorted_array& offset_index, const uint32_t* indices_sorted,
|
||||
const uint32_t indices_length) {
|
||||
uint32_t *curr_array = offset_index.uncompress();
|
||||
uint32_t *new_array = new uint32_t[offset_index.getLength()];
|
||||
|
||||
@ -1801,83 +1878,27 @@ void Index::remove_and_shift_offset_index(sorted_array &offset_index, const uint
|
||||
}
|
||||
|
||||
Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & document) {
|
||||
for(auto & name_field: search_schema) {
|
||||
if(name_field.second.optional && document.count(name_field.first) == 0) {
|
||||
std::unordered_map<std::string, size_t> facet_to_index;
|
||||
get_facet_to_index(facet_to_index);
|
||||
|
||||
for(auto it = document.begin(); it != document.end(); ++it) {
|
||||
const std::string& field_name = it.key();
|
||||
const auto& search_field_it = search_schema.find(field_name);
|
||||
if(search_field_it == search_schema.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& search_field = search_field_it->second;
|
||||
|
||||
// Go through all the field names and find the keys+values so that they can be removed from in-memory index
|
||||
std::vector<std::string> tokens;
|
||||
if(name_field.second.type == field_types::STRING) {
|
||||
StringUtils::split(document[name_field.first], tokens, " ");
|
||||
} else if(name_field.second.type == field_types::STRING_ARRAY) {
|
||||
std::vector<std::string> values = document[name_field.first].get<std::vector<std::string>>();
|
||||
for(const std::string & value: values) {
|
||||
StringUtils::split(value, tokens, " ");
|
||||
}
|
||||
} else if(name_field.second.type == field_types::INT32) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
int32_t value = document[name_field.first].get<int32_t>();
|
||||
encode_int32(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::INT32_ARRAY) {
|
||||
std::vector<int32_t> values = document[name_field.first].get<std::vector<int32_t>>();
|
||||
for(const int32_t value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_int32(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
} else if(name_field.second.type == field_types::INT64) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
int64_t value = document[name_field.first].get<int64_t>();
|
||||
encode_int64(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::INT64_ARRAY) {
|
||||
std::vector<int64_t> values = document[name_field.first].get<std::vector<int64_t>>();
|
||||
for(const int64_t value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_int64(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
} else if(name_field.second.type == field_types::FLOAT) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
int64_t value = document[name_field.first].get<int64_t>();
|
||||
encode_float(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::FLOAT_ARRAY) {
|
||||
std::vector<float> values = document[name_field.first].get<std::vector<float>>();
|
||||
for(const float value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_float(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
} else if(name_field.second.type == field_types::BOOL) {
|
||||
const int KEY_LEN = 1;
|
||||
unsigned char key[KEY_LEN];
|
||||
bool value = document[name_field.first].get<bool>();
|
||||
key[0] = value ? '1' : '0';
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::BOOL_ARRAY) {
|
||||
std::vector<bool> values = document[name_field.first].get<std::vector<bool>>();
|
||||
for(const bool value: values) {
|
||||
const int KEY_LEN = 1;
|
||||
unsigned char key[KEY_LEN];
|
||||
key[0] = value ? '1' : '0';
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
}
|
||||
tokenize_doc_field(document, field_name, search_field, tokens);
|
||||
|
||||
for(auto & token: tokens) {
|
||||
const unsigned char *key;
|
||||
int key_len;
|
||||
|
||||
if(name_field.second.type == field_types::STRING_ARRAY || name_field.second.type == field_types::STRING) {
|
||||
if(search_field.type == field_types::STRING_ARRAY || search_field.type == field_types::STRING) {
|
||||
string_utils.unicode_normalize(token);
|
||||
key = (const unsigned char *) token.c_str();
|
||||
key_len = (int) (token.length() + 1);
|
||||
@ -1886,9 +1907,8 @@ Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & doc
|
||||
key_len = (int) (token.length());
|
||||
}
|
||||
|
||||
art_leaf* leaf = (art_leaf *) art_search(search_index.at(name_field.first), key, key_len);
|
||||
if(leaf != NULL) {
|
||||
uint32_t seq_id_values[1] = {seq_id};
|
||||
art_leaf* leaf = (art_leaf *) art_search(search_index.at(field_name), key, key_len);
|
||||
if(leaf != nullptr) {
|
||||
uint32_t doc_index = leaf->values->ids.indexOf(seq_id);
|
||||
|
||||
if(doc_index == leaf->values->ids.getLength()) {
|
||||
@ -1905,7 +1925,7 @@ Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & doc
|
||||
remove_and_shift_offset_index(leaf->values->offset_index, doc_indices, 1);
|
||||
|
||||
leaf->values->offsets.remove_index(start_offset, end_offset);
|
||||
leaf->values->ids.remove_values(seq_id_values, 1);
|
||||
leaf->values->ids.remove_value(seq_id);
|
||||
|
||||
/*len = leaf->values->offset_index.getLength();
|
||||
for(auto i=0; i<len; i++) {
|
||||
@ -1914,25 +1934,96 @@ Option<uint32_t> Index::remove(const uint32_t seq_id, const nlohmann::json & doc
|
||||
LOG(INFO) << "----";*/
|
||||
|
||||
if(leaf->values->ids.getLength() == 0) {
|
||||
art_values* values = (art_values*) art_delete(search_index.at(name_field.first), key, key_len);
|
||||
art_values* values = (art_values*) art_delete(search_index.at(field_name), key, key_len);
|
||||
delete values;
|
||||
values = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// remove facets if any
|
||||
facet_index_v2.erase(seq_id);
|
||||
// remove facets
|
||||
if(facet_to_index.count(field_name) != 0 && facet_index_v2.count(seq_id) != 0) {
|
||||
size_t facet_index = facet_to_index[field_name];
|
||||
std::vector<std::vector<uint64_t>>& facet_values = facet_index_v2[seq_id];
|
||||
facet_values[facet_index].clear();
|
||||
}
|
||||
|
||||
// remove sort index if any
|
||||
for(auto & field_doc_value_map: sort_index) {
|
||||
field_doc_value_map.second->erase(seq_id);
|
||||
// remove sort field
|
||||
if(sort_index.count(field_name) != 0) {
|
||||
sort_index[field_name]->erase(seq_id);
|
||||
}
|
||||
}
|
||||
|
||||
return Option<uint32_t>(seq_id);
|
||||
}
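As a reading aid for the removal logic above, here is a minimal sketch of what deleting one `seq_id` from a token's posting list involves, with plain `std::vector`s standing in for the compressed `ids`, `offset_index` and `offsets` arrays. The `posting_list` type and `remove_seq_id` function are illustrative stand-ins, not the real classes:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct posting_list {
    std::vector<uint32_t> ids;           // documents containing the token
    std::vector<uint32_t> offset_index;  // start position of each doc's offsets
    std::vector<uint32_t> offsets;       // token positions, concatenated per doc
};

void remove_seq_id(posting_list& p, uint32_t seq_id) {
    auto it = std::find(p.ids.begin(), p.ids.end(), seq_id);
    if(it == p.ids.end()) {
        return;  // this document never contained the token
    }

    size_t doc_index = it - p.ids.begin();
    uint32_t start = p.offset_index[doc_index];
    uint32_t end = (doc_index + 1 == p.ids.size()) ? (uint32_t) p.offsets.size()
                                                   : p.offset_index[doc_index + 1];

    p.offsets.erase(p.offsets.begin() + start, p.offsets.begin() + end);

    // shift the offset starts of the documents that came after this one
    for(size_t i = doc_index + 1; i < p.offset_index.size(); i++) {
        p.offset_index[i] -= (end - start);
    }
    p.offset_index.erase(p.offset_index.begin() + doc_index);
    p.ids.erase(it);
    // in the real index, a leaf whose ids becomes empty is art_delete()-d
}
```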
|
||||
|
||||
void Index::tokenize_doc_field(const nlohmann::json& document, const std::string& field_name, const field& search_field,
|
||||
std::vector<std::string>& tokens) {
|
||||
if(search_field.type == field_types::STRING) {
|
||||
StringUtils::split(document[field_name], tokens, " ");
|
||||
} else if(search_field.type == field_types::STRING_ARRAY) {
|
||||
const std::vector<std::string>& values = document[field_name].get<std::vector<std::string>>();
|
||||
for(const std::string & value: values) {
|
||||
StringUtils::split(value, tokens, " ");
|
||||
}
|
||||
} else if(search_field.type == field_types::INT32) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
const int32_t& value = document[field_name].get<int32_t>();
|
||||
encode_int32(value, key);
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
} else if(search_field.type == field_types::INT32_ARRAY) {
|
||||
const std::vector<int32_t>& values = document[field_name].get<std::vector<int32_t>>();
|
||||
for(const int32_t value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_int32(value, key);
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
}
|
||||
} else if(search_field.type == field_types::INT64) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
const int64_t& value = document[field_name].get<int64_t>();
|
||||
encode_int64(value, key);
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
} else if(search_field.type == field_types::INT64_ARRAY) {
|
||||
const std::vector<int64_t>& values = document[field_name].get<std::vector<int64_t>>();
|
||||
for(const int64_t value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_int64(value, key);
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
}
|
||||
} else if(search_field.type == field_types::FLOAT) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
const int64_t& value = document[field_name].get<int64_t>();
|
||||
encode_float(value, key);
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
} else if(search_field.type == field_types::FLOAT_ARRAY) {
|
||||
const std::vector<float>& values = document[field_name].get<std::vector<float>>();
|
||||
for(const float value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_float(value, key);
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
}
|
||||
} else if(search_field.type == field_types::BOOL) {
|
||||
const int KEY_LEN = 1;
|
||||
unsigned char key[KEY_LEN];
|
||||
const bool& value = document[field_name].get<bool>();
|
||||
key[0] = value ? '1' : '0';
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
} else if(search_field.type == field_types::BOOL_ARRAY) {
|
||||
const std::vector<bool>& values = document[field_name].get<std::vector<bool>>();
|
||||
for(const bool value: values) {
|
||||
const int KEY_LEN = 1;
|
||||
unsigned char key[KEY_LEN];
|
||||
key[0] = value ? '1' : '0';
|
||||
tokens.emplace_back((char*)key, KEY_LEN);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
art_leaf* Index::get_token_leaf(const std::string & field_name, const unsigned char* token, uint32_t token_len) {
|
||||
const art_tree *t = search_index.at(field_name);
|
||||
return (art_leaf*) art_search(t, token, (int) token_len);
|
||||
|
@ -21,6 +21,7 @@ void master_server_routes() {

    // document management - `/documents/:id` end-points must be placed last in the list
    server->post("/collections/:collection/documents", post_add_document);
    server->patch("/collections/:collection/documents/:id", patch_update_document);
    server->get("/collections/:collection/documents/search", get_search);

    server->post("/collections/:collection/documents/import", post_import_documents, true, true);
|
||||
|
@ -1,5 +1,6 @@
#include "sorted_array.h"
#include "array_utils.h"
#include "logger.h"

void sorted_array::load(const uint32_t *sorted_array, const uint32_t array_length) {
    min = array_length != 0 ? sorted_array[0] : 0;
@ -18,28 +19,67 @@ void sorted_array::load(const uint32_t *sorted_array, const uint32_t array_lengt
    length_bytes = actual_size;
}

bool sorted_array::append(uint32_t value) {
    uint32_t size_required = sorted_append_size_required(value, length+1);
size_t sorted_array::append(uint32_t value) {
    if(value < max) {
        // we will have to re-encode the whole sequence again
        uint32_t* arr = uncompress(length+1);

    if(size_required+FOR_ELE_SIZE > size_bytes) {
        // grow the array first
        size_t new_size = (size_t) (size_required * FOR_GROWTH_FACTOR);
        uint8_t *new_location = (uint8_t *) realloc(in, new_size);
        if(new_location == NULL) {
            abort();
        // find the index of the element which is >= to `value`
        uint32_t found_val;
        uint32_t gte_index = for_lower_bound_search(in, length, value, &found_val);

        for(size_t j=length; j>gte_index; j--) {
            arr[j] = arr[j-1];
        }
        in = new_location;
        size_bytes = (uint32_t) new_size;

        arr[gte_index] = value;

        load(arr, length+1);
        delete [] arr;

        return gte_index;
    } else {
        uint32_t size_required = sorted_append_size_required(value, length+1);
        size_t min_expected_size = size_required + FOR_ELE_SIZE;

        if(size_bytes < min_expected_size) {
            // grow the array first
            size_t new_size = min_expected_size * FOR_GROWTH_FACTOR;
            uint8_t *new_location = (uint8_t *) realloc(in, new_size);
            if(new_location == NULL) {
                abort();
            }
            in = new_location;
            size_bytes = (uint32_t) new_size;

            //LOG(INFO) << "new_size: " << new_size;
        }

        uint32_t new_length_bytes = for_append_sorted(in, length, value);
        if(new_length_bytes == 0) return false;

        length_bytes = new_length_bytes;
        length++;

        if(value < min) min = value;
        if(value > max) max = value;

        return length-1;
    }
}

bool sorted_array::insert(size_t index, uint32_t value) {
    if(index >= length) {
        return false;
    }

    uint32_t new_length_bytes = for_append_sorted(in, length, value);
    if(new_length_bytes == 0) return false;
    uint32_t *curr_array = uncompress(length+1);
    memmove(&curr_array[index+1], &curr_array[index], sizeof(uint32_t)*(length-index));
    curr_array[index] = value;

    length_bytes = new_length_bytes;
    length++;
    load(curr_array, length+1);

    if(value < min) min = value;
    if(value > max) max = value;
    delete [] curr_array;

    return true;
}
|
||||
@ -61,7 +101,11 @@ uint32_t sorted_array::indexOf(uint32_t value) {

    uint32_t actual;
    uint32_t index = for_lower_bound_search(in, length, value, &actual);
    if(actual == value) return index;

    if(actual == value) {
        return index;
    }

    return length;
}

@ -150,20 +194,40 @@ void sorted_array::indexOf(const uint32_t *values, const size_t values_len, uint
    binary_search_indices(values, head, tail, low_index, high_index, base, bits, indices);
}

void sorted_array::remove_values(uint32_t *sorted_values, uint32_t values_length) {
void sorted_array::remove_value(uint32_t value) {
    // A lower bound search returns the first element in the sequence that is >= `value`
    // So, `found_val` will be either equal or greater than `value`
    uint32_t found_val;
    uint32_t found_index = for_lower_bound_search(in, length, value, &found_val);

    if(found_val != value) {
        return ;
    }

    uint32_t *curr_array = uncompress();

    if(found_index + 1 < length) {
        memmove(&curr_array[found_index], &curr_array[found_index+1], sizeof(uint32_t) * (length - found_index - 1));
    }

    size_t new_length = (length == 0) ? 0 : (length - 1);
    load(curr_array, new_length);

    delete [] curr_array;
}
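A short usage sketch of the `sorted_array` behaviour introduced in this file, based only on what is visible in the diff: `append()` now returns the position at which the value landed (re-encoding the sequence when a value arrives out of order), `indexOf()` signals "not found" by returning the length, and `remove_value()` is a no-op for absent values. It assumes the header declarations were updated to match these definitions; the concrete numbers are illustrative:

```cpp
#include "sorted_array.h"   // from this repository
#include <cassert>

int main() {
    sorted_array ids;

    ids.append(10);
    ids.append(40);
    size_t pos = ids.append(25);    // 25 < current max, so the sequence is re-encoded
    assert(pos == 1);               // 25 lands between 10 and 40
    assert(ids.at(1) == 25);
    assert(ids.getLength() == 3);

    ids.remove_value(25);                        // present: list shrinks by one
    assert(ids.getLength() == 2);
    assert(ids.indexOf(25) == ids.getLength());  // "not found" is signalled by returning length

    ids.remove_value(100);                       // absent: lower-bound miss, call is a no-op
    assert(ids.getLength() == 2);

    return 0;
}
```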
|
||||
|
||||
void sorted_array::remove_values(uint32_t *sorted_values, uint32_t sorted_values_length) {
    uint32_t *curr_array = uncompress();

    uint32_t *new_array = new uint32_t[length];
    uint32_t new_index = 0;
    uint32_t curr_index = 0;

    uint32_t sorted_values_index = 0;
    uint32_t curr_index = 0;

    while(curr_index < length) {
        if(sorted_values_index < values_length && curr_array[curr_index] >= sorted_values[sorted_values_index]) {
            // skip copying
            if(curr_array[curr_index] == sorted_values[sorted_values_index]) {
                curr_index++;
            }
        if(sorted_values_index < sorted_values_length && sorted_values[sorted_values_index] == curr_array[curr_index]) {
            curr_index++;
            sorted_values_index++;
        } else {
            new_array[new_index++] = curr_array[curr_index++];
||||
|
@ -45,6 +45,31 @@ TEST(ArrayTest, Append) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ArrayTest, InsertValues) {
|
||||
std::vector<uint32_t> eles = {10, 1, 4, 5, 7};
|
||||
array arr;
|
||||
|
||||
for(size_t i=0; i < eles.size(); i++) {
|
||||
arr.append(eles[i]);
|
||||
}
|
||||
|
||||
uint32_t insert_arr[2] = {2, 3};
|
||||
arr.insert(2, insert_arr, 2);
|
||||
eles = {10, 1, 2, 3, 4, 5, 7};
|
||||
|
||||
for(size_t i=0; i < eles.size(); i++) {
|
||||
ASSERT_EQ(eles[i], arr.at(i));
|
||||
}
|
||||
|
||||
uint32_t insert_arr2[2] = {20, 25};
|
||||
arr.insert(6, insert_arr2, 2);
|
||||
|
||||
eles = {10, 1, 2, 3, 4, 5, 20, 25, 7};
|
||||
for(size_t i=0; i < eles.size(); i++) {
|
||||
ASSERT_EQ(eles[i], arr.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ArrayTest, Uncompress) {
|
||||
const size_t SIZE = 10*1000;
|
||||
|
||||
|
@ -71,6 +71,11 @@ TEST_F(CollectionFacetingTest, FacetFieldStringFiltering) {
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// multiple tokens but with a typo on one of them
|
||||
results = coll_str->search("*", query_fields, "starring:= ssamuel l. Jackson", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
ASSERT_EQ(0, results["found"].get<size_t>());
|
||||
|
||||
// same should succeed when verbatim filter is made
|
||||
results = coll_str->search("*", query_fields, "starring:= samuel l. Jackson", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
@ -85,6 +90,11 @@ TEST_F(CollectionFacetingTest, FacetFieldStringFiltering) {
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
|
||||
// contains when only 1 token matches
|
||||
results = coll_str->search("*", query_fields, "starring: samuel johnson", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ASSERT_EQ(2, results["found"].get<size_t>());
|
||||
|
||||
collectionManager.drop_collection("coll_str");
|
||||
}
|
||||
|
||||
@ -131,6 +141,9 @@ TEST_F(CollectionFacetingTest, FacetFieldStringArrayFiltering) {
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags:= FINE", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags:= FFINE PLATINUM", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// partial token filter should be made without "=" operator
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags: PLATINUM", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
@ -64,7 +64,7 @@ TEST_F(CollectionGroupingTest, GroupingBasics) {
|
||||
auto res = coll_group->search("*", {}, "", {"brand"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"size"}, 2).get();
|
||||
|
||||
@ -107,7 +107,7 @@ TEST_F(CollectionGroupingTest, GroupingBasics) {
|
||||
res = coll_group->search("*", {}, "", {"brand"}, sort_size, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "brand: omeg", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "brand: omeg", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"rating"}, 2).get();
|
||||
|
||||
@ -147,7 +147,7 @@ TEST_F(CollectionGroupingTest, GroupingCompoundKey) {
|
||||
auto res = coll_group->search("*", {}, "", {"brand"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"size", "brand"}, 2).get();
|
||||
|
||||
@ -194,7 +194,7 @@ TEST_F(CollectionGroupingTest, GroupingCompoundKey) {
|
||||
res = coll_group->search("*", {}, "", {"brand"}, {}, 0, 2, 2, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"size", "brand"}, 2).get();
|
||||
|
||||
@ -230,7 +230,7 @@ TEST_F(CollectionGroupingTest, GroupingCompoundKey) {
|
||||
auto res_op = coll_group->search("*", {}, "", {"brand"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "brand: omeg", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "brand: omeg", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"rating"}, 100);
|
||||
|
||||
@ -240,7 +240,7 @@ TEST_F(CollectionGroupingTest, GroupingCompoundKey) {
|
||||
res_op = coll_group->search("*", {}, "", {"brand"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "brand: omeg", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "brand: omeg", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"rating"}, 0);
|
||||
|
||||
@ -252,7 +252,7 @@ TEST_F(CollectionGroupingTest, GroupingWithGropLimitOfOne) {
|
||||
auto res = coll_group->search("*", {}, "", {"brand"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"brand"}, 1).get();
|
||||
|
||||
@ -322,7 +322,7 @@ TEST_F(CollectionGroupingTest, GroupingWithArrayFieldAndOverride) {
|
||||
auto res = coll_group->search("shirt", {"title"}, "", {"brand"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 30, 5,
|
||||
"", 10,
|
||||
{}, {}, {"colors"}, 2).get();
|
||||
|
||||
|
@ -213,7 +213,13 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
// create a new collection manager to ensure that it restores the records from the disk backed store
|
||||
CollectionManager & collectionManager2 = CollectionManager::get_instance();
|
||||
collectionManager2.init(store, 1.0, "auth_key");
|
||||
collectionManager2.load();
|
||||
auto load_op = collectionManager2.load();
|
||||
|
||||
if(!load_op.ok()) {
|
||||
LOG(ERROR) << load_op.error();
|
||||
}
|
||||
|
||||
ASSERT_TRUE(load_op.ok());
|
||||
|
||||
collection1 = collectionManager2.get_collection("collection1");
|
||||
ASSERT_NE(nullptr, collection1);
|
||||
|
@ -271,7 +271,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
|
||||
auto results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
@ -289,7 +289,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
|
||||
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, hidden_hits).get();
|
||||
|
||||
@ -305,7 +305,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
|
||||
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 2, 2, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, hidden_hits).get();
|
||||
|
||||
@ -341,7 +341,7 @@ TEST_F(CollectionOverrideTest, IncludeExcludeHitsQuery) {
|
||||
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
{}, {hidden_hits}).get();
|
||||
|
||||
@ -362,7 +362,7 @@ TEST_F(CollectionOverrideTest, PinnedHitsGrouping) {
|
||||
auto results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}).get();
|
||||
|
||||
@ -383,7 +383,7 @@ TEST_F(CollectionOverrideTest, PinnedHitsGrouping) {
|
||||
results = coll_mul_fields->search("the", {"title"}, "", {"starring"}, {}, 0, 50, 1, FREQUENCY,
|
||||
false, Index::DROP_TOKENS_THRESHOLD,
|
||||
spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30,
|
||||
spp::sparse_hash_set<std::string>(), 10, "starring: will", 30, 5,
|
||||
"", 10,
|
||||
pinned_hits, {}, {"cast"}, 2).get();
|
||||
|
||||
|
@ -14,6 +14,9 @@ protected:
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
std::vector<sort_by> sort_fields;
|
||||
|
||||
// used for generating random text
|
||||
std::vector<std::string> words;
|
||||
|
||||
void setupCollection() {
|
||||
std::string state_dir_path = "/tmp/typesense_test/collection";
|
||||
LOG(INFO) << "Truncating and creating: " << state_dir_path;
|
||||
@ -48,6 +51,12 @@ protected:
|
||||
}
|
||||
|
||||
infile.close();
|
||||
|
||||
std::ifstream words_file(std::string(ROOT_DIR)+"test/resources/common100_english.txt");
|
||||
std::stringstream strstream;
|
||||
strstream << words_file.rdbuf();
|
||||
words_file.close();
|
||||
StringUtils::split(strstream.str(), words, "\n");
|
||||
}
|
||||
|
||||
virtual void SetUp() {
|
||||
@ -59,6 +68,18 @@ protected:
|
||||
collectionManager.dispose();
|
||||
delete store;
|
||||
}
|
||||
|
||||
std::string get_text(size_t num_words) {
|
||||
time_t t;
|
||||
srand((unsigned) time(&t));
|
||||
std::vector<std::string> strs;
|
||||
|
||||
for(size_t i = 0 ; i < num_words ; i++ ) {
|
||||
int word_index = rand() % 100;
|
||||
strs.push_back(words[word_index]);
|
||||
}
|
||||
return StringUtils::join(strs, " ");
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(CollectionTest, VerifyCountOfDocuments) {
|
||||
@ -558,14 +579,14 @@ TEST_F(CollectionTest, TypoTokensThreshold) {
|
||||
// Query expansion should happen only based on the `typo_tokens_threshold` value
|
||||
auto results = collection->search("launch", {"title"}, "", {}, sort_fields, 2, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, "", 0).get();
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "", 0).get();
|
||||
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(5, results["found"].get<size_t>());
|
||||
|
||||
results = collection->search("launch", {"title"}, "", {}, sort_fields, 2, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, "", 10).get();
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "", 10).get();
|
||||
|
||||
ASSERT_EQ(7, results["hits"].size());
|
||||
ASSERT_EQ(7, results["found"].get<size_t>());
|
||||
@ -1296,6 +1317,243 @@ std::vector<nlohmann::json> import_res_to_json(const std::vector<std::string>& i
|
||||
return out;
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, ImportDocumentsUpsert) {
|
||||
Collection *coll_mul_fields;
|
||||
|
||||
std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl");
|
||||
std::stringstream strstream;
|
||||
strstream << infile.rdbuf();
|
||||
infile.close();
|
||||
|
||||
std::vector<std::string> import_records;
|
||||
StringUtils::split(strstream.str(), import_records, "\n");
|
||||
|
||||
std::vector<field> fields = {
|
||||
field("title", field_types::STRING, false),
|
||||
field("starring", field_types::STRING, false),
|
||||
field("cast", field_types::STRING_ARRAY, false),
|
||||
field("points", field_types::INT32, false)
|
||||
};
|
||||
|
||||
coll_mul_fields = collectionManager.get_collection("coll_mul_fields");
|
||||
if(coll_mul_fields == nullptr) {
|
||||
coll_mul_fields = collectionManager.create_collection("coll_mul_fields", 1, fields, "points").get();
|
||||
}
|
||||
|
||||
// try importing records
|
||||
nlohmann::json document;
|
||||
nlohmann::json import_response = coll_mul_fields->add_many(import_records, document);
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(18, import_response["num_imported"].get<int>());
|
||||
|
||||
// update + upsert records
|
||||
std::vector<std::string> more_records = {R"({"id": "0", "title": "The Fifth Harry"})",
|
||||
R"({"id": "2", "cast": ["Chris Fisher", "Rand Alan"]})",
|
||||
R"({"id": "18", "title": "Back Again Forest", "points": 45, "starring": "Ronald Wells", "cast": ["Dant Saren"]})",
|
||||
R"({"id": "6", "points": 77})"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records, document, UPSERT);
|
||||
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(4, import_response["num_imported"].get<int>());
|
||||
|
||||
std::vector<nlohmann::json> import_results = import_res_to_json(more_records);
|
||||
ASSERT_EQ(4, import_results.size());
|
||||
|
||||
for(size_t i=0; i<4; i++) {
|
||||
ASSERT_TRUE(import_results[i]["success"].get<bool>());
|
||||
ASSERT_EQ(1, import_results[i].size());
|
||||
}
|
||||
|
||||
auto results = coll_mul_fields->search("*", query_fields, "", {}, sort_fields, 0, 30, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(19, results["hits"].size());
|
||||
|
||||
ASSERT_EQ(19, coll_mul_fields->get_num_documents());
|
||||
|
||||
results = coll_mul_fields->search("back again forest", query_fields, "", {}, sort_fields, 0, 30, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("Back Again Forest", coll_mul_fields->get("18").get()["title"].get<std::string>().c_str());
|
||||
|
||||
results = coll_mul_fields->search("fifth", query_fields, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
ASSERT_STREQ("The <mark>Fifth</mark> Harry", results["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("The Woman in the <mark>Fifth</mark> from Kristin", results["hits"][1]["highlights"][0]["snippet"].get<std::string>().c_str());
|
||||
|
||||
results = coll_mul_fields->search("burgundy", query_fields, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
results = coll_mul_fields->search("harry", query_fields, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
results = coll_mul_fields->search("captain america", query_fields, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
ASSERT_EQ(77, results["hits"][0]["document"]["points"].get<size_t>());
|
||||
|
||||
// upserting with some bad docs
|
||||
more_records = {R"({"id": "1", "title": "Wake up, Harry"})",
|
||||
R"({"id": "90", "cast": ["Kim Werrel", "Random Wake"]})", // missing fields
|
||||
R"({"id": "5", "points": 60})",
|
||||
R"({"id": "24", "starring": "John", "cast": ["John Kim"], "points": 11})"}; // missing fields
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records, document, UPSERT);
|
||||
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(2, import_response["num_imported"].get<int>());
|
||||
|
||||
import_results = import_res_to_json(more_records);
|
||||
ASSERT_FALSE(import_results[1]["success"].get<bool>());
|
||||
ASSERT_FALSE(import_results[3]["success"].get<bool>());
|
||||
ASSERT_STREQ("Field `points` has been declared as a default sorting field, but is not found in the document.", import_results[1]["error"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("Field `title` has been declared in the schema, but is not found in the document.", import_results[3]["error"].get<std::string>().c_str());
|
||||
|
||||
// try to duplicate records without upsert option
|
||||
|
||||
more_records = {R"({"id": "1", "title": "Wake up, Harry"})",
|
||||
R"({"id": "5", "points": 60})"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records, document, CREATE);
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(0, import_response["num_imported"].get<int>());
|
||||
|
||||
import_results = import_res_to_json(more_records);
|
||||
ASSERT_FALSE(import_results[0]["success"].get<bool>());
|
||||
ASSERT_FALSE(import_results[1]["success"].get<bool>());
|
||||
ASSERT_STREQ("A document with id 1 already exists.", import_results[0]["error"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("A document with id 5 already exists.", import_results[1]["error"].get<std::string>().c_str());
|
||||
|
||||
// update document with verbatim fields, except for points
|
||||
more_records = {R"({"id": "3", "cast":["Matt Damon","Ben Affleck","Minnie Driver"],
|
||||
"points":70,"starring":"Robin Williams","starring_facet":"Robin Williams",
|
||||
"title":"Good Will Hunting"})"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records, document, UPDATE);
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(1, import_response["num_imported"].get<int>());
|
||||
|
||||
results = coll_mul_fields->search("Good Will Hunting", query_fields, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(70, results["hits"][0]["document"]["points"].get<uint32_t>());
|
||||
|
||||
// updating a document that does not exist should fail, others should succeed
|
||||
more_records = {R"({"id": "20", "points": 51})",
|
||||
R"({"id": "1", "points": 64})"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records, document, UPDATE);
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(1, import_response["num_imported"].get<int>());
|
||||
|
||||
import_results = import_res_to_json(more_records);
|
||||
ASSERT_FALSE(import_results[0]["success"].get<bool>());
|
||||
ASSERT_TRUE(import_results[1]["success"].get<bool>());
|
||||
ASSERT_STREQ("Could not find a document with id: 20", import_results[0]["error"].get<std::string>().c_str());
|
||||
ASSERT_EQ(404, import_results[0]["code"].get<size_t>());
|
||||
|
||||
results = coll_mul_fields->search("wake up harry", query_fields, "", {}, sort_fields, 0, 10, 1, FREQUENCY, false).get();
|
||||
ASSERT_EQ(64, results["hits"][0]["document"]["points"].get<uint32_t>());
|
||||
|
||||
// trying to create documents with existing IDs should fail
|
||||
more_records = {R"({"id": "2", "points": 51})",
|
||||
R"({"id": "1", "points": 64})"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records, document, CREATE);
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(0, import_response["num_imported"].get<int>());
|
||||
|
||||
import_results = import_res_to_json(more_records);
|
||||
ASSERT_FALSE(import_results[0]["success"].get<bool>());
|
||||
ASSERT_FALSE(import_results[1]["success"].get<bool>());
|
||||
ASSERT_STREQ("A document with id 2 already exists.", import_results[0]["error"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("A document with id 1 already exists.", import_results[1]["error"].get<std::string>().c_str());
|
||||
|
||||
ASSERT_EQ(409, import_results[0]["code"].get<size_t>());
|
||||
ASSERT_EQ(409, import_results[1]["code"].get<size_t>());
|
||||
}
|
||||
|
||||
|
||||
TEST_F(CollectionTest, ImportDocumentsUpsertOptional) {
|
||||
Collection *coll1;
|
||||
std::vector<field> fields = {
|
||||
field("title", field_types::STRING_ARRAY, false, true),
|
||||
field("points", field_types::INT32, false)
|
||||
};
|
||||
|
||||
coll1 = collectionManager.get_collection("coll1");
|
||||
if(coll1 == nullptr) {
|
||||
coll1 = collectionManager.create_collection("coll1", 4, fields, "points").get();
|
||||
}
|
||||
|
||||
std::vector<std::string> records;
|
||||
|
||||
size_t NUM_RECORDS = 1000;
|
||||
|
||||
for(size_t i=0; i<NUM_RECORDS; i++) {
|
||||
nlohmann::json doc;
|
||||
doc["id"] = std::to_string(i);
|
||||
doc["points"] = i;
|
||||
records.push_back(doc.dump());
|
||||
}
|
||||
|
||||
// import records without title
|
||||
|
||||
nlohmann::json document;
|
||||
nlohmann::json import_response = coll1->add_many(records, document, CREATE);
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(1000, import_response["num_imported"].get<int>());
|
||||
|
||||
// upsert documents with title
|
||||
|
||||
records.clear();
|
||||
|
||||
for(size_t i=0; i<NUM_RECORDS; i++) {
|
||||
nlohmann::json updoc;
|
||||
updoc["id"] = std::to_string(i);
|
||||
updoc["title"] = {
|
||||
get_text(10),
|
||||
get_text(10),
|
||||
get_text(10),
|
||||
get_text(10),
|
||||
};
|
||||
records.push_back(updoc.dump());
|
||||
}
|
||||
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
import_response = coll1->add_many(records, document, UPSERT);
|
||||
auto time_micros = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
std::chrono::high_resolution_clock::now() - begin).count();
|
||||
|
||||
//LOG(INFO) << "Time taken for first upsert: " << time_micros;
|
||||
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(1000, import_response["num_imported"].get<int>());
|
||||
|
||||
// run upsert again with title override
|
||||
|
||||
records.clear();
|
||||
|
||||
for(size_t i=0; i<NUM_RECORDS; i++) {
|
||||
nlohmann::json updoc;
|
||||
updoc["id"] = std::to_string(i);
|
||||
updoc["title"] = {
|
||||
get_text(10),
|
||||
get_text(10),
|
||||
get_text(10),
|
||||
get_text(10),
|
||||
};
|
||||
records.push_back(updoc.dump());
|
||||
}
|
||||
|
||||
begin = std::chrono::high_resolution_clock::now();
|
||||
import_response = coll1->add_many(records, document, UPSERT);
|
||||
time_micros = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
std::chrono::high_resolution_clock::now() - begin).count();
|
||||
|
||||
//LOG(INFO) << "Time taken for second upsert: " << time_micros;
|
||||
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(1000, import_response["num_imported"].get<int>());
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, ImportDocuments) {
|
||||
Collection *coll_mul_fields;
|
||||
|
||||
@ -1320,8 +1578,8 @@ TEST_F(CollectionTest, ImportDocuments) {
|
||||
}
|
||||
|
||||
// try importing records
|
||||
|
||||
nlohmann::json import_response = coll_mul_fields->add_many(import_records);
|
||||
nlohmann::json document;
|
||||
nlohmann::json import_response = coll_mul_fields->add_many(import_records, document);
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(18, import_response["num_imported"].get<int>());
|
||||
|
||||
@ -1346,7 +1604,7 @@ TEST_F(CollectionTest, ImportDocuments) {
|
||||
|
||||
// verify that empty import is handled gracefully
|
||||
std::vector<std::string> empty_records;
|
||||
import_response = coll_mul_fields->add_many(empty_records);
|
||||
import_response = coll_mul_fields->add_many(empty_records, document);
|
||||
ASSERT_TRUE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(0, import_response["num_imported"].get<int>());
|
||||
|
||||
@ -1360,7 +1618,7 @@ TEST_F(CollectionTest, ImportDocuments) {
|
||||
"{\"title\": \"Test4\", \"points\": 55, "
|
||||
"\"cast\": [\"Tom Skerritt\"] }"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records);
|
||||
import_response = coll_mul_fields->add_many(more_records, document);
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(2, import_response["num_imported"].get<int>());
|
||||
|
||||
@ -1385,7 +1643,7 @@ TEST_F(CollectionTest, ImportDocuments) {
|
||||
"{\"id\": \"id1\", \"title\": \"Test1\", \"starring\": \"Rand Fish\", \"points\": 12, "
|
||||
"\"cast\": [\"Tom Skerritt\"] }"};
|
||||
|
||||
import_response = coll_mul_fields->add_many(more_records);
|
||||
import_response = coll_mul_fields->add_many(more_records, document);
|
||||
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(1, import_response["num_imported"].get<int>());
|
||||
@ -1403,7 +1661,7 @@ TEST_F(CollectionTest, ImportDocuments) {
|
||||
|
||||
// valid JSON but not a document
|
||||
more_records = {"[]"};
|
||||
import_response = coll_mul_fields->add_many(more_records);
|
||||
import_response = coll_mul_fields->add_many(more_records, document);
|
||||
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(0, import_response["num_imported"].get<int>());
|
||||
@ -1417,7 +1675,7 @@ TEST_F(CollectionTest, ImportDocuments) {
|
||||
|
||||
// invalid JSON
|
||||
more_records = {"{"};
|
||||
import_response = coll_mul_fields->add_many(more_records);
|
||||
import_response = coll_mul_fields->add_many(more_records, document);
|
||||
|
||||
ASSERT_FALSE(import_response["success"].get<bool>());
|
||||
ASSERT_EQ(0, import_response["num_imported"].get<int>());
|
||||
@ -1756,7 +2014,7 @@ TEST_F(CollectionTest, IndexingWithBadData) {
|
||||
sample_collection = collectionManager.create_collection("sample_collection", 4, fields, "age").get();
|
||||
}
|
||||
|
||||
const Option<nlohmann::json> & search_fields_missing_op1 = sample_collection->add("{\"namezz\": \"foo\", \"age\": 29, \"average\": 78}");
|
||||
const Option<nlohmann::json> & search_fields_missing_op1 = sample_collection->add("{\"name\": \"foo\", \"age\": 29, \"average\": 78}");
|
||||
ASSERT_FALSE(search_fields_missing_op1.ok());
|
||||
ASSERT_STREQ("Field `tags` has been declared in the schema, but is not found in the document.",
|
||||
search_fields_missing_op1.error().c_str());
|
||||
@ -2210,9 +2468,169 @@ TEST_F(CollectionTest, SearchHighlightShouldFollowThreshold) {
|
||||
ASSERT_STREQ("fox jumped over the <mark>lazy</mark> dog and ran straight",
|
||||
res["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
|
||||
|
||||
// specify the number of surrounding tokens to return
|
||||
size_t highlight_affix_num_tokens = 2;
|
||||
|
||||
res = coll1->search("lazy", {"title"}, "", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, highlight_affix_num_tokens).get();
|
||||
ASSERT_STREQ("over the <mark>lazy</mark> dog and",
|
||||
res["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
|
||||
|
||||
highlight_affix_num_tokens = 0;
|
||||
res = coll1->search("lazy", {"title"}, "", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, highlight_affix_num_tokens).get();
|
||||
ASSERT_STREQ("<mark>lazy</mark>",
|
||||
res["hits"][0]["highlights"][0]["snippet"].get<std::string>().c_str());
|
||||
|
||||
collectionManager.drop_collection("coll1");
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, UpdateDocument) {
|
||||
Collection *coll1;
|
||||
|
||||
std::vector<field> fields = {field("title", field_types::STRING, true),
|
||||
field("tags", field_types::STRING_ARRAY, true),
|
||||
field("points", field_types::INT32, false)};
|
||||
|
||||
std::vector<sort_by> sort_fields = {sort_by("points", "DESC")};
|
||||
|
||||
coll1 = collectionManager.get_collection("coll1");
|
||||
if (coll1 == nullptr) {
|
||||
coll1 = collectionManager.create_collection("coll1", 1, fields, "points").get();
|
||||
}
|
||||
|
||||
nlohmann::json doc;
|
||||
doc["id"] = "100";
|
||||
doc["title"] = "The quick brown fox jumped over the lazy dog and ran straight to the forest to sleep.";
|
||||
doc["tags"] = {"NEWS", "LAZY"};
|
||||
doc["points"] = 25;
|
||||
|
||||
auto add_op = coll1->add(doc.dump());
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
auto res = coll1->search("lazy", {"title"}, "", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_STREQ("The quick brown fox jumped over the lazy dog and ran straight to the forest to sleep.",
|
||||
res["hits"][0]["document"]["title"].get<std::string>().c_str());
|
||||
|
||||
// try changing the title and searching for an older token
|
||||
doc["title"] = "The quick brown fox.";
|
||||
add_op = coll1->add(doc.dump(), UPSERT);
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
ASSERT_EQ(1, coll1->get_num_documents());
|
||||
|
||||
res = coll1->search("lazy", {"title"}, "", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
|
||||
res = coll1->search("quick", {"title"}, "", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_STREQ("The quick brown fox.", res["hits"][0]["document"]["title"].get<std::string>().c_str());
|
||||
|
||||
// try to update document tags without `id`
|
||||
nlohmann::json doc2;
|
||||
doc2["tags"] = {"SENTENCE"};
|
||||
add_op = coll1->add(doc2.dump(), UPDATE);
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_STREQ("For update, the `id` key must be provided.", add_op.error().c_str());
|
||||
|
||||
// now change tags with id
|
||||
doc2["id"] = "100";
|
||||
add_op = coll1->add(doc2.dump(), UPDATE);
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
// check for old tag
|
||||
res = coll1->search("NEWS", {"tags"}, "", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
|
||||
// now check for new tag and also try faceting on that field
|
||||
res = coll1->search("SENTENCE", {"tags"}, "", {"tags"}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_STREQ("SENTENCE", res["facet_counts"][0]["counts"][0]["value"].get<std::string>().c_str());
|
||||
|
||||
// try changing points
|
||||
nlohmann::json doc3;
|
||||
doc3["points"] = 99;
|
||||
doc3["id"] = "100";
|
||||
|
||||
add_op = coll1->add(doc3.dump(), UPDATE);
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
res = coll1->search("*", {"tags"}, "points: > 90", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_EQ(99, res["hits"][0]["document"]["points"].get<size_t>());
|
||||
|
||||
// id can be passed by param
|
||||
nlohmann::json doc4;
|
||||
doc4["points"] = 105;
|
||||
|
||||
add_op = coll1->add(doc4.dump(), UPSERT, "100");
|
||||
ASSERT_TRUE(add_op.ok());
|
||||
|
||||
res = coll1->search("*", {"tags"}, "points: > 101", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_EQ(105, res["hits"][0]["document"]["points"].get<size_t>());
|
||||
|
||||
// try to change a field with bad value and verify that old document is put back
|
||||
doc4["points"] = "abc";
|
||||
add_op = coll1->add(doc4.dump(), UPSERT, "100");
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
|
||||
res = coll1->search("*", {"tags"}, "points: > 101", {}, sort_fields, 0, 10, 1,
|
||||
token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
|
||||
spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();
|
||||
|
||||
ASSERT_EQ(1, res["hits"].size());
|
||||
ASSERT_EQ(105, res["hits"][0]["document"]["points"].get<size_t>());
|
||||
|
||||
// when explicit path id does not match doc id, error should be returned
|
||||
nlohmann::json doc5;
|
||||
doc5["id"] = "800";
|
||||
doc5["title"] = "The Secret Seven";
|
||||
doc5["points"] = 250;
|
||||
doc5["tags"] = {"BOOK", "ENID BLYTON"};
|
||||
|
||||
add_op = coll1->add(doc5.dump(), UPSERT, "799");
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_EQ(400, add_op.code());
|
||||
ASSERT_STREQ("The `id` of the resource does not match the `id` in the JSON body.", add_op.error().c_str());
|
||||
|
||||
// passing an empty id should not succeed
|
||||
nlohmann::json doc6;
|
||||
doc6["id"] = "";
|
||||
doc6["title"] = "The Secret Seven";
|
||||
doc6["points"] = 250;
|
||||
doc6["tags"] = {"BOOK", "ENID BLYTON"};
|
||||
|
||||
add_op = coll1->add(doc6.dump(), UPDATE);
|
||||
ASSERT_FALSE(add_op.ok());
|
||||
ASSERT_EQ(400, add_op.code());
|
||||
ASSERT_STREQ("The `id` should not be empty.", add_op.error().c_str());
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, SearchHighlightFieldFully) {
    Collection *coll1;

@ -2240,7 +2658,7 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {

    auto res = coll1->search("lazy", {"title"}, "", {}, sort_fields, 0, 10, 1,
                             token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
                             spp::sparse_hash_set<std::string>(), 10, "", 5, "title").get();
                             spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title").get();

    ASSERT_EQ(1, res["hits"][0]["highlights"].size());
    ASSERT_STREQ("The quick brown fox jumped over the <mark>lazy</mark> dog and ran straight to the forest to sleep.",
@ -2249,14 +2667,14 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {
    // should not return value key when highlight_full_fields is not specified
    res = coll1->search("lazy", {"title"}, "", {}, sort_fields, 0, 10, 1,
                        token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
                        spp::sparse_hash_set<std::string>(), 10, "", 5, "").get();
                        spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "").get();

    ASSERT_EQ(2, res["hits"][0]["highlights"][0].size());

    // query multiple fields
    res = coll1->search("lazy", {"title", "tags"}, "", {}, sort_fields, 0, 10, 1,
                        token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
                        spp::sparse_hash_set<std::string>(), 10, "", 5, "title, tags").get();
                        spp::sparse_hash_set<std::string>(), 10, "", 5, 5, "title, tags").get();

    ASSERT_EQ(2, res["hits"][0]["highlights"].size());
    ASSERT_STREQ("The quick brown fox jumped over the <mark>lazy</mark> dog and ran straight to the forest to sleep.",
@ -2269,7 +2687,7 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {
    spp::sparse_hash_set<std::string> excluded_fields = {"tags"};
    res = coll1->search("lazy", {"title", "tags"}, "", {}, sort_fields, 0, 10, 1,
                        token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
                        excluded_fields, 10, "", 5, "title, tags").get();
                        excluded_fields, 10, "", 5, 5, "title, tags").get();

    ASSERT_EQ(1, res["hits"][0]["highlights"].size());
    ASSERT_STREQ("The quick brown fox jumped over the <mark>lazy</mark> dog and ran straight to the forest to sleep.",
@ -2279,7 +2697,7 @@ TEST_F(CollectionTest, SearchHighlightFieldFully) {
    excluded_fields = {"tags", "title"};
    res = coll1->search("lazy", {"title", "tags"}, "", {}, sort_fields, 0, 10, 1,
                        token_ordering::FREQUENCY, true, 10, spp::sparse_hash_set<std::string>(),
                        excluded_fields, 10, "", 5, "title, tags").get();
                        excluded_fields, 10, "", 5, 5, "title, tags").get();
    ASSERT_EQ(0, res["hits"][0]["highlights"].size());

    collectionManager.drop_collection("coll1");
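Every updated call in this hunk gains an extra numeric argument just before the highlight-fields string, which appears to control how many context tokens are kept on either side of a highlighted token when a snippet is built. The standalone sketch below illustrates that idea with plain C++; it is a conceptual illustration under that assumption, not Typesense's highlighter.

```
#include <algorithm>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Conceptual sketch only: keep `affix_num_tokens` tokens on either side of the
// first token equal to `match`, and wrap the match in <mark> tags.
std::string snippet_around(const std::string& text, const std::string& match,
                           size_t affix_num_tokens) {
    std::vector<std::string> tokens;
    std::istringstream iss(text);
    for(std::string tok; iss >> tok; ) {
        tokens.push_back(tok);
    }

    size_t pos = tokens.size();
    for(size_t i = 0; i < tokens.size(); i++) {
        if(tokens[i] == match) { pos = i; break; }
    }
    if(pos == tokens.size()) {
        return "";  // no match found
    }

    size_t start = (pos >= affix_num_tokens) ? pos - affix_num_tokens : 0;
    size_t end = std::min(tokens.size() - 1, pos + affix_num_tokens);

    std::string snippet;
    for(size_t i = start; i <= end; i++) {
        if(!snippet.empty()) snippet += " ";
        snippet += (i == pos) ? "<mark>" + tokens[i] + "</mark>" : tokens[i];
    }
    return snippet;
}

int main() {
    std::cout << snippet_around("The quick brown fox jumped over the lazy dog", "lazy", 2) << "\n";
    // prints: over the <mark>lazy</mark> dog
}
```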
59  test/index_test.cpp  Normal file
@ -0,0 +1,59 @@
#include <gtest/gtest.h>
#include "index.h"
#include <vector>

TEST(IndexTest, ScrubReindexDoc) {
    std::unordered_map<std::string, field> search_schema;
    search_schema.emplace("title", field("title", field_types::STRING, false));
search_schema.emplace("points", field("title", field_types::INT32, false));
|
||||
search_schema.emplace("cast", field("cast", field_types::STRING_ARRAY, false));
|
||||
search_schema.emplace("movie", field("movie", field_types::BOOL, false));
|
||||
|
||||
Index index("index", search_schema, {}, {});
|
||||
nlohmann::json old_doc;
|
||||
old_doc["id"] = "1";
|
||||
old_doc["title"] = "One more thing.";
|
||||
old_doc["points"] = 100;
|
||||
old_doc["cast"] = {"John Wick", "Jeremy Renner"};
|
||||
old_doc["movie"] = true;
|
||||
|
||||
// all fields remain same
|
||||
|
||||
nlohmann::json update_doc1, del_doc1;
|
||||
update_doc1 = old_doc;
|
||||
del_doc1 = old_doc;
|
||||
|
||||
index.scrub_reindex_doc(update_doc1, del_doc1, old_doc);
|
||||
ASSERT_EQ(1, del_doc1.size());
|
||||
ASSERT_STREQ("1", del_doc1["id"].get<std::string>().c_str());
|
||||
|
||||
// when only some fields are updated
|
||||
|
||||
nlohmann::json update_doc2, del_doc2;
|
||||
update_doc2["id"] = "1";
|
||||
update_doc2["points"] = 100;
|
||||
update_doc2["cast"] = {"Jack"};
|
||||
|
||||
del_doc2 = update_doc2;
|
||||
|
||||
index.scrub_reindex_doc(update_doc2, del_doc2, old_doc);
|
||||
ASSERT_EQ(2, del_doc2.size());
|
||||
ASSERT_STREQ("1", del_doc2["id"].get<std::string>().c_str());
|
||||
std::vector<std::string> cast = del_doc2["cast"].get<std::vector<std::string>>();
|
||||
ASSERT_EQ(1, cast.size());
|
||||
ASSERT_STREQ("Jack", cast[0].c_str());
|
||||
|
||||
// containing fields not part of search schema
|
||||
|
||||
nlohmann::json update_doc3, del_doc3;
|
||||
update_doc3["id"] = "1";
|
||||
update_doc3["title"] = "The Lawyer";
|
||||
update_doc3["foo"] = "Bar";
|
||||
|
||||
del_doc3 = update_doc3;
|
||||
index.scrub_reindex_doc(update_doc3, del_doc3, old_doc);
|
||||
ASSERT_EQ(3, del_doc3.size());
|
||||
ASSERT_STREQ("1", del_doc3["id"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("The Lawyer", del_doc3["title"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("Bar", del_doc3["foo"].get<std::string>().c_str());
|
||||
}
|
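Taken together, the three cases assert that scrub_reindex_doc drops any field whose value is identical in the old and updated documents (so it does not need to be re-indexed), while keeping `id`, changed fields, and fields the old document never had. The snippet below is a self-contained approximation of that behaviour, inferred only from these assertions and written against nlohmann::json; it is not the actual implementation.

```
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

// Approximate behaviour inferred from the assertions above: erase every field
// (except "id") whose value is unchanged between old_doc and update_doc, from
// both the update document and the deletion document.
void scrub_reindex_doc_sketch(nlohmann::json& update_doc, nlohmann::json& del_doc,
                              const nlohmann::json& old_doc) {
    std::vector<std::string> unchanged;
    for(auto it = update_doc.begin(); it != update_doc.end(); ++it) {
        if(it.key() == "id") {
            continue;
        }
        if(old_doc.contains(it.key()) && old_doc.at(it.key()) == it.value()) {
            unchanged.push_back(it.key());
        }
    }
    for(const auto& key: unchanged) {
        update_doc.erase(key);
        del_doc.erase(key);
    }
}
```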
100  test/resources/common100_english.txt  Normal file
@ -0,0 +1,100 @@
the
of
to
and
a
in
is
it
you
that
he
was
for
on
are
with
as
I
his
they
be
at
one
have
this
from
or
had
by
not
word
but
what
some
we
can
out
other
were
all
there
when
up
use
your
how
said
an
each
she
which
do
their
time
if
will
way
about
many
then
them
write
would
like
so
these
her
long
make
thing
see
him
two
has
look
more
day
could
go
come
did
number
sound
no
most
people
my
over
know
water
than
call
first
who
may
down
side
been
now
find
@ -12,7 +12,8 @@ TEST(SortedArrayTest, Append) {
    EXPECT_EQ(arr.indexOf(100), 0); // when not found must be equal to length (0 in this case)

    for(uint32_t i=0; i < SIZE; i++) {
        arr.append(i);
        size_t appended_index = arr.append(i);
        ASSERT_EQ(i, appended_index);
    }

    EXPECT_EQ(arr.getLength(), SIZE);
@ -28,11 +29,94 @@ TEST(SortedArrayTest, Append) {
    EXPECT_EQ(arr.indexOf(SIZE+1), SIZE);

    sorted_array arr_small;
    arr_small.append(100);
    size_t appended_index = arr_small.append(100);
    EXPECT_EQ(0, appended_index);
    EXPECT_EQ(arr_small.getLength(), 1);
    EXPECT_EQ(arr_small.at(0), 100);
}

TEST(SortedArrayTest, AppendOutOfOrder) {
    sorted_array arr;
    for(size_t i=5; i<=10; i++) {
        size_t appended_index = arr.append(i);
        ASSERT_EQ(i-5, appended_index);
    }

    EXPECT_EQ(6, arr.getLength());

    int appended_index = -1;

    appended_index = arr.append(1);
    ASSERT_EQ(0, appended_index);

    appended_index = arr.append(3);
    ASSERT_EQ(1, appended_index);

    appended_index = arr.append(2);
    ASSERT_EQ(1, appended_index);

    appended_index = arr.append(4);
    ASSERT_EQ(3, appended_index);

    appended_index = arr.append(11);
    ASSERT_EQ(10, appended_index);

    appended_index = arr.append(14);
    ASSERT_EQ(11, appended_index);

    appended_index = arr.append(12);
    ASSERT_EQ(11, appended_index);

    EXPECT_EQ(13, arr.getLength());
}
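The indices asserted here are simply the positions at which each value must land for the array to stay sorted, i.e. the lower-bound insertion point at the time of the call. Below is a quick standalone check of those expectations against std::lower_bound; it mirrors the test's expectations rather than sorted_array's internals.

```
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Compute the index a sorted container would place `value` at (lower-bound
// insertion point), then insert it there. Used only to sanity-check the
// indices asserted in the test above.
size_t sorted_insert_index(std::vector<uint32_t>& v, uint32_t value) {
    auto it = std::lower_bound(v.begin(), v.end(), value);
    size_t index = it - v.begin();
    v.insert(it, value);
    return index;
}

int main() {
    std::vector<uint32_t> v = {5, 6, 7, 8, 9, 10};
    assert(sorted_insert_index(v, 1) == 0);
    assert(sorted_insert_index(v, 3) == 1);
    assert(sorted_insert_index(v, 2) == 1);
    assert(sorted_insert_index(v, 4) == 3);
    assert(sorted_insert_index(v, 11) == 10);
    assert(sorted_insert_index(v, 14) == 11);
    assert(sorted_insert_index(v, 12) == 11);
    return 0;
}
```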

TEST(SortedArrayTest, InsertAtIndex) {
    std::vector<uint32_t> eles;
    sorted_array arr;
    for(size_t i=5; i<=9; i++) {
        arr.append(i);
    }

    arr.append(11);
    eles = {5, 6, 7, 8, 9, 11};

    for(size_t i=0; i < eles.size(); i++) {
        ASSERT_EQ(eles[i], arr.at(i));
    }

    arr.insert(0, 1);
    eles = { 1, 5, 6, 7, 8, 9, 11 };

    for(size_t i=0; i < eles.size(); i++) {
        ASSERT_EQ(eles[i], arr.at(i));
    }

    ASSERT_EQ(1, arr.at(0));
    ASSERT_EQ(5, arr.at(1));

    arr.insert(1, 2);
    eles = {1, 2, 5, 6, 7, 8, 9, 11};
    ASSERT_EQ(1, arr.at(0));
    ASSERT_EQ(2, arr.at(1));
    ASSERT_EQ(8, arr.getLength());

    for(size_t i=0; i < eles.size(); i++) {
        ASSERT_EQ(eles[i], arr.at(i));
    }

    arr.insert(7, 10);
    eles = { 1, 2, 5, 6, 7, 8, 9, 10, 11};
    ASSERT_EQ(10, arr.at(7));
    ASSERT_EQ(11, arr.at(8));
    ASSERT_EQ(9, arr.getLength());

    for(size_t i=0; i < eles.size(); i++) {
        ASSERT_EQ(eles[i], arr.at(i));
    }

    ASSERT_FALSE(arr.insert(9, 12)); // index out of range
}

TEST(SortedArrayTest, Load) {
    sorted_array arr;

@ -70,6 +154,32 @@ TEST(SortedArrayTest, Uncompress) {
    delete[] raw_sorted_arr;
}

TEST(SortedArrayTest, RemoveValue) {
    sorted_array arr;

    const size_t SIZE = 10*1000;
    for(size_t i=0; i<SIZE; i++) {
        arr.append(i);
    }

    uint32_t values[5] = {0, 100, 1000, 2000, SIZE-1};

    for(size_t i=0; i<5; i++) {
        arr.remove_value(values[i]);
    }

    ASSERT_EQ(arr.getLength(), SIZE-5);

    for(size_t i=0; i<SIZE-5; i++) {
        uint32_t value = arr.at(i);
        ASSERT_FALSE(value == 0);
        ASSERT_FALSE(value == 100);
        ASSERT_FALSE(value == 1000);
        ASSERT_FALSE(value == 2000);
        ASSERT_FALSE(value == SIZE-1);
    }
}
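The loop above verifies removal indirectly by scanning every remaining element. Below is a compact standalone model of the observable behaviour being asserted, using std::vector and std::lower_bound; it only mirrors what the test checks, not how sorted_array implements it.

```
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Model of the behaviour asserted above: erase `value` from a sorted sequence
// if present, leaving the remaining elements untouched and still in order.
void remove_value_sketch(std::vector<uint32_t>& v, uint32_t value) {
    auto it = std::lower_bound(v.begin(), v.end(), value);
    if(it != v.end() && *it == value) {
        v.erase(it);
    }
}

int main() {
    std::vector<uint32_t> v;
    for(uint32_t i = 0; i < 10000; i++) {
        v.push_back(i);
    }

    for(uint32_t value : {0u, 100u, 1000u, 2000u, 9999u}) {
        remove_value_sketch(v, value);
    }

    assert(v.size() == 10000 - 5);
    assert(!std::binary_search(v.begin(), v.end(), 1000u));
    return 0;
}
```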

TEST(SortedArrayTest, RemoveValues) {
    sorted_array arr;