diff --git a/src/array_utils.cpp b/src/array_utils.cpp index 610aa96c..c4c4df98 100644 --- a/src/array_utils.cpp +++ b/src/array_utils.cpp @@ -32,6 +32,7 @@ size_t ArrayUtils::and_scalar(const uint32_t *A, const size_t lenA, return (out - initout); // NOTREACHED } +// merges two sorted arrays and also removes duplicates size_t ArrayUtils::or_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t **out) { size_t indexA = 0, indexB = 0, res_index = 0; @@ -45,23 +46,24 @@ size_t ArrayUtils::or_scalar(const uint32_t *A, const size_t lenA, uint32_t* results = new uint32_t[lenA+lenB]; while (indexA < lenA && indexB < lenB) { - if (A[indexA] < B[indexB]) { - if(res_index == 0 || results[res_index-1] != A[indexA]) { - results[res_index] = A[indexA]; - res_index++; + if (A[indexA] < B[indexB]) { + // check for duplicate + if(res_index == 0 || results[res_index-1] != A[indexA]) { + results[res_index] = A[indexA]; + res_index++; + } + indexA++; + } else { + if(res_index == 0 || results[res_index-1] != B[indexB]) { + results[res_index] = B[indexB]; + res_index++; + } + indexB++; } - indexA++; - } else { - if(res_index == 0 || results[res_index-1] != B[indexB]) { - results[res_index] = B[indexB]; - res_index++; - } - indexB++; - } } while (indexA < lenA) { - if(results[res_index-1] != A[indexA]) { + if(res_index == 0 || results[res_index-1] != A[indexA]) { results[res_index] = A[indexA]; res_index++; } @@ -70,7 +72,7 @@ size_t ArrayUtils::or_scalar(const uint32_t *A, const size_t lenA, } while (indexB < lenB) { - if(results[res_index-1] != B[indexB]) { + if(res_index == 0 || results[res_index-1] != B[indexB]) { results[res_index] = B[indexB]; res_index++; } diff --git a/src/collection.cpp b/src/collection.cpp index a90d9dd1..b56a4386 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -819,6 +819,7 @@ Option Collection::search(std::string query, const std::vector token_indices; char num_tokens_found = mscore.offset_diffs[0]; for(size_t i = 1; i <= num_tokens_found; i++) { @@ -835,20 +836,17 @@ Option Collection::search(std::string query, const std::vector"; + } + std::stringstream snippet_stream; for(size_t snippet_index = start_index; snippet_index < end_index; snippet_index++) { if(snippet_index != start_index) { snippet_stream << " "; } - if(snippet_index == token_indices[token_index]) { - token_index++; - snippet_stream << "" + tokens[snippet_index] + ""; - } else { - snippet_stream << tokens[snippet_index]; - } + snippet_stream << tokens[snippet_index]; } document["_snippets"] = nlohmann::json::object(); @@ -1191,7 +1189,7 @@ inline std::vector Collection::next_suggestion(const std::vector