Stop storing a per-document length prefix in `offsets`: a document's positions now run from its `offset_index` entry up to (not including) the next document's entry, or to the end of `offsets` for the last document; `Collection::search` also returns early when no token leaves were found.
diff --git a/TODO.md b/TODO.md index 41f9ce18..643b1506 100644 --- a/TODO.md +++ b/TODO.md @@ -29,4 +29,4 @@ **Refactoring** - `token_count` in leaf is redundant: can be accessed from value -- storing length in `offsets` is redundant: length can be found by looking up value of the next index in `offset_index` +- ~~storing length in `offsets` is redundant: it can be found by looking up value of the next index in `offset_index`~~ diff --git a/src/art.cpp b/src/art.cpp index a158b236..9e8f01cc 100644 --- a/src/art.cpp +++ b/src/art.cpp @@ -366,7 +366,6 @@ static void add_document_to_leaf(const art_document *document, art_leaf *leaf) { uint32_t curr_index = leaf->values->offsets.getLength(); leaf->values->offset_index.append_sorted(curr_index); - leaf->values->offsets.append_unsorted(document->offsets_len); for(uint32_t i=0; i<document->offsets_len; i++) { leaf->values->offsets.append_unsorted(document->offsets[i]); } diff --git a/src/collection.cpp b/src/collection.cpp index 9d77a7de..4550c1a1 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -83,6 +83,10 @@ void Collection::search(std::string query, size_t max_results) { } } + if(token_leaves.size() == 0) { + return ; + } + //std::cout << "token_leaves.size = " << token_leaves.size() << std::endl; Topster<100> topster; @@ -138,11 +142,16 @@ void Collection::score_results(Topster<100> &topster, const std::vector positions; uint32_t doc_index = token_leaf->values->ids.indexOf(doc_id); - uint32_t offset_index = token_leaf->values->offset_index.at(doc_index); - uint32_t num_offsets = token_leaf->values->offsets.at(offset_index); - for (auto offset_count = 1; offset_count <= num_offsets; offset_count++) { - positions.push_back((uint16_t) token_leaf->values->offsets.at(offset_index + offset_count)); + uint32_t start_offset = token_leaf->values->offset_index.at(doc_index); + uint32_t end_offset = (doc_index == token_leaf->values->ids.getLength() - 1) ? 
+ token_leaf->values->offsets.getLength() : + token_leaf->values->offset_index.at(doc_index+1); + + while(start_offset < end_offset) { /* end_offset is exclusive: the next document's first slot in offsets, or offsets.getLength() for the last document */ + positions.push_back((uint16_t) token_leaf->values->offsets.at(start_offset)); + start_offset++; } + token_positions.push_back(positions); }