Tests for collection.

This commit is contained in:
Kishore Nallan 2016-11-13 21:59:32 +05:30
parent ea0da73cfb
commit 5736888935
9 changed files with 56 additions and 9 deletions

View File

@ -34,7 +34,7 @@ link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build)
link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build/libressl-build/lib)
link_directories(${CMAKE_SOURCE_DIR}/external/${ROCKSDB_NAME})
add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp ${SRC_FILES})
add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp test/collection_test.cpp ${SRC_FILES})
add_executable(search ${HEADER_FILES} ${SRC_FILES} src/main/main.cpp)
add_executable(typesense-server ${HEADER_FILES} ${SRC_FILES} src/main/server.cpp)

View File

@ -26,6 +26,9 @@ private:
std::string get_seq_id_key(uint32_t seq_id);
std::string get_id_key(std::string id);
static inline std::vector<art_leaf *> _next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
long long int n);
public:
Collection() = delete;
Collection(std::string state_dir_path);
@ -33,10 +36,6 @@ public:
std::string add(std::string json_str);
std::vector<nlohmann::json> search(std::string query, const int num_typos, const size_t num_results);
void remove(std::string id);
static inline std::vector<art_leaf *> _next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
long long int n);
void score_results(Topster<100> &topster, const std::vector<art_leaf *> &query_suggestion,
const uint32_t *result_ids,
size_t result_size) const;

View File

@ -59,6 +59,7 @@ public:
uint32_t at(uint32_t index);
// FIXME: contains and indexOf are meant only for sorted arrays
bool contains(uint32_t value);
uint32_t indexOf(uint32_t value);

View File

@ -71,7 +71,7 @@ struct Topster {
}
static bool compare_values(const struct KV& i, const struct KV& j) {
return j.value < i.value;
return (i.value == j.value) ? i.key > j.key : i.value > j.value;
}
void sort() {

View File

@ -38,6 +38,8 @@ std::string Collection::add(std::string json_str) {
store->insert(get_seq_id_key(seq_id), document.dump());
store->insert(get_id_key(document["id"]), seq_id_str);
std::cout << "ID: " << document["id"] << ", Title: " << document["title"] << std::endl;
std::vector<std::string> tokens;
StringUtils::tokenize(document["title"], tokens, " ", true);
@ -219,9 +221,12 @@ void Collection::score_results(Topster<100> &topster, const std::vector<art_leaf
mscore = MatchScore::match_score(doc_id, token_positions);
}
uint32_t doc_score = doc_scores.at(doc_id);
const uint64_t final_score = ((uint64_t)(mscore.words_present * 32 + (20 - mscore.distance)) * UINT32_MAX) +
doc_scores.at(doc_id);
std::cout << "final_score: " << final_score << ", doc_id: " << doc_id << std::endl;
/*
std::cout << "result_ids[i]: " << result_ids[i] << " - mscore.distance: "
<< (int) mscore.distance << " - mscore.words_present: " << (int) mscore.words_present

View File

@ -7,7 +7,7 @@
#include <art.h>
#include <unordered_map>
#include "string_utils.h"
#include "../collection.h"
#include "collection.h"
using namespace std;

View File

@ -15,7 +15,7 @@
#include <map>
#include <regex>
#include "string_utils.h"
#include "../collection.h"
#include "collection.h"
#include <sys/resource.h>
#include "h2o.h"

42
test/collection_test.cpp Normal file
View File

@ -0,0 +1,42 @@
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include <fstream>
#include "collection.h"
class CollectionTest : public ::testing::Test {
protected:
Collection *collection;
virtual void SetUp() {
std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl");
collection = new Collection("/tmp/typesense_test/collection");
std::string json_line;
while (std::getline(infile, json_line)) {
collection->add(json_line);
}
infile.close();
}
virtual void TearDown() {
delete collection;
}
};
// Searches for "the" with zero typos allowed (at most 10 results requested) and
// verifies that the hits come back in one fixed, repeatable order.
TEST_F(CollectionTest, ExactSearchShouldBeStable) {
    // When two documents have the same score, the one with the larger doc_id is listed first.
    const std::vector<std::string> expected_ids = {"1", "6", "foo", "13", "10", "8", "16"};

    std::vector<nlohmann::json> hits = collection->search("the", 0, 10);
    ASSERT_EQ(7, hits.size());

    size_t position = 0;
    for (nlohmann::json hit : hits) {
        std::string actual_id = hit["id"];
        ASSERT_STREQ(expected_ids.at(position).c_str(), actual_id.c_str());
        ++position;
    }
}

View File

@ -10,7 +10,7 @@
{"points":12,"title":"Is there research for the optimal small crew size for a long space voyage?"}
{"points":12,"title":"Do long term missions receive insurance coverage?"}
{"points":12,"title":"What do they exactly look for when searching for extraterrestrial intelligence?"}
{"points":11,"title":"What were emergency procedures for failure of launch vehicles with nuclear upper stages?"}
{"points":12,"title":"What were emergency procedures for failure of launch vehicles with the nuclear upper stages?"}
{"points":11,"title":"Mathematics used for F9R flyback lunch and landing"}
{"points":11,"title":"What considerations have been made lunch for waste produced during colonisation?"}
{"points":10,"title":"Do late do the propellants lunch ionize in chemical rockets?"}