mirror of
https://github.com/typesense/typesense.git
synced 2025-05-19 05:08:43 +08:00
Tests for collection.
This commit is contained in:
parent
ea0da73cfb
commit
5736888935
@ -34,7 +34,7 @@ link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build)
|
||||
link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build/libressl-build/lib)
|
||||
link_directories(${CMAKE_SOURCE_DIR}/external/${ROCKSDB_NAME})
|
||||
|
||||
add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp ${SRC_FILES})
|
||||
add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp test/collection_test.cpp ${SRC_FILES})
|
||||
add_executable(search ${HEADER_FILES} ${SRC_FILES} src/main/main.cpp)
|
||||
add_executable(typesense-server ${HEADER_FILES} ${SRC_FILES} src/main/server.cpp)
|
||||
|
||||
|
@ -26,6 +26,9 @@ private:
|
||||
std::string get_seq_id_key(uint32_t seq_id);
|
||||
std::string get_id_key(std::string id);
|
||||
|
||||
static inline std::vector<art_leaf *> _next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
|
||||
long long int n);
|
||||
|
||||
public:
|
||||
Collection() = delete;
|
||||
Collection(std::string state_dir_path);
|
||||
@ -33,10 +36,6 @@ public:
|
||||
std::string add(std::string json_str);
|
||||
std::vector<nlohmann::json> search(std::string query, const int num_typos, const size_t num_results);
|
||||
void remove(std::string id);
|
||||
|
||||
static inline std::vector<art_leaf *> _next_suggestion(const std::vector<std::vector<art_leaf *>> &token_leaves,
|
||||
long long int n);
|
||||
|
||||
void score_results(Topster<100> &topster, const std::vector<art_leaf *> &query_suggestion,
|
||||
const uint32_t *result_ids,
|
||||
size_t result_size) const;
|
@ -59,6 +59,7 @@ public:
|
||||
|
||||
uint32_t at(uint32_t index);
|
||||
|
||||
// FIXME: contains and indexOf are meant only for sorted arrays
|
||||
bool contains(uint32_t value);
|
||||
|
||||
uint32_t indexOf(uint32_t value);
|
||||
|
@ -71,7 +71,7 @@ struct Topster {
|
||||
}
|
||||
|
||||
static bool compare_values(const struct KV& i, const struct KV& j) {
|
||||
return j.value < i.value;
|
||||
return (i.value == j.value) ? i.key > j.key : i.value > j.value;
|
||||
}
|
||||
|
||||
void sort() {
|
||||
|
@ -38,6 +38,8 @@ std::string Collection::add(std::string json_str) {
|
||||
store->insert(get_seq_id_key(seq_id), document.dump());
|
||||
store->insert(get_id_key(document["id"]), seq_id_str);
|
||||
|
||||
std::cout << "ID: " << document["id"] << ", Title: " << document["title"] << std::endl;
|
||||
|
||||
std::vector<std::string> tokens;
|
||||
StringUtils::tokenize(document["title"], tokens, " ", true);
|
||||
|
||||
@ -219,9 +221,12 @@ void Collection::score_results(Topster<100> &topster, const std::vector<art_leaf
|
||||
mscore = MatchScore::match_score(doc_id, token_positions);
|
||||
}
|
||||
|
||||
uint32_t doc_score = doc_scores.at(doc_id);
|
||||
const uint64_t final_score = ((uint64_t)(mscore.words_present * 32 + (20 - mscore.distance)) * UINT32_MAX) +
|
||||
doc_scores.at(doc_id);
|
||||
|
||||
std::cout << "final_score: " << final_score << ", doc_id: " << doc_id << std::endl;
|
||||
|
||||
/*
|
||||
std::cout << "result_ids[i]: " << result_ids[i] << " - mscore.distance: "
|
||||
<< (int) mscore.distance << " - mscore.words_present: " << (int) mscore.words_present
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <art.h>
|
||||
#include <unordered_map>
|
||||
#include "string_utils.h"
|
||||
#include "../collection.h"
|
||||
#include "collection.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include "string_utils.h"
|
||||
#include "../collection.h"
|
||||
#include "collection.h"
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "h2o.h"
|
||||
|
42
test/collection_test.cpp
Normal file
42
test/collection_test.cpp
Normal file
@ -0,0 +1,42 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include "collection.h"
|
||||
|
||||
class CollectionTest : public ::testing::Test {
|
||||
protected:
|
||||
Collection *collection;
|
||||
|
||||
virtual void SetUp() {
|
||||
std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl");
|
||||
collection = new Collection("/tmp/typesense_test/collection");
|
||||
|
||||
std::string json_line;
|
||||
|
||||
while (std::getline(infile, json_line)) {
|
||||
collection->add(json_line);
|
||||
}
|
||||
|
||||
infile.close();
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
delete collection;
|
||||
}
|
||||
};
|
||||
|
||||
// Searches for "the" with zero typos allowed and a limit of 10 results, then
// checks both the number of hits and the exact order of the returned ids.
TEST_F(CollectionTest, ExactSearchShouldBeStable) {
    std::vector<nlohmann::json> results = collection->search("the", 0, 10);
    ASSERT_EQ(7, results.size());

    // For two documents of the same score, the larger doc_id appears first
    std::vector<std::string> ids = {"1", "6", "foo", "13", "10", "8", "16"};

    size_t pos = 0;
    while (pos < results.size()) {
        // Work on a copy of the hit, exactly as the lookup below expects.
        nlohmann::json hit = results.at(pos);
        std::string expected_id = ids.at(pos);
        std::string actual_id = hit["id"];
        ASSERT_STREQ(expected_id.c_str(), actual_id.c_str());
        pos++;
    }
}
|
@ -10,7 +10,7 @@
|
||||
{"points":12,"title":"Is there research for the optimal small crew size for a long space voyage?"}
|
||||
{"points":12,"title":"Do long term missions receive insurance coverage?"}
|
||||
{"points":12,"title":"What do they exactly look for when searching for extraterrestrial intelligence?"}
|
||||
{"points":11,"title":"What were emergency procedures for failure of launch vehicles with nuclear upper stages?"}
|
||||
{"points":12,"title":"What were emergency procedures for failure of launch vehicles with the nuclear upper stages?"}
|
||||
{"points":11,"title":"Mathematics used for F9R flyback lunch and landing"}
|
||||
{"points":11,"title":"What considerations have been made lunch for waste produced during colonisation?"}
|
||||
{"points":10,"title":"Do late do the propellants lunch ionize in chemical rockets?"}
|
Loading…
x
Reference in New Issue
Block a user