diff --git a/CMakeLists.txt b/CMakeLists.txt index 0628e352..d1e6e243 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build) link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build/libressl-build/lib) link_directories(${CMAKE_SOURCE_DIR}/external/${ROCKSDB_NAME}) -add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp ${SRC_FILES}) +add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp test/collection_test.cpp ${SRC_FILES}) add_executable(search ${HEADER_FILES} ${SRC_FILES} src/main/main.cpp) add_executable(typesense-server ${HEADER_FILES} ${SRC_FILES} src/main/server.cpp) diff --git a/src/collection.h b/include/collection.h similarity index 93% rename from src/collection.h rename to include/collection.h index 6f12b7da..19819eaf 100644 --- a/src/collection.h +++ b/include/collection.h @@ -26,6 +26,9 @@ private: std::string get_seq_id_key(uint32_t seq_id); std::string get_id_key(std::string id); + static inline std::vector _next_suggestion(const std::vector> &token_leaves, + long long int n); + public: Collection() = delete; Collection(std::string state_dir_path); @@ -33,10 +36,6 @@ public: std::string add(std::string json_str); std::vector search(std::string query, const int num_typos, const size_t num_results); void remove(std::string id); - - static inline std::vector _next_suggestion(const std::vector> &token_leaves, - long long int n); - void score_results(Topster<100> &topster, const std::vector &query_suggestion, const uint32_t *result_ids, size_t result_size) const; diff --git a/include/forarray.h b/include/forarray.h index 91d0b8c0..2a0419ab 100644 --- a/include/forarray.h +++ b/include/forarray.h @@ -59,6 +59,7 @@ public: uint32_t at(uint32_t index); + // FIXME: contains and indexOf are meant only for sorted arrays bool contains(uint32_t value); uint32_t indexOf(uint32_t value); diff --git a/include/topster.h b/include/topster.h index c974be52..cd02bed2 100644 --- a/include/topster.h +++ b/include/topster.h @@ -71,7 +71,7 @@ struct Topster { } static bool compare_values(const struct KV& i, const struct KV& j) { - return j.value < i.value; + return (i.value == j.value) ? i.key > j.key : i.value > j.value; } void sort() { diff --git a/src/collection.cpp b/src/collection.cpp index 112fa851..e3e5ed6b 100644 --- a/src/collection.cpp +++ b/src/collection.cpp @@ -38,6 +38,8 @@ std::string Collection::add(std::string json_str) { store->insert(get_seq_id_key(seq_id), document.dump()); store->insert(get_id_key(document["id"]), seq_id_str); + std::cout << "ID: " << document["id"] << ", Title: " << document["title"] << std::endl; + std::vector tokens; StringUtils::tokenize(document["title"], tokens, " ", true); @@ -219,9 +221,12 @@ void Collection::score_results(Topster<100> &topster, const std::vector #include #include "string_utils.h" -#include "../collection.h" +#include "collection.h" using namespace std; diff --git a/src/main/server.cpp b/src/main/server.cpp index af2ab0e2..1791d381 100644 --- a/src/main/server.cpp +++ b/src/main/server.cpp @@ -15,7 +15,7 @@ #include #include #include "string_utils.h" -#include "../collection.h" +#include "collection.h" #include #include "h2o.h" diff --git a/test/collection_test.cpp b/test/collection_test.cpp new file mode 100644 index 00000000..79f4b933 --- /dev/null +++ b/test/collection_test.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include "collection.h" + +class CollectionTest : public ::testing::Test { +protected: + Collection *collection; + + virtual void SetUp() { + std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl"); + collection = new Collection("/tmp/typesense_test/collection"); + + std::string json_line; + + while (std::getline(infile, json_line)) { + collection->add(json_line); + } + + infile.close(); + } + + virtual void TearDown() { + delete collection; + } +}; + +TEST_F(CollectionTest, ExactSearchShouldBeStable) { + std::vector results = collection->search("the", 0, 10); + ASSERT_EQ(7, results.size()); + + // For two documents of the same score, the larger doc_id appears first + std::vector ids = {"1", "6", "foo", "13", "10", "8", "16"}; + + for(size_t i = 0; i < results.size(); i++) { + nlohmann::json result = results.at(i); + std::string id = ids.at(i); + std::string result_id = result["id"]; + ASSERT_STREQ(id.c_str(), result_id.c_str()); + } +} \ No newline at end of file diff --git a/test/documents.jsonl b/test/documents.jsonl index b66fe263..e82b93a4 100644 --- a/test/documents.jsonl +++ b/test/documents.jsonl @@ -10,7 +10,7 @@ {"points":12,"title":"Is there research for the optimal small crew size for a long space voyage?"} {"points":12,"title":"Do long term missions receive insurance coverage?"} {"points":12,"title":"What do they exactly look for when searching for extraterrestrial intelligence?"} -{"points":11,"title":"What were emergency procedures for failure of launch vehicles with nuclear upper stages?"} +{"points":12,"title":"What were emergency procedures for failure of launch vehicles with the nuclear upper stages?"} {"points":11,"title":"Mathematics used for F9R flyback lunch and landing"} {"points":11,"title":"What considerations have been made lunch for waste produced during colonisation?"} {"points":10,"title":"Do late do the propellants lunch ionize in chemical rockets?"} \ No newline at end of file