From ce990ecb57d6e54fe3f2c48cbca49d98fa396e61 Mon Sep 17 00:00:00 2001 From: Kishore Nallan Date: Sun, 22 Jan 2017 22:18:08 +0530 Subject: [PATCH] Fix documents.jsonl path in tests. --- CMakeLists.txt | 15 ++++++++++----- TODO.md | 20 ++++++++++---------- src/main/benchmark.cpp | 4 ++-- src/main/main.cpp | 6 +++--- src/main/server.cpp | 9 +++++---- test/collection_test.cpp | 8 ++------ test/config.h.in | 0 7 files changed, 32 insertions(+), 30 deletions(-) create mode 100644 test/config.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 44027037..4a7eb5f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,13 +35,18 @@ link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build) link_directories(${CMAKE_SOURCE_DIR}/external/${H2O_NAME}/build/libressl-build/lib) link_directories(${CMAKE_SOURCE_DIR}/external/${ROCKSDB_NAME}) -add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp test/collection_test.cpp test/topster_test.cpp - ${SRC_FILES}) +add_executable(typesense-server ${HEADER_FILES} ${SRC_FILES} src/main/server.cpp) add_executable(search ${HEADER_FILES} ${SRC_FILES} src/main/main.cpp) add_executable(benchmark ${HEADER_FILES} ${SRC_FILES} src/main/benchmark.cpp) -add_executable(typesense-server ${HEADER_FILES} ${SRC_FILES} src/main/server.cpp) +add_executable(typesense_test test/forarray_test.cpp test/art_test.cpp test/collection_test.cpp test/topster_test.cpp + ${SRC_FILES}) -target_link_libraries(typesense_test pthread for rocksdb gtest gtest_main) +target_compile_definitions(typesense-server PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/") +target_compile_definitions(search PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/") +target_compile_definitions(benchmark PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/") +target_compile_definitions(typesense_test PRIVATE ROOT_DIR="${CMAKE_SOURCE_DIR}/") + +target_link_libraries(typesense-server for curl h2o-evloop pthread rocksdb ssl crypto) target_link_libraries(search for pthread rocksdb) target_link_libraries(benchmark for pthread rocksdb) -target_link_libraries(typesense-server for curl h2o-evloop pthread rocksdb ssl crypto) \ No newline at end of file +target_link_libraries(typesense_test pthread for rocksdb gtest gtest_main) diff --git a/TODO.md b/TODO.md index f1dac56d..0e0ccf8c 100644 --- a/TODO.md +++ b/TODO.md @@ -7,39 +7,39 @@ - ~~Proper JSON as input~~ - ~~Storing raw JSON input to RocksDB~~ - ~~ART for every indexed field~~ +- ~~Delete should remove from RocksDB~~ +- ~~Speed up UUID generation~~ +- ~~Make the search score computation customizable~~ +- ~~art int search should support signed ints~~ +- ~~Search across multiple fields~~ +- ~~Have set inside topster itself~~ +- ~~Persist next_seq_id~~ +- ~~collection_id should be int, not string~~ +- ~~API should return count~~ +- ~~Fix documents.jsonl path in tests~~ - UTF-8 support for fuzzy search - Facets - Filters - Support search operators like +, - etc. -- ~~Delete should remove from RocksDB~~ -- ~~Speed up UUID generation~~ - Prefix-search strings should not be null terminated -- ~~Make the search score computation customizable~~ - string_utils::tokenize should not have max length - only last token should be prefix searched -- ~~art int search should support signed ints~~ - art float search - storage key prefix should include collection name - Minimum results should be a variable instead of blindly going with max_results - Benchmark with -ffast-math - Space sensitivity - Use bitmap index instead of forarray for doc list -- ~~Search across multiple fields~~ - Multi field search tests - Throw errors when schema is broken - Index and search on multi-valued field -- Fix documents.jsonl path in tests - Assumption that all tokens match for scoring is no longer true - Primary_rank_scores and secondary_rank_scores hashmaps should be combined - Proper logging -- ~~Have set inside topster itself~~ - Restore records as well on restart (like for meta) -- ~~Persist next_seq_id~~ -- ~~collection_id should be int, not string~~ - clean special chars before indexing - d-ary heap? - range search for art_int -- ~~API should return count~~ - test for same match score but different primary, secondary attr **API** diff --git a/src/main/benchmark.cpp b/src/main/benchmark.cpp index 87f9d4c4..b329c418 100644 --- a/src/main/benchmark.cpp +++ b/src/main/benchmark.cpp @@ -13,7 +13,7 @@ using namespace std; -int main() { +int main(int argc, char* argv[]) { system("rm -rf /tmp/typesense-data && mkdir -p /tmp/typesense-data"); std::vector fields_to_index = {field("title", field_types::STRING)}; @@ -27,7 +27,7 @@ int main() { collection = collectionManager.create_collection("collection", fields_to_index, rank_fields); } - std::ifstream infile("/Users/kishore/Downloads/hnstories.jsonl"); + std::ifstream infile(argv[1]); std::string json_line; diff --git a/src/main/main.cpp b/src/main/main.cpp index 87b6122a..a2285ff4 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -13,7 +13,7 @@ using namespace std; -int main() { +int main(int argc, char* argv[]) { std::vector fields_to_index = {field("title", field_types::STRING)}; std::vector rank_fields = {"points"}; Store *store = new Store("/tmp/typesense-data"); @@ -25,8 +25,8 @@ int main() { collection = collectionManager.create_collection("collection", fields_to_index, rank_fields); } - std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl"); - //std::ifstream infile("/Users/kishore/Downloads/hnstories.jsonl"); + std::ifstream infile(std::string(ROOT_DIR)+"test/documents.jsonl"); + //std::ifstream infile(argv[1]); std::string json_line; diff --git a/src/main/server.cpp b/src/main/server.cpp index 858721bd..bacae185 100644 --- a/src/main/server.cpp +++ b/src/main/server.cpp @@ -195,9 +195,9 @@ static int create_listener(void) { return 0; } -void index_documents() { - std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl"); -// std::ifstream infile("/Users/kishore/Downloads/hnstories.jsonl"); +void index_documents(std::string path_to_docs) { + std::ifstream infile(path_to_docs); +// std::ifstream infile(path_to_docs); std::string json_line; @@ -222,7 +222,8 @@ int main(int argc, char **argv) { collection = collectionManager.create_collection("collection", search_fields, rank_fields); } - index_documents(); + index_documents(std::string(ROOT_DIR)+"test/documents.jsonl"); + //index_documents(argv[1]); h2o_config_init(&config); h2o_hostconf_t *hostconf = h2o_config_register_host(&config, h2o_iovec_init(H2O_STRLIT("default")), 65535); diff --git a/test/collection_test.cpp b/test/collection_test.cpp index b1af5bcf..4aec3638 100644 --- a/test/collection_test.cpp +++ b/test/collection_test.cpp @@ -20,7 +20,7 @@ protected: store = new Store(state_dir_path); collectionManager.init(store); - std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.jsonl"); + std::ifstream infile(std::string(ROOT_DIR)+"test/documents.jsonl"); std::vector fields = {field("title", field_types::STRING)}; std::vector rank_fields = {"points"}; search_fields = {"title"}; @@ -69,10 +69,6 @@ TEST_F(CollectionTest, ExactSearchShouldBeStable) { TEST_F(CollectionTest, ExactPhraseSearch) { nlohmann::json results = collection->search("rocket launch", search_fields, 0, 10); - for(auto res: results["hits"]) { - std::cout << res << std::endl; - } - std::cout << std::endl; ASSERT_EQ(5, results["hits"].size()); /* @@ -303,7 +299,7 @@ TEST_F(CollectionTest, PrefixSearching) { TEST_F(CollectionTest, MultipleFields) { /*Collection *coll_mul_fields; - std::ifstream infile("/Users/kishore/others/wreally/typesense/test/multi_field_documents.jsonl"); + std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl"); std::vector fields = {field("title", field_types::STRING), field("starring", field_types::STRING)}; std::vector rank_fields = {"points"}; coll_mul_fields = new Collection("/tmp/typesense_test/coll_mul_fields", "coll_mul_fields", fields, rank_fields); diff --git a/test/config.h.in b/test/config.h.in new file mode 100644 index 00000000..e69de29b