Merge branch 'master' into distro_package

This commit is contained in:
kishorenc 2019-10-26 17:33:19 +05:30
commit 68591a1d34
23 changed files with 228 additions and 105 deletions

View File

@ -2,7 +2,7 @@ version: 2
jobs:
build:
docker:
- image: typesense/typesense-development:latest
- image: typesense/typesense-development:25-SEP-2019
environment:
- PROJECT_DIR: /typesense
- TYPESENSE_VERSION: $CIRCLE_BRANCH-$CIRCLE_SHA1

View File

@ -21,7 +21,7 @@ if (APPLE)
add_definitions(-D__GLIBCXX__)
# Prefer brew installed libraries
set(ENV{OPENSSL_ROOT_DIR} /usr/local/opt/openssl)
set(OPENSSL_ROOT_DIR /usr/local/opt/openssl@1.1)
set(SNAPPY_ROOT_DIR /usr/local/opt/snappy)
set(ZLIB_ROOT /usr/local/opt/zlib)
set(CMAKE_PREFIX_PATH /usr/local/opt/curl-openssl /usr/local/opt/icu4c)
@ -35,12 +35,14 @@ ELSE()
SET(CMAKE_FIND_LIBRARY_SUFFIXES .a)
ENDIF()
FIND_PACKAGE(OpenSSL 1.0.2 REQUIRED)
FIND_PACKAGE(OpenSSL 1.1.1 REQUIRED)
FIND_PACKAGE(Snappy REQUIRED)
FIND_PACKAGE(ZLIB REQUIRED)
FIND_PACKAGE(CURL REQUIRED)
FIND_PACKAGE(ICU REQUIRED)
message("OpenSSL library: ${OPENSSL_LIBRARIES}")
include(cmake/For.cmake)
include(cmake/H2O.cmake)
include(cmake/RocksDB.cmake)
@ -138,8 +140,8 @@ if(NOT APPLE)
endif()
set(ICU_ALL_LIBRARIES ${ICU_I18N_LIBRARIES} ${ICU_LIBRARIES} ${ICU_DATA_LIBRARIES})
set(CORE_LIBS h2o-evloop iconv ${CURL_LIBRARIES} for ${ICU_ALL_LIBRARIES} ${G3LOGGER_LIBRARIES} pthread
${ROCKSDB_LIBS} ${OPENSSL_LIBRARIES} dl ${STD_LIB})
set(CORE_LIBS h2o-evloop iconv ${CURL_LIBRARIES} for ${ICU_ALL_LIBRARIES} ${G3LOGGER_LIBRARIES}
${ROCKSDB_LIBS} ${OPENSSL_LIBRARIES} pthread dl ${STD_LIB})
target_link_libraries(typesense-core ${CORE_LIBS})
target_link_libraries(typesense-server ${CORE_LIBS})

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
set -ex
PROJECT_DIR=`dirname $0 | while read a; do cd $a && pwd && break; done`
@ -24,7 +24,7 @@ fi
cmake -DTYPESENSE_VERSION=$TYPESENSE_VERSION -DCMAKE_BUILD_TYPE=Release -H$PROJECT_DIR -B$PROJECT_DIR/$BUILD_DIR
make -C $PROJECT_DIR/$BUILD_DIR
if [[ "$@" == *"--create-binary"* ]]; then
if [[ "$@" == *"--package-binary"* ]]; then
OS_FAMILY=$(echo `uname` | awk '{print tolower($0)}')
RELEASE_NAME=typesense-server-$TYPESENSE_VERSION-$OS_FAMILY-amd64
printf `md5sum $PROJECT_DIR/$BUILD_DIR/typesense-server | cut -b-32` > $PROJECT_DIR/$BUILD_DIR/typesense-server.md5.txt

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
set -ex
PROJECT_DIR=`dirname $0 | while read a; do cd $a && pwd && break; done`
@ -21,14 +21,14 @@ if [[ "$@" == *"--depclean"* ]]; then
mkdir $PROJECT_DIR/external-$SYSTEM_NAME
fi
echo "Creating development image..."
docker build --file $PROJECT_DIR/docker/development.Dockerfile --tag typesense/typesense-development:latest $PROJECT_DIR/docker
#echo "Creating development image..."
#docker build --file $PROJECT_DIR/docker/development.Dockerfile --tag typesense/typesense-development:latest $PROJECT_DIR/docker
echo "Building Typesense $TYPESENSE_VERSION..."
docker run -it -v $PROJECT_DIR:/typesense typesense/typesense-development cmake -DTYPESENSE_VERSION=$TYPESENSE_VERSION \
-DCMAKE_BUILD_TYPE=Release -H/typesense -B/typesense/$BUILD_DIR
docker run -it -v $PROJECT_DIR:/typesense typesense/typesense-development make -C/typesense/$BUILD_DIR
docker run -it -v $PROJECT_DIR:/typesense typesense/typesense-development make typesense-server typesense-core -C/typesense/$BUILD_DIR
if [[ "$@" == *"--build-deploy-image"* ]]; then
echo "Creating deployment image for Typesense $TYPESENSE_VERSION server ..."
@ -38,7 +38,7 @@ if [[ "$@" == *"--build-deploy-image"* ]]; then
$PROJECT_DIR/$BUILD_DIR
fi
if [[ "$@" == *"--create-binary"* ]]; then
if [[ "$@" == *"--package-binary"* ]]; then
OS_FAMILY=linux
RELEASE_NAME=typesense-server-$TYPESENSE_VERSION-$OS_FAMILY-amd64
printf `md5sum $PROJECT_DIR/$BUILD_DIR/typesense-server | cut -b-32` > $PROJECT_DIR/$BUILD_DIR/typesense-server.md5.txt

View File

@ -16,33 +16,33 @@ RUN apt-get install -y python-software-properties \
libidn11 \
git
RUN curl -L -o /opt/openssl-1.0.2s.tar.gz https://openssl.org/source/openssl-1.0.2s.tar.gz
RUN tar -C /opt -xvzf /opt/openssl-1.0.2s.tar.gz
RUN cd /opt/openssl-1.0.2s && sh ./config --prefix=/usr --openssldir=/usr zlib-dynamic
RUN make -C /opt/openssl-1.0.2s depend
RUN make -C /opt/openssl-1.0.2s -j4
RUN make -C /opt/openssl-1.0.2s install
RUN curl -L -o /opt/cmake-3.15.2-Linux-x86_64.tar.gz https://cmake.org/files/v3.15/cmake-3.15.2-Linux-x86_64.tar.gz
ADD https://cmake.org/files/v3.15/cmake-3.15.2-Linux-x86_64.tar.gz /opt/cmake-3.15.2-Linux-x86_64.tar.gz
RUN tar -C /opt -xvzf /opt/cmake-3.15.2-Linux-x86_64.tar.gz
RUN cp -r /opt/cmake-3.15.2-Linux-x86_64/* /usr
RUN curl -L -o /opt/snappy_1.1.3.orig.tar.gz https://launchpad.net/ubuntu/+archive/primary/+files/snappy_1.1.3.orig.tar.gz
ADD https://launchpad.net/ubuntu/+archive/primary/+files/snappy_1.1.3.orig.tar.gz /opt/snappy_1.1.3.orig.tar.gz
RUN tar -C /opt -xf /opt/snappy_1.1.3.orig.tar.gz
RUN mkdir /opt/snappy-1.1.3/build && cd /opt/snappy-1.1.3/build && ../configure && make && make install
RUN mkdir /opt/snappy-1.1.3/build && cd /opt/snappy-1.1.3/build && ../configure && make -j8 && make install
RUN curl -L -o /opt/icu4c-61_1-src.tgz https://ssl.icu-project.org/files/icu4c/61.1/icu4c-61_1-src.tgz
ADD https://ssl.icu-project.org/files/icu4c/61.1/icu4c-61_1-src.tgz /opt/icu4c-61_1-src.tgz
RUN tar -C /opt -xf /opt/icu4c-61_1-src.tgz
RUN cd /opt/icu/source && echo "#define U_DISABLE_RENAMING 1" >> common/unicode/uconfig.h && \
echo "#define U_STATIC_IMPLEMENTATION 1" >> common/unicode/uconfig.h && \
echo "#define U_USING_ICU_NAMESPACE 0" >> common/unicode/uconfig.h
RUN cd /opt/icu/source && ./runConfigureICU Linux --disable-samples --disable-tests --enable-static \
--disable-shared --disable-renaming && make && make install
--disable-shared --disable-renaming && make -j8 && make install
RUN curl -L -o /opt/curl-7.65.3.tar.gz https://github.com/curl/curl/releases/download/curl-7_65_3/curl-7.65.3.tar.gz
ADD https://openssl.org/source/openssl-1.1.1d.tar.gz /opt/openssl-1.1.1d.tar.gz
RUN tar -C /opt -xvzf /opt/openssl-1.1.1d.tar.gz
RUN cd /opt/openssl-1.1.1d && sh ./config --prefix=/usr/local --openssldir=/usr/local zlib
RUN make -C /opt/openssl-1.1.1d depend
RUN make -C /opt/openssl-1.1.1d -j8
RUN make -C /opt/openssl-1.1.1d install
ADD https://github.com/curl/curl/releases/download/curl-7_65_3/curl-7.65.3.tar.gz /opt/curl-7.65.3.tar.gz
RUN tar -C /opt -xf /opt/curl-7.65.3.tar.gz
RUN cd /opt/curl-7.65.3 && LIBS="-ldl -lpthread" ./configure --disable-shared --with-ssl=/usr \
--without-ca-bundle --without-ca-path && make && make install
RUN cd /opt/curl-7.65.3 && LIBS="-ldl -lpthread" ./configure --disable-shared --with-ssl=/usr/local \
--without-ca-bundle --without-ca-path && make -j8 && make install
ENV CC /usr/local/gcc-6.4.0/bin/gcc
ENV CXX /usr/local/gcc-6.4.0/bin/g++

View File

@ -19,22 +19,7 @@ RUN apt-get update && apt-get install -y \
bison \
zlib1g-dev
RUN mkdir -p /etc/ssl/certs
ADD https://curl.haxx.se/ca/cacert.pem /etc/ssl/certs/cacert.pem
ADD https://openssl.org/source/openssl-1.0.2k.tar.gz /opt/openssl-1.0.2k.tar.gz
RUN tar -C /opt -xvzf /opt/openssl-1.0.2k.tar.gz
RUN cd /opt/openssl-1.0.2k && sh ./config --prefix=/usr/local --openssldir=/usr/local zlib-dynamic
RUN make -C /opt/openssl-1.0.2k depend
RUN make -C /opt/openssl-1.0.2k -j4
RUN make -C /opt/openssl-1.0.2k install
ADD https://github.com/curl/curl/releases/download/curl-7_55_1/curl-7.55.1.tar.bz2 /opt/curl-7.55.1.tar.bz2
RUN tar -C /opt -xf /opt/curl-7.55.1.tar.bz2
RUN cd /opt/curl-7.55.1 && LIBS="-ldl -lpthread" ./configure --disable-shared --with-ssl=/usr/local \
--with-ca-bundle=/etc/ssl/certs/cacert.pem && make && make install
RUN curl -L -o /opt/gcc-6.4.0.tar.gz https://ftp.gnu.org/gnu/gcc/gcc-6.4.0/gcc-6.4.0.tar.gz
ADD https://ftp.gnu.org/gnu/gcc/gcc-6.4.0/gcc-6.4.0.tar.gz /opt/
RUN tar -C /opt -xf /opt/gcc-6.4.0.tar.gz
RUN mkdir /opt/gcc-6.4.0/build && cd /opt/gcc-6.4.0/build && ../configure --disable-checking --enable-languages=c,c++ \
@ -46,8 +31,7 @@ RUN cd /opt/gcc-6.4.0/build && make -j8
RUN cd /opt/gcc-6.4.0/build && make install
RUN apt-get remove -y gcc
RUN rm -rf /opt/gcc-6.4.0 /opt/gcc-6.4.0.tar.gz /opt/openssl-1.0.2k /opt/openssl-1.0.2k.tar.gz \
/opt/curl-7.55.1 /opt/curl-7.55.1.tar.bz2
RUN rm -rf /opt/gcc-6.4.0 /opt/gcc-6.4.0.tar.gz
ENV PATH /usr/local/gcc-6.4.0/bin/:$PATH
ENV LD_LIBRARY_PATH /usr/local/gcc-6.4.0/lib64

View File

@ -85,51 +85,51 @@ public:
// getters
std::string get_data_dir() {
std::string get_data_dir() const {
return this->data_dir;
}
std::string get_log_dir() {
std::string get_log_dir() const {
return this->log_dir;
}
std::string get_api_key() {
std::string get_api_key() const {
return this->api_key;
}
std::string get_search_only_api_key() {
std::string get_search_only_api_key() const {
return this->search_only_api_key;
}
std::string get_listen_address() {
std::string get_listen_address() const {
return this->listen_address;
}
int get_listen_port() {
int get_listen_port() const {
return this->listen_port;
}
std::string get_master() {
std::string get_master() const {
return this->master;
}
std::string get_ssl_cert() {
std::string get_ssl_cert() const {
return this->ssl_certificate;
}
std::string get_ssl_cert_key() {
std::string get_ssl_cert_key() const {
return this->ssl_certificate_key;
}
std::string get_config_file() {
std::string get_config_file() const {
return config_file;
}
bool get_enable_cors() {
bool get_enable_cors() const {
return this->enable_cors;
}
size_t get_indices_per_collection() {
size_t get_indices_per_collection() const {
return indices_per_collection;
}

View File

@ -12,9 +12,33 @@ private:
std::string url;
std::string api_key;
std::string ca_cert_path;
/**
 * Reports whether `name` refers to an existing regular file.
 *
 * stat() follows symbolic links, so a symlink that resolves to a regular file is accepted.
 * Directories and other non-regular entries are rejected: the result is used to pick a
 * CA bundle *file*, and a directory sitting at one of the probed paths must not be selected.
 *
 * @param name Path to probe.
 * @return true if the path can be stat()-ed and is a regular file, false otherwise.
 */
static inline bool file_exists (const std::string & name) {
    struct stat info;
    if(stat(name.c_str(), &info) != 0) {
        // missing path, permission problem, etc. -- treat all of them as "not there"
        return false;
    }
    return S_ISREG(info.st_mode);
}
public:
HttpClient(std::string url, std::string api_key): url(url), api_key(api_key) {
    // Well-known CA bundle locations, probed in order; the first one that exists wins.
    // (from: https://serverfault.com/a/722646/117601)
    static const char* const ca_bundle_candidates[] = {
        "/etc/ssl/certs/ca-certificates.crt",                // Debian/Ubuntu/Gentoo etc.
        "/etc/pki/tls/certs/ca-bundle.crt",                  // Fedora/RHEL 6
        "/etc/ssl/ca-bundle.pem",                            // OpenSUSE
        "/etc/pki/tls/cacert.pem",                           // OpenELEC
        "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", // CentOS/RHEL 7
        "/usr/local/etc/openssl/cert.pem",                   // OSX
    };

    // Stays empty when none of the candidates is present on this machine.
    ca_cert_path.clear();

    for(const char* candidate: ca_bundle_candidates) {
        if(!file_exists(candidate)) {
            continue;
        }

        ca_cert_path = candidate;
        break;
    }
}
static size_t curl_write (void *contents, size_t size, size_t nmemb, std::string *s) {
@ -24,6 +48,14 @@ public:
long get_reponse(std::string & response) {
CURL *curl = curl_easy_init();
if(!ca_cert_path.empty()) {
curl_easy_setopt(curl, CURLOPT_CAINFO, ca_cert_path.c_str());
} else {
LOG(ERR) << "Unable to locate system SSL certificates.";
return 0;
}
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); // to allow self-signed certs
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, HttpClient::curl_write);

View File

@ -31,6 +31,8 @@ private:
h2o_multithread_receiver_t* message_receiver;
bool exit_loop = false;
std::string version;
std::vector<route_path> routes;
std::map<std::string, void (*)(void*)> message_handlers;
@ -67,8 +69,9 @@ private:
static int send_401_unauthorized(h2o_req_t *req);
public:
HttpServer(std::string listen_address, uint32_t listen_port,
std::string ssl_cert_path, std::string ssl_cert_key_path, bool cors_enabled);
HttpServer(const std::string & version,
const std::string & listen_address, uint32_t listen_port,
const std::string & ssl_cert_path, const std::string & ssl_cert_key_path, bool cors_enabled);
~HttpServer();
@ -103,6 +106,8 @@ public:
static void on_stop_server(void *data);
std::string get_version();
static constexpr const char* AUTH_HEADER = "x-typesense-api-key";
static constexpr const char* STOP_SERVER_MESSAGE = "STOP_SERVER";
};

View File

@ -34,7 +34,7 @@ public:
class Replicator {
public:
static void start(HttpServer* server, const std::string master_host_port, const std::string api_key, Store& store);
static void start(HttpServer* server, const std::string & master_host_port, const std::string & api_key, Store& store);
static void on_replication_event(void *data);
};

View File

@ -74,6 +74,13 @@ public:
if(!s.ok()) {
LOG(ERR) << "Error while initializing store: " << s.ToString();
if(s.code() == rocksdb::Status::Code::kIOError) {
LOG(ERR) << "It seems like the data directory " << state_dir_path << " is already being used by "
<< "another Typesense server. ";
LOG(ERR) << "If you are SURE that this is not the case, delete the LOCK file "
<< "in the data directory and try again.";
exit(1);
}
}
assert(s.ok());
@ -147,13 +154,12 @@ public:
return db->GetLatestSequenceNumber();
}
/*
Since: GetUpdatesSince(0) == GetUpdatesSince(1), always query for 1 sequence number greater than the number
returned by GetLatestSequenceNumber() locally.
*/
Option<std::vector<std::string>*> get_updates_since(const uint64_t seq_number, const uint64_t max_updates) const {
Option<std::vector<std::string>*> get_updates_since(const uint64_t seq_number_org, const uint64_t max_updates) const {
const uint64_t local_latest_seq_num = db->GetLatestSequenceNumber();
// Since GetUpdatesSince(0) == GetUpdatesSince(1)
const uint64_t seq_number = (seq_number_org == 0) ? 1 : seq_number_org;
if(seq_number == local_latest_seq_num+1) {
// replica has caught up, send an empty list as result
std::vector<std::string>* updates = new std::vector<std::string>();
@ -170,7 +176,7 @@ public:
return Option<std::vector<std::string>*>(400, error.str());
}
if(!iter->Valid() && !(local_latest_seq_num == 0 && seq_number == 0)) {
if(!iter->Valid()) {
std::ostringstream error;
error << "Invalid iterator. Master's latest sequence number is " << local_latest_seq_num << " but "
<< "updates are requested from sequence number " << seq_number << ". "
@ -181,9 +187,22 @@ public:
uint64_t num_updates = 0;
std::vector<std::string>* updates = new std::vector<std::string>();
bool first_iteration = true;
while(iter->Valid() && num_updates < max_updates) {
rocksdb::BatchResult batch_result = iter->GetBatch();
const std::string & write_batch_serialized = batch_result.writeBatchPtr->Data();
const rocksdb::BatchResult & batch = iter->GetBatch();
if(first_iteration) {
    first_iteration = false;

    // The first batch returned by the iterator must start exactly at the requested
    // sequence number; otherwise the requested entries are no longer available.
    if(batch.sequence != seq_number) {
        std::ostringstream error;
        error << "Invalid iterator. Requested sequence number is " << seq_number << " but "
              << "updates are available only from sequence number " << batch.sequence << ". "
              << "The master's WAL entries might have expired (they are kept only for 24 hours).";

        // `updates` is not handed to the caller on this error path, so release it here
        // instead of leaking it.
        delete updates;
        return Option<std::vector<std::string>*>(400, error.str());
    }
}
const std::string & write_batch_serialized = batch.writeBatchPtr->Data();
updates->push_back(write_batch_serialized);
num_updates += 1;
iter->Next();

View File

@ -15,7 +15,7 @@ struct StringUtils {
StringUtils(): status(U_ZERO_ERROR) {
// transliterator(icu::Transliterator::createInstance("Latin-ASCII", UTRANS_FORWARD, status))
cd = iconv_open("ascii//TRANSLIT", "UTF-8");
cd = iconv_open("ASCII//TRANSLIT", "UTF-8");
}
~StringUtils() {
@ -156,6 +156,8 @@ struct StringUtils {
void unicode_normalize(std::string& str) const;
void unicode_normalize2(std::string& str) const;
/* https://stackoverflow.com/a/34571089/131050 */
static std::string base64_encode(const std::string &in) {
std::string out;

View File

@ -21,6 +21,7 @@ bool directory_exists(const std::string & dir_path);
void init_cmdline_options(cmdline::parser & options, int argc, char **argv);
int init_logger(Config & config, std::unique_ptr<g3::LogWorker> & log_worker);
int init_logger(Config & config, const std::string & server_version, std::unique_ptr<g3::LogWorker> & log_worker);
int run_server(Config & config, void (*master_server_routes)(), void (*replica_server_routes)());
int run_server(const Config & config, const std::string & version,
void (*master_server_routes)(), void (*replica_server_routes)());

View File

@ -32,7 +32,7 @@
enum recurse_progress { RECURSE, ABORT, ITERATE };
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
const int term_len, const int* irow, const int* jrow, const int min_cost,
const int max_cost, const bool prefix, std::vector<const art_node *> &results);
@ -1144,7 +1144,8 @@ static inline void copyIntArray2(const int *src, int *dest, const int len) {
}
}
static inline int levenshtein_dist(const int depth, const char p, const char c, const unsigned char* term, const int term_len,
static inline int levenshtein_dist(const int depth, const unsigned char p, const unsigned char c,
const unsigned char* term, const int term_len,
const int* irow, const int* jrow, int* krow) {
int row_min = std::numeric_limits<int>::max();
const int columns = term_len+1;
@ -1234,7 +1235,7 @@ static inline void rotate(int &i, int &j, int &k) {
// e.g. catapult against coratapult
// e.g. microafot against microsoft
static void art_fuzzy_recurse(char p, char c, const art_node *n, int depth, const unsigned char *term,
static void art_fuzzy_recurse(unsigned char p, unsigned char c, const art_node *n, int depth, const unsigned char *term,
const int term_len, const int* irow, const int* jrow, const int min_cost,
const int max_cost, const bool prefix, std::vector<const art_node *> &results) {
const int columns = term_len+1;

View File

@ -95,7 +95,7 @@ Option<bool> CollectionManager::init(Store *store,
Collection* collection = init_collection(collection_meta, collection_next_seq_id);
LOG(INFO) << "Loading collection " << collection->get_name() << std::endl;
LOG(INFO) << "Loading collection " << collection->get_name();
// initialize overrides
std::vector<std::string> collection_override_jsons;

View File

@ -152,7 +152,7 @@ void del_drop_collection(http_req & req, http_res & res) {
void get_debug(http_req & req, http_res & res) {
nlohmann::json result;
result["version"] = TYPESENSE_VERSION;
result["version"] = res.server->get_version();
res.send_200(result.dump());
}

View File

@ -27,10 +27,11 @@ struct h2o_custom_generator_t {
void* data;
};
HttpServer::HttpServer(std::string listen_address, uint32_t listen_port, std::string ssl_cert_path,
std::string ssl_cert_key_path, bool cors_enabled):
listen_address(listen_address), listen_port(listen_port), ssl_cert_path(ssl_cert_path),
ssl_cert_key_path(ssl_cert_key_path), cors_enabled(cors_enabled) {
HttpServer::HttpServer(const std::string & version, const std::string & listen_address,
uint32_t listen_port, const std::string & ssl_cert_path,
const std::string & ssl_cert_key_path, bool cors_enabled):
version(version), listen_address(listen_address), listen_port(listen_port),
ssl_cert_path(ssl_cert_path), ssl_cert_key_path(ssl_cert_key_path), cors_enabled(cors_enabled) {
accept_ctx = new h2o_accept_ctx_t();
h2o_config_init(&config);
hostconf = h2o_config_register_host(&config, h2o_iovec_init(H2O_STRLIT("default")), 65535);
@ -53,9 +54,6 @@ void HttpServer::on_accept(h2o_socket_t *listener, const char *err) {
}
int HttpServer::setup_ssl(const char *cert_file, const char *key_file) {
SSL_library_init();
SSL_load_error_strings();
accept_ctx->ssl_ctx = SSL_CTX_new(SSLv23_server_method());
// As recommended by:
@ -157,6 +155,10 @@ void HttpServer::on_stop_server(void *data) {
// do nothing
}
std::string HttpServer::get_version() {
return version;
}
void HttpServer::clear_timeouts(const std::vector<h2o_timeout_t*> & timeouts) {
for(h2o_timeout_t* timeout: timeouts) {
while (!h2o_linklist_is_empty(&timeout->_entries)) {

View File

@ -39,6 +39,7 @@ void replica_server_routes() {
// meta
server->get("/debug", get_debug);
server->get("/health", get_health);
// replication
server->get("/replication/updates", get_replication_updates, true);
@ -67,10 +68,10 @@ int main(int argc, char **argv) {
}
std::unique_ptr<g3::LogWorker> log_worker = g3::LogWorker::createLogWorker();
int ret_code = init_logger(config, log_worker);
int ret_code = init_logger(config, TYPESENSE_VERSION, log_worker);
if(ret_code != 0) {
return ret_code;
}
return run_server(config, &master_server_routes, &replica_server_routes);
return run_server(config, TYPESENSE_VERSION, &master_server_routes, &replica_server_routes);
}

View File

@ -77,7 +77,8 @@ void IterateBatchHandler::Merge(const rocksdb::Slice& key, const rocksdb::Slice&
}
}
void Replicator::start(HttpServer* server, const std::string master_host_port, const std::string api_key, Store& store) {
void Replicator::start(HttpServer* server, const std::string & master_host_port,
const std::string & api_key, Store& store) {
size_t total_runs = 0;
while(true) {
@ -89,9 +90,8 @@ void Replicator::start(HttpServer* server, const std::string master_host_port, c
LOG(INFO) << "Replica's latest sequence number: " << latest_seq_num;
}
HttpClient client(
master_host_port+"/replication/updates?seq_number="+std::to_string(latest_seq_num+1), api_key
);
std::string url = master_host_port+"/replication/updates?seq_number="+std::to_string(latest_seq_num+1);
HttpClient client(url, api_key);
std::string json_response;
long status_code = client.get_reponse(json_response);

View File

@ -5,7 +5,8 @@ std::string lower_and_no_special_chars(const std::string & str) {
std::stringstream ss;
for(const auto c: str) {
bool should_remove = (!std::isalnum(c) && (int)(c) >= 0);
bool should_remove = ( (int)(c) >= 0 && // check for ASCII range
!std::isalnum(c) );
if(!should_remove) {
ss << (char) std::tolower(c);
}
@ -14,7 +15,41 @@ std::string lower_and_no_special_chars(const std::string & str) {
return ss.str();
}
void StringUtils::unicode_normalize(std::string& str) const {
/**
 * Normalizes `str` in place.
 *
 * The input is walked one UTF-8 sequence at a time (a lead byte plus up to three
 * continuation bytes). Each sequence is passed through the iconv descriptor `cd`;
 * when iconv reports that the symbol cannot be represented (EILSEQ), the original
 * bytes are kept as-is. The combined result is finally passed through
 * lower_and_no_special_chars().
 *
 * @param str String to normalize; replaced with the normalized result.
 */
void StringUtils::unicode_normalize(std::string & str) const {
    std::string out;
    out.reserve(str.size());

    const char *cursor = str.data();
    const char *const end = cursor + str.size();

    // Iterate up to the real end of the string rather than the first NUL byte, so that
    // an embedded NUL does not silently truncate the input.
    while(cursor < end) {
        // Gather one sequence: the lead byte followed by at most three 10xxxxxx bytes.
        char inbuf[4];
        size_t inlen = 0;
        inbuf[inlen++] = *cursor++;
        while(inlen < sizeof(inbuf) && cursor < end && (*cursor & 0xC0) == 0x80) {
            inbuf[inlen++] = *cursor++;
        }

        // Generously sized: a single symbol may transliterate to several ASCII characters.
        char outbuf[32];
        char *inptr = inbuf;
        size_t inleft = inlen;
        char *outptr = outbuf;
        size_t outleft = sizeof(outbuf);

        errno = 0;
        const size_t rc = iconv(cd, &inptr, &inleft, &outptr, &outleft);

        // errno is only meaningful when iconv signals failure through its return value.
        const bool failed = (rc == static_cast<size_t>(-1));
        const bool unrepresentable = failed && (errno == EILSEQ);

        if(failed) {
            // bring the descriptor back to its initial state before the next symbol
            iconv(cd, nullptr, nullptr, nullptr, nullptr);
        }

        if(unrepresentable) {
            // symbol cannot be represented as ASCII, so write the original symbol
            out.append(inbuf, inlen);
        } else {
            // Append exactly the bytes iconv produced; the buffer is not NUL-terminated.
            out.append(outbuf, static_cast<size_t>(outptr - outbuf));
        }
    }

    str.assign(lower_and_no_special_chars(out));
}
void StringUtils::unicode_normalize2(std::string& str) const {
size_t outbuflen = str.length() * 2;
char output[outbuflen];
char *outptr = output;

View File

@ -1,6 +1,7 @@
#include "core_api.h"
#include "typesense_server_utils.h"
#include <curl/curl.h>
#include <sys/stat.h>
HttpServer* server;
@ -38,7 +39,7 @@ void init_cmdline_options(cmdline::parser & options, int argc, char **argv) {
options.add<std::string>("config", '\0', "Path to the configuration file.", false, "");
}
int init_logger(Config & config, std::unique_ptr<g3::LogWorker> & log_worker) {
int init_logger(Config & config, const std::string & server_version, std::unique_ptr<g3::LogWorker> & log_worker) {
// remove SIGTERM since we handle it on our own
g3::overrideSetupSignals({{SIGABRT, "SIGABRT"}, {SIGFPE, "SIGFPE"},{SIGILL, "SIGILL"}, {SIGSEGV, "SIGSEGV"},});
@ -60,7 +61,7 @@ int init_logger(Config & config, std::unique_ptr<g3::LogWorker> & log_worker) {
log_worker->addDefaultLogger("typesense", log_dir, "");
std::cout << "Starting Typesense " << TYPESENSE_VERSION << ". Log directory is configured as: "
std::cout << "Starting Typesense " << server_version << ". Log directory is configured as: "
<< log_dir << std::endl;
}
@ -69,8 +70,10 @@ int init_logger(Config & config, std::unique_ptr<g3::LogWorker> & log_worker) {
return 0;
}
int run_server(Config & config, void (*master_server_routes)(), void (*replica_server_routes)()) {
LOG(INFO) << "Starting Typesense " << TYPESENSE_VERSION << std::flush;
int run_server(const Config & config, const std::string & version,
void (*master_server_routes)(), void (*replica_server_routes)()) {
LOG(INFO) << "Starting Typesense " << version << std::flush;
if(!directory_exists(config.get_data_dir())) {
LOG(ERR) << "Typesense failed to start. " << "Data directory " << config.get_data_dir()
@ -98,11 +101,12 @@ int run_server(Config & config, void (*master_server_routes)(), void (*replica_s
curl_global_init(CURL_GLOBAL_SSL);
server = new HttpServer(
config.get_listen_address(),
config.get_listen_port(),
config.get_ssl_cert(),
config.get_ssl_cert_key(),
config.get_enable_cors()
version,
config.get_listen_address(),
config.get_listen_port(),
config.get_ssl_cert(),
config.get_ssl_cert_key(),
config.get_enable_cors()
);
server->set_auth_handler(handle_authentication);
@ -123,9 +127,11 @@ int run_server(Config & config, void (*master_server_routes)(), void (*replica_s
return 1;
}
LOG(INFO) << "Typesense is starting as a read-only replica... Spawning replication thread...";
std::thread replication_thread([&master_host_port, &store, &config]() {
Replicator::start(::server, master_host_port, config.get_api_key(), store);
LOG(INFO) << "Typesense is starting as a read-only replica... Master URL is: " << master_host_port;
LOG(INFO) << "Spawning replication thread...";
std::thread replication_thread([&store, &config]() {
Replicator::start(::server, config.get_master(), config.get_api_key(), store);
});
replication_thread.detach();

View File

@ -723,6 +723,33 @@ TEST(ArtTest, test_art_fuzzy_search) {
ASSERT_TRUE(res == 0);
}
// Inserts multi-byte (Cyrillic and Tamil) keys and checks that each one can be found both
// through an exact lookup and through a fuzzy search using a cost range of 0 to 1.
TEST(ArtTest, test_art_fuzzy_search_unicode_chars) {
    art_tree t;
    int res = art_tree_init(&t);
    ASSERT_TRUE(res == 0);

    std::vector<const char*> keys = {
        "роман", "обладать", "роисхождения", "без", "பஞ்சமம்", "சுதந்திரமாகவே", "அல்லது", "அடிப்படையில்"
    };

    for(const char* key: keys) {
        art_document doc = get_document((uint32_t) 1);
        // keys are inserted including their terminating NUL byte
        ASSERT_TRUE(NULL == art_insert(&t, (unsigned char*)key, strlen(key)+1, &doc, 1));
    }

    for(const char* key: keys) {
        art_leaf* l = (art_leaf *) art_search(&t, (const unsigned char *)key, strlen(key)+1);
        // fail the test (rather than crash the binary) if the exact lookup misses
        ASSERT_TRUE(l != NULL);
        EXPECT_EQ(1, l->values->ids.at(0));

        std::vector<art_leaf*> leaves;
        art_fuzzy_search(&t, (unsigned char *)key, strlen(key), 0, 1, 10, FREQUENCY, true, leaves);
        ASSERT_EQ(1, leaves.size());
    }

    res = art_tree_destroy(&t);
    ASSERT_TRUE(res == 0);
}
TEST(ArtTest, test_encode_int32) {
unsigned char chars[8];

View File

@ -18,9 +18,9 @@ TEST(StringUtilsTest, ShouldNormalizeString) {
string_utils.unicode_normalize(alphanum_specialchars);
ASSERT_STREQ("aa12zzwr", alphanum_specialchars.c_str());
std::string alphanum_unicodechars = "abcÅà123ß";
std::string alphanum_unicodechars = "abcÅà123ß12";
string_utils.unicode_normalize(alphanum_unicodechars);
ASSERT_STREQ("abcaa123ss", alphanum_unicodechars.c_str());
ASSERT_STREQ("abcaa123ss12", alphanum_unicodechars.c_str());
std::string tamil_unicodechars = "தமிழ் நாடு";
string_utils.unicode_normalize(tamil_unicodechars);
@ -29,6 +29,12 @@ TEST(StringUtilsTest, ShouldNormalizeString) {
std::string chinese_unicodechars = "你好吗";
string_utils.unicode_normalize(chinese_unicodechars);
ASSERT_STREQ("你好吗", chinese_unicodechars.c_str());
std::string mixed_unicodechars = "çн தமிழ் நாடு so...";
string_utils.unicode_normalize(mixed_unicodechars);
ASSERT_STREQ("cнதமிழ்நாடுso", mixed_unicodechars.c_str());
// Any-Latin; Latin-ASCII; Lower()
}
TEST(StringUtilsTest, ShouldJoinString) {