mirror of
https://github.com/typesense/typesense.git
synced 2025-05-22 06:40:30 +08:00
Merge branch 'typesense_server_api'
This commit is contained in:
commit
12f425c6e0
@ -65,7 +65,7 @@ link_directories(${DEP_ROOT_DIR}/${FOR_NAME})
|
||||
link_directories(${DEP_ROOT_DIR}/${H2O_NAME}/build)
|
||||
link_directories(${DEP_ROOT_DIR}/${ROCKSDB_NAME})
|
||||
|
||||
add_executable(typesense-server ${SRC_FILES} src/main/server.cpp)
|
||||
add_executable(typesense-server ${SRC_FILES} src/main/typesense_server.cpp)
|
||||
add_executable(search ${SRC_FILES} src/main/main.cpp)
|
||||
add_executable(benchmark ${SRC_FILES} src/main/benchmark.cpp)
|
||||
add_executable(typesense_test ${SRC_FILES} test/array_test.cpp test/sorted_array_test.cpp test/art_test.cpp
|
||||
@ -83,7 +83,7 @@ if(NOT APPLE)
|
||||
list(APPEND ROCKSDB_LIBS rt)
|
||||
endif()
|
||||
|
||||
target_link_libraries(typesense-server for pthread h2o-evloop ${ROCKSDB_LIBS} ${OPENSSL_LIBRARIES} dl ${STD_LIB})
|
||||
target_link_libraries(typesense-server h2o-evloop for pthread ${ROCKSDB_LIBS} ${OPENSSL_LIBRARIES} dl ${STD_LIB})
|
||||
target_link_libraries(search for pthread h2o-evloop ${ROCKSDB_LIBS} ${OPENSSL_LIBRARIES} dl ${STD_LIB})
|
||||
target_link_libraries(benchmark for pthread ${ROCKSDB_LIBS} ${STD_LIB})
|
||||
target_link_libraries(typesense_test pthread for ${ROCKSDB_LIBS} gtest gtest_main ${STD_LIB})
|
||||
target_link_libraries(benchmark for pthread h2o-evloop ${ROCKSDB_LIBS} ${OPENSSL_LIBRARIES} dl ${STD_LIB})
|
||||
target_link_libraries(typesense_test h2o-evloop ${OPENSSL_LIBRARIES} pthread for ${ROCKSDB_LIBS} gtest gtest_main dl ${STD_LIB})
|
||||
|
20
README.md
20
README.md
@ -3,28 +3,24 @@
|
||||
Typesense is an open source search engine for building a delightful search experience.
|
||||
|
||||
- **Typo tolerance:** Handles typographical errors out-of-the-box
|
||||
- **Tunable ranking + relevancy:** Tailor your search results to perfection
|
||||
- **Tunable ranking:** Tailor your search results to perfection
|
||||
- **Blazing fast:** Meticulously designed and optimized for speed
|
||||
- **Simple and delightful:** Simple API, delightful out-of-the-box experience
|
||||
|
||||
## Development
|
||||
|
||||
### Build from source
|
||||
### Building from source
|
||||
|
||||
Please ensure that you have Docker installed on your system.
|
||||
Building on your machine:
|
||||
|
||||
```
|
||||
$ ./build.sh [--clean]
|
||||
.
|
||||
.
|
||||
.
|
||||
$ ./dockcross build/typesense_test
|
||||
.
|
||||
.
|
||||
.
|
||||
$ ./dockcross build/typesense-server
|
||||
```
|
||||
|
||||
We use [dockcross](https://github.com/dockcross/dockcross) to build our development environment consistently.
|
||||
Building on a Docker container:
|
||||
|
||||
```
|
||||
$ ./docker-build.sh
|
||||
```
|
||||
|
||||
© 2016-2017 Wreally Studios Inc.
|
18
TODO.md
18
TODO.md
@ -30,6 +30,24 @@
|
||||
- ~~Schema validation during insertion (missing fields + type errors)~~
|
||||
- ~~Proper score field for ranking tokens~~
|
||||
- ~~Throw errors when schema is broken~~
|
||||
- ~~Desc/Asc ordering with tests~~
|
||||
- ~~Found count is wrong~~
|
||||
- ~~Filter query in the API~~
|
||||
- ~~Facet limit (hardcode to top 10)~~
|
||||
- ~~Deprecate old split function~~
|
||||
- When prefix=true, use token_ranking_field for token ordering
|
||||
- Search snippet
|
||||
- ID should not have "/"
|
||||
- Group results by field
|
||||
- Use rocksdb batch put for atomic insertion
|
||||
- Test for sorted_array::indexOf when length is 0
|
||||
- Handle store-get() not finding a key
|
||||
- Fix API response codes
|
||||
- Test for search without any sort_by given
|
||||
- Test for asc/desc upper/lower casing
|
||||
- Test for collection creation validation
|
||||
- Test for delete document
|
||||
- Proper pagination
|
||||
- Prevent string copy during indexing
|
||||
- Clean special characters before indexing
|
||||
- Minimum results should be a variable instead of blindly going with max_results
|
||||
|
@ -20,6 +20,8 @@ file(COPY ${CMAKE_SOURCE_DIR}/cmake/patches/build_detect_platform DESTINATION
|
||||
|
||||
if(NOT EXISTS ${DEP_ROOT_DIR}/${ROCKSDB_NAME}/librocksdb.a)
|
||||
message("Building ${ROCKSDB_NAME} locally...")
|
||||
set(ENV{PORTABLE} 1)
|
||||
execute_process(COMMAND make "clean" WORKING_DIRECTORY ${DEP_ROOT_DIR}/${ROCKSDB_NAME}/)
|
||||
execute_process(COMMAND make "static_lib" WORKING_DIRECTORY ${DEP_ROOT_DIR}/${ROCKSDB_NAME}/
|
||||
RESULT_VARIABLE ROCKSDB_BUILD)
|
||||
if(NOT ROCKSDB_BUILD EQUAL 0)
|
||||
|
11
include/api.h
Normal file
11
include/api.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include "http_server.h"
|
||||
|
||||
void post_create_collection(http_req & req, http_res & res);
|
||||
|
||||
void get_search(http_req & req, http_res & res);
|
||||
|
||||
void post_add_document(http_req & req, http_res & res);
|
||||
|
||||
void del_remove_document(http_req & req, http_res & res);
|
@ -14,7 +14,7 @@ private:
|
||||
uint32_t m = std::min(min, value);
|
||||
uint32_t M = std::max(max, value);
|
||||
uint32_t bnew = required_bits(M - m);
|
||||
return METADATA_OVERHEAD + for_compressed_size_bits(new_length, bnew);
|
||||
return METADATA_OVERHEAD + 4 + for_compressed_size_bits(new_length, bnew);
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -24,20 +24,6 @@ protected:
|
||||
return (uint32_t) (v == 0 ? 0 : 32 - __builtin_clz(v));
|
||||
}
|
||||
|
||||
uint32_t inline sorted_append_size_required(uint32_t value, uint32_t new_length) {
|
||||
uint32_t m = std::min(min, value);
|
||||
uint32_t M = std::max(max, value);
|
||||
uint32_t bnew = required_bits(M - m);
|
||||
return METADATA_OVERHEAD + for_compressed_size_bits(new_length, bnew);
|
||||
}
|
||||
|
||||
uint32_t inline unsorted_append_size_required(uint32_t value, uint32_t new_length) {
|
||||
uint32_t m = std::min(min, value);
|
||||
uint32_t M = std::max(max, value);
|
||||
uint32_t bnew = required_bits(M - m);
|
||||
return METADATA_OVERHEAD + for_compressed_size_bits(new_length, bnew);
|
||||
}
|
||||
|
||||
public:
|
||||
array_base(const uint32_t n=2) {
|
||||
size_bytes = METADATA_OVERHEAD + (n * FOR_ELE_SIZE);
|
||||
|
@ -2,12 +2,15 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <stdint.h>
|
||||
#include <array>
|
||||
|
||||
/* Different intersection routines adapted from:
|
||||
* https://github.com/lemire/SIMDCompressionAndIntersection/blob/master/src/intersection.cpp
|
||||
*/
|
||||
class Intersection {
|
||||
class ArrayUtils {
|
||||
public:
|
||||
// Fast scalar scheme designed by N. Kurz. Returns the size of out (intersected set)
|
||||
static size_t scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t *out);
|
||||
static size_t and_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t *out);
|
||||
|
||||
static size_t or_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B, const size_t lenB, uint32_t **out);
|
||||
};
|
@ -110,7 +110,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
art_values* values;
|
||||
uint16_t max_score;
|
||||
uint32_t max_score;
|
||||
uint32_t key_len;
|
||||
unsigned char key[];
|
||||
} art_leaf;
|
||||
|
809
include/cmdline.h
Normal file
809
include/cmdline.h
Normal file
@ -0,0 +1,809 @@
|
||||
/*
|
||||
Copyright (c) 2009, Hideyuki Tanaka
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the <organization> nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY <copyright holder> ''AS IS'' AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <typeinfo>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include <cxxabi.h>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace cmdline{
|
||||
|
||||
// Internal helpers: value <-> string conversion and human-readable type names.
namespace detail{

// Generic conversion between two different types: the source is written to a
// std::stringstream and the target is read back from it. Throws std::bad_cast
// when the write fails, the read fails, or the stream is not at end-of-file
// after the read (i.e. input was left over).
template <typename Target, typename Source, bool Same>
class lexical_cast_t{
public:
  static Target cast(const Source &arg){
    Target ret;
    std::stringstream ss;
    if (!(ss<<arg && ss>>ret && ss.eof()))
      throw std::bad_cast();

    return ret;
  }
};

// Target and Source are the same type: hand the argument back as-is.
template <typename Target, typename Source>
class lexical_cast_t<Target, Source, true>{
public:
  static Target cast(const Source &arg){
    return arg;
  }
};

// Anything -> std::string: format the value with operator<<.
template <typename Source>
class lexical_cast_t<std::string, Source, false>{
public:
  static std::string cast(const Source &arg){
    std::ostringstream ss;
    ss<<arg;
    return ss.str();
  }
};

// std::string -> anything: read the value with operator>>. Throws
// std::bad_cast when the read fails or the stream is not at end-of-file
// afterwards.
template <typename Target>
class lexical_cast_t<Target, std::string, false>{
public:
  static Target cast(const std::string &arg){
    Target ret;
    std::istringstream ss(arg);
    if (!(ss>>ret && ss.eof()))
      throw std::bad_cast();
    return ret;
  }
};

// Minimal type-equality trait used to select the lexical_cast_t variant.
template <typename T1, typename T2>
struct is_same {
  static const bool value = false;
};

template <typename T>
struct is_same<T, T>{
  static const bool value = true;
};

// Converts arg to Target, dispatching to the lexical_cast_t specialization
// that matches the (Target, Source) pair.
template<typename Target, typename Source>
Target lexical_cast(const Source &arg)
{
  return lexical_cast_t<Target, Source, detail::is_same<Target, Source>::value>::cast(arg);
}

// Returns the demangled form of a mangled C++ name. When the name cannot be
// demangled the input is returned unchanged.
static inline std::string demangle(const std::string &name)
{
  int status=0;
  char *p=abi::__cxa_demangle(name.c_str(), 0, 0, &status);
  // __cxa_demangle yields NULL (and a non-zero status) on failure; building a
  // std::string from a null pointer is undefined, so fall back to the raw name.
  if (status!=0 || p==NULL){
    free(p); // free(NULL) is a no-op
    return name;
  }
  std::string ret(p);
  free(p); // the buffer was malloc()ed by __cxa_demangle
  return ret;
}

// Human-readable name of T, obtained by demangling typeid(T).name().
template <class T>
std::string readable_typename()
{
  return demangle(typeid(T).name());
}

// Textual form of a default value, as produced by lexical_cast<std::string>.
template <class T>
std::string default_value(T def)
{
  return detail::lexical_cast<std::string>(def);
}

// std::string is reported as plain "string" instead of its demangled name.
template <>
inline std::string readable_typename<std::string>()
{
  return "string";
}

} // detail
|
||||
|
||||
//-----
|
||||
|
||||
// Exception type thrown by the cmdline library. what() returns the message
// that was passed to the constructor.
class cmdline_error : public std::exception {
public:
  cmdline_error(const std::string &msg)
    : text(msg)
  {
  }

  ~cmdline_error() throw()
  {
  }

  const char *what() const throw()
  {
    return text.c_str();
  }

private:
  std::string text; // owned copy of the message
};
|
||||
|
||||
template <class T>
|
||||
struct default_reader{
|
||||
T operator()(const std::string &str){
|
||||
return detail::lexical_cast<T>(str);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct range_reader{
|
||||
range_reader(const T &low, const T &high): low(low), high(high) {}
|
||||
T operator()(const std::string &s) const {
|
||||
T ret=default_reader<T>()(s);
|
||||
if (!(ret>=low && ret<=high)) throw cmdline::cmdline_error("range_error");
|
||||
return ret;
|
||||
}
|
||||
private:
|
||||
T low, high;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
range_reader<T> range(const T &low, const T &high)
|
||||
{
|
||||
return range_reader<T>(low, high);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
struct oneof_reader{
|
||||
T operator()(const std::string &s){
|
||||
T ret=default_reader<T>()(s);
|
||||
if (std::find(alt.begin(), alt.end(), ret)==alt.end())
|
||||
throw cmdline_error("");
|
||||
return ret;
|
||||
}
|
||||
void add(const T &v){ alt.push_back(v); }
|
||||
private:
|
||||
std::vector<T> alt;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
ret.add(a5);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
ret.add(a5);
|
||||
ret.add(a6);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
ret.add(a5);
|
||||
ret.add(a6);
|
||||
ret.add(a7);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
ret.add(a5);
|
||||
ret.add(a6);
|
||||
ret.add(a7);
|
||||
ret.add(a8);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
ret.add(a5);
|
||||
ret.add(a6);
|
||||
ret.add(a7);
|
||||
ret.add(a8);
|
||||
ret.add(a9);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
oneof_reader<T> oneof(T a1, T a2, T a3, T a4, T a5, T a6, T a7, T a8, T a9, T a10)
|
||||
{
|
||||
oneof_reader<T> ret;
|
||||
ret.add(a1);
|
||||
ret.add(a2);
|
||||
ret.add(a3);
|
||||
ret.add(a4);
|
||||
ret.add(a5);
|
||||
ret.add(a6);
|
||||
ret.add(a7);
|
||||
ret.add(a8);
|
||||
ret.add(a9);
|
||||
ret.add(a10);
|
||||
return ret;
|
||||
}
|
||||
|
||||
//-----
|
||||
|
||||
class parser{
|
||||
public:
|
||||
parser(){
|
||||
}
|
||||
~parser(){
|
||||
for (std::map<std::string, option_base*>::iterator p=options.begin();
|
||||
p!=options.end(); p++)
|
||||
delete p->second;
|
||||
}
|
||||
|
||||
void add(const std::string &name,
|
||||
char short_name=0,
|
||||
const std::string &desc=""){
|
||||
if (options.count(name)) throw cmdline_error("multiple definition: "+name);
|
||||
options[name]=new option_without_value(name, short_name, desc);
|
||||
ordered.push_back(options[name]);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void add(const std::string &name,
|
||||
char short_name=0,
|
||||
const std::string &desc="",
|
||||
bool need=true,
|
||||
const T def=T()){
|
||||
add(name, short_name, desc, need, def, default_reader<T>());
|
||||
}
|
||||
|
||||
template <class T, class F>
|
||||
void add(const std::string &name,
|
||||
char short_name=0,
|
||||
const std::string &desc="",
|
||||
bool need=true,
|
||||
const T def=T(),
|
||||
F reader=F()){
|
||||
if (options.count(name)) throw cmdline_error("multiple definition: "+name);
|
||||
options[name]=new option_with_value_with_reader<T, F>(name, short_name, need, def, desc, reader);
|
||||
ordered.push_back(options[name]);
|
||||
}
|
||||
|
||||
void footer(const std::string &f){
|
||||
ftr=f;
|
||||
}
|
||||
|
||||
void set_program_name(const std::string &name){
|
||||
prog_name=name;
|
||||
}
|
||||
|
||||
bool exist(const std::string &name) const {
|
||||
if (options.count(name)==0) throw cmdline_error("there is no flag: --"+name);
|
||||
return options.find(name)->second->has_set();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
const T &get(const std::string &name) const {
|
||||
if (options.count(name)==0) throw cmdline_error("there is no flag: --"+name);
|
||||
const option_with_value<T> *p=dynamic_cast<const option_with_value<T>*>(options.find(name)->second);
|
||||
if (p==NULL) throw cmdline_error("type mismatch flag '"+name+"'");
|
||||
return p->get();
|
||||
}
|
||||
|
||||
const std::vector<std::string> &rest() const {
|
||||
return others;
|
||||
}
|
||||
|
||||
bool parse(const std::string &arg){
|
||||
std::vector<std::string> args;
|
||||
|
||||
std::string buf;
|
||||
bool in_quote=false;
|
||||
for (std::string::size_type i=0; i<arg.length(); i++){
|
||||
if (arg[i]=='\"'){
|
||||
in_quote=!in_quote;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg[i]==' ' && !in_quote){
|
||||
args.push_back(buf);
|
||||
buf="";
|
||||
continue;
|
||||
}
|
||||
|
||||
if (arg[i]=='\\'){
|
||||
i++;
|
||||
if (i>=arg.length()){
|
||||
errors.push_back("unexpected occurrence of '\\' at end of string");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
buf+=arg[i];
|
||||
}
|
||||
|
||||
if (in_quote){
|
||||
errors.push_back("quote is not closed");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (buf.length()>0)
|
||||
args.push_back(buf);
|
||||
|
||||
for (size_t i=0; i<args.size(); i++)
|
||||
std::cout<<"\""<<args[i]<<"\""<<std::endl;
|
||||
|
||||
return parse(args);
|
||||
}
|
||||
|
||||
bool parse(const std::vector<std::string> &args){
|
||||
int argc=static_cast<int>(args.size());
|
||||
std::vector<const char*> argv(argc);
|
||||
|
||||
for (int i=0; i<argc; i++)
|
||||
argv[i]=args[i].c_str();
|
||||
|
||||
return parse(argc, &argv[0]);
|
||||
}
|
||||
|
||||
bool parse(int argc, const char * const argv[]){
|
||||
errors.clear();
|
||||
others.clear();
|
||||
|
||||
if (argc<1){
|
||||
errors.push_back("argument number must be longer than 0");
|
||||
return false;
|
||||
}
|
||||
if (prog_name=="")
|
||||
prog_name=argv[0];
|
||||
|
||||
std::map<char, std::string> lookup;
|
||||
for (std::map<std::string, option_base*>::iterator p=options.begin();
|
||||
p!=options.end(); p++){
|
||||
if (p->first.length()==0) continue;
|
||||
char initial=p->second->short_name();
|
||||
if (initial){
|
||||
if (lookup.count(initial)>0){
|
||||
lookup[initial]="";
|
||||
errors.push_back(std::string("short option '")+initial+"' is ambiguous");
|
||||
return false;
|
||||
}
|
||||
else lookup[initial]=p->first;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=1; i<argc; i++){
|
||||
if (strncmp(argv[i], "--", 2)==0){
|
||||
const char *p=strchr(argv[i]+2, '=');
|
||||
if (p){
|
||||
std::string name(argv[i]+2, p);
|
||||
std::string val(p+1);
|
||||
set_option(name, val);
|
||||
}
|
||||
else{
|
||||
std::string name(argv[i]+2);
|
||||
if (options.count(name)==0){
|
||||
errors.push_back("undefined option: --"+name);
|
||||
continue;
|
||||
}
|
||||
if (options[name]->has_value()){
|
||||
if (i+1>=argc){
|
||||
errors.push_back("option needs value: --"+name);
|
||||
continue;
|
||||
}
|
||||
else{
|
||||
i++;
|
||||
set_option(name, argv[i]);
|
||||
}
|
||||
}
|
||||
else{
|
||||
set_option(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (strncmp(argv[i], "-", 1)==0){
|
||||
if (!argv[i][1]) continue;
|
||||
char last=argv[i][1];
|
||||
for (int j=2; argv[i][j]; j++){
|
||||
last=argv[i][j];
|
||||
if (lookup.count(argv[i][j-1])==0){
|
||||
errors.push_back(std::string("undefined short option: -")+argv[i][j-1]);
|
||||
continue;
|
||||
}
|
||||
if (lookup[argv[i][j-1]]==""){
|
||||
errors.push_back(std::string("ambiguous short option: -")+argv[i][j-1]);
|
||||
continue;
|
||||
}
|
||||
set_option(lookup[argv[i][j-1]]);
|
||||
}
|
||||
|
||||
if (lookup.count(last)==0){
|
||||
errors.push_back(std::string("undefined short option: -")+last);
|
||||
continue;
|
||||
}
|
||||
if (lookup[last]==""){
|
||||
errors.push_back(std::string("ambiguous short option: -")+last);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i+1<argc && options[lookup[last]]->has_value()){
|
||||
set_option(lookup[last], argv[i+1]);
|
||||
i++;
|
||||
}
|
||||
else{
|
||||
set_option(lookup[last]);
|
||||
}
|
||||
}
|
||||
else{
|
||||
others.push_back(argv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (std::map<std::string, option_base*>::iterator p=options.begin();
|
||||
p!=options.end(); p++)
|
||||
if (!p->second->valid())
|
||||
errors.push_back("need option: --"+std::string(p->first));
|
||||
|
||||
return errors.size()==0;
|
||||
}
|
||||
|
||||
void parse_check(const std::string &arg){
|
||||
if (!options.count("help"))
|
||||
add("help", '?', "print this message");
|
||||
check(0, parse(arg));
|
||||
}
|
||||
|
||||
void parse_check(const std::vector<std::string> &args){
|
||||
if (!options.count("help"))
|
||||
add("help", '?', "print this message");
|
||||
check(args.size(), parse(args));
|
||||
}
|
||||
|
||||
void parse_check(int argc, char *argv[]){
|
||||
if (!options.count("help"))
|
||||
add("help", '?', "print this message");
|
||||
check(argc, parse(argc, argv));
|
||||
}
|
||||
|
||||
std::string error() const{
|
||||
return errors.size()>0?errors[0]:"";
|
||||
}
|
||||
|
||||
std::string error_full() const{
|
||||
std::ostringstream oss;
|
||||
for (size_t i=0; i<errors.size(); i++)
|
||||
oss<<errors[i]<<std::endl;
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
std::string usage() const {
|
||||
std::ostringstream oss;
|
||||
oss<<"usage: "<<prog_name<<" ";
|
||||
for (size_t i=0; i<ordered.size(); i++){
|
||||
if (ordered[i]->must())
|
||||
oss<<ordered[i]->short_description()<<" ";
|
||||
}
|
||||
|
||||
oss<<"[options] ... "<<ftr<<std::endl;
|
||||
oss<<"options:"<<std::endl;
|
||||
|
||||
size_t max_width=0;
|
||||
for (size_t i=0; i<ordered.size(); i++){
|
||||
max_width=std::max(max_width, ordered[i]->name().length());
|
||||
}
|
||||
for (size_t i=0; i<ordered.size(); i++){
|
||||
if (ordered[i]->short_name()){
|
||||
oss<<" -"<<ordered[i]->short_name()<<", ";
|
||||
}
|
||||
else{
|
||||
oss<<" ";
|
||||
}
|
||||
|
||||
oss<<"--"<<ordered[i]->name();
|
||||
for (size_t j=ordered[i]->name().length(); j<max_width+4; j++)
|
||||
oss<<' ';
|
||||
oss<<ordered[i]->description()<<std::endl;
|
||||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void check(int argc, bool ok){
|
||||
if ((argc==1 && !ok) || exist("help")){
|
||||
std::cerr<<usage();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (!ok){
|
||||
std::cerr<<error()<<std::endl<<usage();
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void set_option(const std::string &name){
|
||||
if (options.count(name)==0){
|
||||
errors.push_back("undefined option: --"+name);
|
||||
return;
|
||||
}
|
||||
if (!options[name]->set()){
|
||||
errors.push_back("option needs value: --"+name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void set_option(const std::string &name, const std::string &value){
|
||||
if (options.count(name)==0){
|
||||
errors.push_back("undefined option: --"+name);
|
||||
return;
|
||||
}
|
||||
if (!options[name]->set(value)){
|
||||
errors.push_back("option value is invalid: --"+name+"="+value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
class option_base{
|
||||
public:
|
||||
virtual ~option_base(){}
|
||||
|
||||
virtual bool has_value() const=0;
|
||||
virtual bool set()=0;
|
||||
virtual bool set(const std::string &value)=0;
|
||||
virtual bool has_set() const=0;
|
||||
virtual bool valid() const=0;
|
||||
virtual bool must() const=0;
|
||||
|
||||
virtual const std::string &name() const=0;
|
||||
virtual char short_name() const=0;
|
||||
virtual const std::string &description() const=0;
|
||||
virtual std::string short_description() const=0;
|
||||
};
|
||||
|
||||
class option_without_value : public option_base {
|
||||
public:
|
||||
option_without_value(const std::string &name,
|
||||
char short_name,
|
||||
const std::string &desc)
|
||||
:nam(name), snam(short_name), desc(desc), has(false){
|
||||
}
|
||||
~option_without_value(){}
|
||||
|
||||
bool has_value() const { return false; }
|
||||
|
||||
bool set(){
|
||||
has=true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool set(const std::string &){
|
||||
return false;
|
||||
}
|
||||
|
||||
bool has_set() const {
|
||||
return has;
|
||||
}
|
||||
|
||||
bool valid() const{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool must() const{
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::string &name() const{
|
||||
return nam;
|
||||
}
|
||||
|
||||
char short_name() const{
|
||||
return snam;
|
||||
}
|
||||
|
||||
const std::string &description() const {
|
||||
return desc;
|
||||
}
|
||||
|
||||
std::string short_description() const{
|
||||
return "--"+nam;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string nam;
|
||||
char snam;
|
||||
std::string desc;
|
||||
bool has;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
class option_with_value : public option_base {
|
||||
public:
|
||||
option_with_value(const std::string &name,
|
||||
char short_name,
|
||||
bool need,
|
||||
const T &def,
|
||||
const std::string &desc)
|
||||
: nam(name), snam(short_name), need(need), has(false)
|
||||
, def(def), actual(def) {
|
||||
this->desc=full_description(desc);
|
||||
}
|
||||
~option_with_value(){}
|
||||
|
||||
const T &get() const {
|
||||
return actual;
|
||||
}
|
||||
|
||||
bool has_value() const { return true; }
|
||||
|
||||
bool set(){
|
||||
return false;
|
||||
}
|
||||
|
||||
bool set(const std::string &value){
|
||||
try{
|
||||
actual=read(value);
|
||||
has=true;
|
||||
}
|
||||
catch(const std::exception &e){
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool has_set() const{
|
||||
return has;
|
||||
}
|
||||
|
||||
bool valid() const{
|
||||
if (need && !has) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool must() const{
|
||||
return need;
|
||||
}
|
||||
|
||||
const std::string &name() const{
|
||||
return nam;
|
||||
}
|
||||
|
||||
char short_name() const{
|
||||
return snam;
|
||||
}
|
||||
|
||||
const std::string &description() const {
|
||||
return desc;
|
||||
}
|
||||
|
||||
std::string short_description() const{
|
||||
return "--"+nam+"="+detail::readable_typename<T>();
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string full_description(const std::string &desc){
|
||||
return
|
||||
desc+" ("+detail::readable_typename<T>()+
|
||||
(need?"":" [="+detail::default_value<T>(def)+"]")
|
||||
+")";
|
||||
}
|
||||
|
||||
virtual T read(const std::string &s)=0;
|
||||
|
||||
std::string nam;
|
||||
char snam;
|
||||
bool need;
|
||||
std::string desc;
|
||||
|
||||
bool has;
|
||||
T def;
|
||||
T actual;
|
||||
};
|
||||
|
||||
template <class T, class F>
|
||||
class option_with_value_with_reader : public option_with_value<T> {
|
||||
public:
|
||||
option_with_value_with_reader(const std::string &name,
|
||||
char short_name,
|
||||
bool need,
|
||||
const T def,
|
||||
const std::string &desc,
|
||||
F reader)
|
||||
: option_with_value<T>(name, short_name, need, def, desc), reader(reader){
|
||||
}
|
||||
|
||||
private:
|
||||
T read(const std::string &s){
|
||||
return reader(s);
|
||||
}
|
||||
|
||||
F reader;
|
||||
};
|
||||
|
||||
std::map<std::string, option_base*> options;
|
||||
std::vector<option_base*> ordered;
|
||||
std::string ftr;
|
||||
|
||||
std::string prog_name;
|
||||
std::vector<std::string> others;
|
||||
|
||||
std::vector<std::string> errors;
|
||||
};
|
||||
|
||||
} // cmdline
|
@ -10,6 +10,33 @@
|
||||
#include <field.h>
|
||||
#include <option.h>
|
||||
|
||||
struct facet_value {
|
||||
// use string to int mapping for saving memory
|
||||
spp::sparse_hash_map<std::string, uint32_t> value_index;
|
||||
spp::sparse_hash_map<uint32_t, std::string> index_value;
|
||||
|
||||
spp::sparse_hash_map<uint32_t, std::vector<uint32_t>> doc_values;
|
||||
|
||||
uint32_t get_value_index(const std::string & value) {
|
||||
if(value_index.count(value) != 0) {
|
||||
return value_index[value];
|
||||
}
|
||||
|
||||
uint32_t new_index = value_index.size();
|
||||
value_index.emplace(value, new_index);
|
||||
index_value.emplace(new_index, value);
|
||||
return new_index;
|
||||
}
|
||||
|
||||
void index_values(uint32_t doc_seq_id, const std::vector<std::string> & values) {
|
||||
std::vector<uint32_t> value_vec(values.size());
|
||||
for(auto i = 0; i < values.size(); i++) {
|
||||
value_vec[i] = get_value_index(values[i]);
|
||||
}
|
||||
doc_values.emplace(doc_seq_id, value_vec);
|
||||
}
|
||||
};
|
||||
|
||||
class Collection {
|
||||
private:
|
||||
std::string name;
|
||||
@ -23,19 +50,19 @@ private:
|
||||
|
||||
spp::sparse_hash_map<std::string, field> facet_schema;
|
||||
|
||||
std::vector<std::string> rank_fields;
|
||||
std::vector<field> sort_fields;
|
||||
|
||||
Store* store;
|
||||
|
||||
spp::sparse_hash_map<std::string, art_tree*> search_index;
|
||||
|
||||
spp::sparse_hash_map<std::string, art_tree*> facet_index;
|
||||
spp::sparse_hash_map<std::string, facet_value> facet_index;
|
||||
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> rank_index;
|
||||
spp::sparse_hash_map<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> sort_index;
|
||||
|
||||
std::string token_ordering_field;
|
||||
std::string token_ranking_field;
|
||||
|
||||
std::string get_doc_id_key(std::string doc_id);
|
||||
std::string get_doc_id_key(const std::string & doc_id);
|
||||
|
||||
std::string get_seq_id_key(uint32_t seq_id);
|
||||
|
||||
@ -51,14 +78,15 @@ private:
|
||||
void do_facets(std::vector<facet> & facets, uint32_t* result_ids, size_t results_size);
|
||||
|
||||
void search_field(std::string & query, const std::string & field, uint32_t *filter_ids, size_t filter_ids_length,
|
||||
std::vector<facet> & facets, const std::vector<std::string> & rank_fields, const int num_typos,
|
||||
const size_t num_results, Topster<100> &topster, size_t & num_found,
|
||||
const token_ordering token_order = FREQUENCY, const bool prefix = false);
|
||||
std::vector<facet> & facets, const std::vector<sort_field> & sort_fields, const int num_typos,
|
||||
const size_t num_results, Topster<100> &topster, uint32_t** all_result_ids,
|
||||
size_t & all_result_ids_len, const token_ordering token_order = FREQUENCY, const bool prefix = false);
|
||||
|
||||
void search_candidates(uint32_t* filter_ids, size_t filter_ids_length, std::vector<facet> & facets,
|
||||
const std::vector<std::string> & rank_fields, int & token_rank,
|
||||
const std::vector<sort_field> & sort_fields, int & token_rank,
|
||||
std::vector<std::vector<art_leaf*>> & token_leaves, Topster<100> & topster,
|
||||
size_t & total_results, size_t & num_found, const size_t & max_results);
|
||||
size_t & total_results, uint32_t** all_result_ids, size_t & all_result_ids_len,
|
||||
const size_t & max_results);
|
||||
|
||||
void index_string_field(const std::string & text, const uint32_t score, art_tree *t, uint32_t seq_id,
|
||||
const bool verbatim) const;
|
||||
@ -82,13 +110,13 @@ public:
|
||||
|
||||
Collection(const std::string name, const uint32_t collection_id, const uint32_t next_seq_id, Store *store,
|
||||
const std::vector<field> & search_fields, const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields, const std::string token_ordering_field);
|
||||
const std::vector<field> & sort_fields, const std::string token_ranking_field);
|
||||
|
||||
~Collection();
|
||||
|
||||
static std::string get_next_seq_id_key(std::string collection_name);
|
||||
static std::string get_next_seq_id_key(const std::string & collection_name);
|
||||
|
||||
static std::string get_meta_key(std::string collection_name);
|
||||
static std::string get_meta_key(const std::string & collection_name);
|
||||
|
||||
std::string get_seq_id_collection_prefix();
|
||||
|
||||
@ -100,26 +128,26 @@ public:
|
||||
|
||||
std::vector<std::string> get_facet_fields();
|
||||
|
||||
std::vector<std::string> get_rank_fields();
|
||||
std::vector<field> get_sort_fields();
|
||||
|
||||
spp::sparse_hash_map<std::string, field> get_schema();
|
||||
|
||||
std::string get_token_ordering_field();
|
||||
std::string get_token_ranking_field();
|
||||
|
||||
Option<std::string> add(std::string json_str);
|
||||
Option<std::string> add(const std::string & json_str);
|
||||
|
||||
nlohmann::json search(std::string query, const std::vector<std::string> search_fields,
|
||||
const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields, const int num_typos,
|
||||
const std::vector<sort_field> & sort_fields, const int num_typos,
|
||||
const size_t num_results, const token_ordering token_order = FREQUENCY, const bool prefix = false);
|
||||
|
||||
void remove(std::string id);
|
||||
Option<std::string> remove(const std::string & id);
|
||||
|
||||
void score_results(const std::vector<std::string> & rank_fields, const int & token_rank, Topster<100> &topster,
|
||||
void score_results(const std::vector<sort_field> & sort_fields, const int & token_rank, Topster<100> &topster,
|
||||
const std::vector<art_leaf *> & query_suggestion, const uint32_t *result_ids,
|
||||
const size_t result_size) const;
|
||||
|
||||
Option<uint32_t> index_in_memory(const nlohmann::json &document, uint32_t seq_id);
|
||||
Option<uint32_t> index_in_memory(const nlohmann::json & document, uint32_t seq_id);
|
||||
|
||||
enum {MAX_SEARCH_TOKENS = 20};
|
||||
enum {MAX_RESULTS = 100};
|
||||
|
@ -22,19 +22,19 @@ private:
|
||||
static constexpr const char* COLLECTION_ID_KEY = "id";
|
||||
static constexpr const char* COLLECTION_SEARCH_FIELDS_KEY = "search_fields";
|
||||
static constexpr const char* COLLECTION_FACET_FIELDS_KEY = "facet_fields";
|
||||
static constexpr const char* COLLECTION_RANK_FIELDS_KEY = "rank_fields";
|
||||
static constexpr const char* COLLECTION_TOKEN_ORDERING_FIELD_KEY = "token_ordering_field";
|
||||
static constexpr const char* COLLECTION_SORT_FIELDS_KEY = "sort_fields";
|
||||
static constexpr const char* COLLECTION_TOKEN_ORDERING_FIELD_KEY = "token_ranking_field";
|
||||
|
||||
CollectionManager();
|
||||
|
||||
~CollectionManager() = default;
|
||||
|
||||
public:
|
||||
static CollectionManager& get_instance() {
|
||||
static CollectionManager & get_instance() {
|
||||
static CollectionManager instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
~CollectionManager();
|
||||
|
||||
CollectionManager(CollectionManager const&) = delete;
|
||||
void operator=(CollectionManager const&) = delete;
|
||||
|
||||
@ -42,8 +42,8 @@ public:
|
||||
|
||||
Collection* create_collection(std::string name, const std::vector<field> & search_fields,
|
||||
const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields,
|
||||
const std::string & token_ordering_field = "");
|
||||
const std::vector<field> & sort_fields,
|
||||
const std::string & token_ranking_field = "");
|
||||
|
||||
Collection* get_collection(std::string collection_name);
|
||||
|
||||
|
@ -23,7 +23,7 @@ struct field {
|
||||
std::string name;
|
||||
std::string type;
|
||||
|
||||
field(std::string name, std::string type): name(name), type(type) {
|
||||
field(const std::string & name, const std::string & type): name(name), type(type) {
|
||||
|
||||
}
|
||||
|
||||
@ -64,6 +64,28 @@ struct filter {
|
||||
}
|
||||
};
|
||||
|
||||
// String constants used when describing a sort expression.
// `const` objects at namespace scope have internal linkage, so each translation unit
// that includes this header gets its own copy.
namespace sort_field_const {
    // Key names.
    const std::string name("name");
    const std::string order("order");

    // Sort direction values.
    const std::string asc("ASC");
    const std::string desc("DESC");
}
|
||||
|
||||
// A single sort criterion: the name of the field to sort on and the requested order.
struct sort_field {
    std::string name;   // field to sort by
    std::string order;  // sort order string as supplied by the caller

    sort_field(const std::string & name, const std::string & order): name(name), order(order) {

    }

    // Copy-and-swap assignment. `other` is taken by value, so it is already a private copy
    // of the right-hand side; swapping its strings into place avoids copying each string a
    // second time and leaves the old contents to be released when `other` is destroyed.
    sort_field& operator=(sort_field other) {
        name.swap(other.name);
        order.swap(other.order);
        return *this;
    }
};
|
||||
|
||||
struct facet {
|
||||
const std::string field_name;
|
||||
std::map<std::string, size_t> result_map;
|
||||
|
112
include/http_server.h
Normal file
112
include/http_server.h
Normal file
@ -0,0 +1,112 @@
|
||||
#pragma once
|
||||
|
||||
#define H2O_USE_LIBUV 0
|
||||
|
||||
extern "C" {
|
||||
#include "h2o.h"
|
||||
#include "h2o/http1.h"
|
||||
#include "h2o/http2.h"
|
||||
}
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
#include "collection.h"
|
||||
#include "collection_manager.h"
|
||||
|
||||
// Response filled in by a route handler: the HTTP status code and the body to send back.
// The send_* helpers only record the status and body on this object.
struct http_res {
    uint32_t status_code;
    std::string body;

    // Escapes `text` so it can be embedded inside a double-quoted JSON string.
    // Error messages may themselves contain quotes or backslashes; without escaping
    // the generated body would not be valid JSON.
    static std::string escape_json(const std::string & text) {
        static const char * const HEX = "0123456789abcdef";
        std::string escaped;
        escaped.reserve(text.size());

        for(const char ch: text) {
            switch(ch) {
                case '"':  escaped += "\\\""; break;
                case '\\': escaped += "\\\\"; break;
                case '\n': escaped += "\\n";  break;
                case '\r': escaped += "\\r";  break;
                case '\t': escaped += "\\t";  break;
                default:
                    if(static_cast<unsigned char>(ch) < 0x20) {
                        // remaining control characters must be written as \u00XX
                        escaped += "\\u00";
                        escaped += HEX[(ch >> 4) & 0xF];
                        escaped += HEX[ch & 0xF];
                    } else {
                        escaped += ch;
                    }
            }
        }

        return escaped;
    }

    // Wraps `message` into the JSON error envelope: {"message": "<message>"}
    static std::string message_body(const std::string & message) {
        return "{\"message\": \"" + escape_json(message) + "\"}";
    }

    // 200 with the given body, sent verbatim.
    void send_200(const std::string & res_body) {
        status_code = 200;
        body = res_body;
    }

    // 201 with the given body, sent verbatim.
    void send_201(const std::string & res_body) {
        status_code = 201;
        body = res_body;
    }

    // 400 with `message` wrapped in the JSON error envelope.
    void send_400(const std::string & message) {
        status_code = 400;
        body = message_body(message);
    }

    // 404 with a fixed "Not Found" message.
    void send_404() {
        status_code = 404;
        body = "{\"message\": \"Not Found\"}";
    }

    // 409 with `message` wrapped in the JSON error envelope.
    void send_409(const std::string & message) {
        status_code = 409;
        body = message_body(message);
    }

    // 500 with the given body, sent verbatim.
    void send_500(const std::string & res_body) {
        status_code = 500;
        body = res_body;
    }

    // Arbitrary status code with `message` wrapped in the JSON error envelope.
    void send(uint32_t code, const std::string & message) {
        status_code = code;
        body = message_body(message);
    }
};
|
||||
|
||||
// Request data handed to a route handler.
struct http_req {
    // Named request parameters, keyed by parameter name.
    std::map<std::string, std::string> params;

    // Request body as received.
    std::string body;
};
|
||||
|
||||
struct route_path {
|
||||
std::string http_method;
|
||||
std::vector<std::string> path_parts;
|
||||
void (*handler)(http_req & req, http_res &);
|
||||
|
||||
inline bool operator< (const route_path& rhs) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class HttpServer {
|
||||
private:
|
||||
static h2o_globalconf_t config;
|
||||
static h2o_context_t ctx;
|
||||
static h2o_accept_ctx_t accept_ctx;
|
||||
static std::vector<route_path> routes;
|
||||
|
||||
std::string listen_address;
|
||||
|
||||
uint32_t listen_port;
|
||||
|
||||
h2o_hostconf_t *hostconf;
|
||||
|
||||
static void on_accept(h2o_socket_t *listener, const char *err);
|
||||
|
||||
int create_listener();
|
||||
|
||||
h2o_pathconf_t *register_handler(h2o_hostconf_t *hostconf, const char *path,
|
||||
int (*on_req)(h2o_handler_t *, h2o_req_t *));
|
||||
|
||||
static const char* get_status_reason(uint32_t status_code);
|
||||
|
||||
static std::map<std::string, std::string> parse_query(const std::string& query);
|
||||
|
||||
static int catch_all_handler(h2o_handler_t *self, h2o_req_t *req);
|
||||
|
||||
public:
|
||||
HttpServer(std::string listen_address, uint32_t listen_port);
|
||||
|
||||
~HttpServer();
|
||||
|
||||
void get(const std::string & path, void (*handler)(http_req & req, http_res &));
|
||||
|
||||
void post(const std::string & path, void (*handler)(http_req &, http_res &));
|
||||
|
||||
void put(const std::string & path, void (*handler)(http_req &, http_res &));
|
||||
|
||||
void del(const std::string & path, void (*handler)(http_req &, http_res &));
|
||||
|
||||
int run();
|
||||
};
|
@ -16,7 +16,7 @@ private:
|
||||
uint32_t m = std::min(min, value);
|
||||
uint32_t M = std::max(max, value);
|
||||
uint32_t bnew = required_bits(M - m);
|
||||
return METADATA_OVERHEAD + for_compressed_size_bits(new_length, bnew);
|
||||
return METADATA_OVERHEAD + 4 + for_compressed_size_bits(new_length, bnew);
|
||||
}
|
||||
|
||||
uint32_t lower_bound_search_bits(const uint8_t *in, uint32_t imin, uint32_t imax, uint32_t base,
|
||||
|
@ -24,6 +24,13 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Outcome of a key-value store operation.
// The explicit values match the implicit numbering of the enumerators.
enum StoreStatus {
    FOUND = 0,      // the key exists and its value was read
    OK = 1,
    NOT_FOUND = 2,  // the key does not exist
    ERROR = 3       // the underlying store reported a failure
};
|
||||
|
||||
/*
|
||||
* Abstraction for underlying KV store (RocksDB)
|
||||
*/
|
||||
@ -72,9 +79,18 @@ public:
|
||||
return status.ok() && !status.IsNotFound();
|
||||
}
|
||||
|
||||
bool get(const std::string& key, std::string& value) {
|
||||
StoreStatus get(const std::string& key, std::string& value) {
|
||||
rocksdb::Status status = db->Get(rocksdb::ReadOptions(), key, &value);
|
||||
return status.ok();
|
||||
|
||||
if(status.IsNotFound()) {
|
||||
return StoreStatus::NOT_FOUND;
|
||||
}
|
||||
|
||||
if(!status.ok()) {
|
||||
return StoreStatus::ERROR;
|
||||
}
|
||||
|
||||
return StoreStatus::FOUND;
|
||||
}
|
||||
|
||||
bool remove(const std::string& key) {
|
||||
|
@ -5,46 +5,6 @@
|
||||
#include <sstream>
|
||||
|
||||
struct StringUtils {
|
||||
|
||||
template<class ContainerT>
|
||||
static void tokenize(const std::string &str, ContainerT &tokens,
|
||||
const std::string &delimiters = " ", bool trimEmpty = true, unsigned long maxTokenLength = 100) {
|
||||
const std::string truncated_str = str.substr(0, maxTokenLength);
|
||||
std::string::size_type pos, lastPos = 0;
|
||||
|
||||
using value_type = typename ContainerT::value_type;
|
||||
using size_type = typename ContainerT::size_type;
|
||||
|
||||
while (true) {
|
||||
pos = truncated_str.find_first_of(delimiters, lastPos);
|
||||
if (pos == std::string::npos) {
|
||||
pos = truncated_str.length();
|
||||
|
||||
if (pos != lastPos || !trimEmpty)
|
||||
tokens.push_back(value_type(truncated_str.data() + lastPos,
|
||||
(size_type) pos - lastPos));
|
||||
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (pos != lastPos || !trimEmpty)
|
||||
tokens.push_back(value_type(truncated_str.data() + lastPos,
|
||||
(size_type) pos - lastPos));
|
||||
}
|
||||
|
||||
lastPos = pos + 1;
|
||||
}
|
||||
}
|
||||
|
||||
static std::string replace_all(std::string str, const std::string &from, const std::string &to) {
|
||||
size_t start_pos = 0;
|
||||
while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
|
||||
str.replace(start_pos, from.length(), to);
|
||||
start_pos += to.length(); // Handles case where 'to' is a substring of 'from'
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
// Adapted from: http://stackoverflow.com/a/236180/131050
|
||||
static void split(const std::string& s, std::vector<std::string> & result, const std::string& delim, const bool keep_empty = false) {
|
||||
if (delim.empty()) {
|
||||
@ -122,4 +82,8 @@ struct StringUtils {
|
||||
strtol(s.c_str(), &p, 10);
|
||||
return (*p == 0);
|
||||
}
|
||||
|
||||
// Upper-cases `str` in place, byte by byte, using the C library's ::toupper.
static void toupper(std::string& str) {
    // ::toupper requires its argument to be representable as unsigned char (or EOF).
    // Plain `char` may be signed, so bytes >= 0x80 would be passed as negative values,
    // which is undefined behavior. Receiving each byte as unsigned char avoids that.
    std::transform(str.begin(), str.end(), str.begin(), [](unsigned char ch) {
        return static_cast<char>(::toupper(ch));
    });
}
|
||||
};
|
236
src/api.cpp
Normal file
236
src/api.cpp
Normal file
@ -0,0 +1,236 @@
|
||||
#include <regex>
|
||||
#include <chrono>
|
||||
#include <sys/resource.h>
|
||||
#include "api.h"
|
||||
#include "string_utils.h"
|
||||
#include "collection.h"
|
||||
#include "collection_manager.h"
|
||||
|
||||
void post_create_collection(http_req & req, http_res & res) {
|
||||
nlohmann::json req_json;
|
||||
|
||||
try {
|
||||
req_json = nlohmann::json::parse(req.body);
|
||||
} catch(...) {
|
||||
return res.send_400("Bad JSON.");
|
||||
}
|
||||
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
|
||||
// validate presence of mandatory fields
|
||||
|
||||
if(req_json.count("name") == 0) {
|
||||
return res.send_400("Parameter `name` is required.");
|
||||
}
|
||||
|
||||
if(req_json.count("search_fields") == 0) {
|
||||
return res.send_400("Parameter `search_fields` is required.");
|
||||
}
|
||||
|
||||
if(req_json.count("sort_fields") == 0) {
|
||||
return res.send_400("Parameter `sort_fields` is required.");
|
||||
}
|
||||
|
||||
if(collectionManager.get_collection(req_json["name"]) != nullptr) {
|
||||
return res.send_409("Collection with name `" + req_json["name"].get<std::string>() + "` already exists.");
|
||||
}
|
||||
|
||||
// field specific validation
|
||||
|
||||
std::vector<field> search_fields;
|
||||
|
||||
if(!req_json["search_fields"].is_array() || req_json["search_fields"].size() == 0) {
|
||||
return res.send_400("Wrong format for `search_fields`. It should be an array like: "
|
||||
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
|
||||
}
|
||||
|
||||
for(const nlohmann::json & search_field_json: req_json["search_fields"]) {
|
||||
if(!search_field_json.is_object() ||
|
||||
search_field_json.count(fields::name) == 0 || search_field_json.count(fields::type) == 0 ||
|
||||
!search_field_json.at(fields::name).is_string() || !search_field_json.at(fields::type).is_string()) {
|
||||
|
||||
return res.send_400("Wrong format for `search_fields`. It should be an array like: "
|
||||
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
|
||||
}
|
||||
|
||||
search_fields.push_back(field(search_field_json["name"], search_field_json["type"]));
|
||||
}
|
||||
|
||||
std::vector<field> facet_fields;
|
||||
|
||||
if(req_json.count("facet_fields") != 0) {
|
||||
if(!req_json["facet_fields"].is_array()) {
|
||||
return res.send_400("Wrong format for `facet_fields`. It should be an array like: "
|
||||
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
|
||||
}
|
||||
|
||||
for(const nlohmann::json & facet_field_json: req_json["facet_fields"]) {
|
||||
if(!facet_field_json.is_object() ||
|
||||
facet_field_json.count(fields::name) == 0 || facet_field_json.count(fields::type) == 0 ||
|
||||
!facet_field_json.at(fields::name).is_string() || !facet_field_json.at(fields::type).is_string()) {
|
||||
|
||||
return res.send_400("Wrong format for `facet_fields`. It should be an array like: "
|
||||
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
|
||||
}
|
||||
|
||||
facet_fields.push_back(field(facet_field_json["name"], facet_field_json["type"]));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<field> sort_fields;
|
||||
|
||||
if(!req_json["sort_fields"].is_array() || req_json["sort_fields"].size() == 0) {
|
||||
return res.send_400("Wrong format for `sort_fields`. It should be an array like: "
|
||||
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
|
||||
}
|
||||
|
||||
for(const nlohmann::json & sort_field_json: req_json["sort_fields"]) {
|
||||
if(!sort_field_json.is_object() ||
|
||||
sort_field_json.count(fields::name) == 0 || sort_field_json.count(fields::type) == 0 ||
|
||||
!sort_field_json.at(fields::name).is_string() ||
|
||||
!sort_field_json.at(fields::type).is_string()) {
|
||||
|
||||
return res.send_400("Wrong format for `sort_fields`. It should be an array like: "
|
||||
"[{\"name\": \"<field_name>\", \"type\": \"<field_type>\"}]");
|
||||
}
|
||||
|
||||
if(sort_field_json["type"] != "INT32" && sort_field_json["type"] != "INT64") {
|
||||
return res.send_400("Sort field `" + sort_field_json["name"].get<std::string>() + "` must be a number.");
|
||||
}
|
||||
|
||||
sort_fields.push_back(field(sort_field_json["name"], sort_field_json["type"]));
|
||||
}
|
||||
|
||||
std::string token_ranking_field = "";
|
||||
|
||||
if(req_json.count("token_ranking_field") != 0) {
|
||||
if(!req_json["token_ranking_field"].is_string()) {
|
||||
return res.send_400("Wrong format for `token_ranking_field`. It should be a string (name of a field).");
|
||||
}
|
||||
|
||||
token_ranking_field = req_json["token_ranking_field"].get<std::string>();
|
||||
}
|
||||
|
||||
collectionManager.create_collection(req_json["name"], search_fields, facet_fields, sort_fields, token_ranking_field);
|
||||
res.send_201(req.body);
|
||||
}
|
||||
|
||||
// Handler that runs a search against the collection named by the "collection" parameter.
//
// Parameters read from req.params:
//   q          query text
//   search_by  (required) comma separated list of fields to search in
//   filter_by  (optional) filter expression, passed through to Collection::search
//   facet_by   (optional) list of facet fields separated by "&&"
//   sort_by    (optional) up to two comma separated "<field>:<order>" expressions
//   num_typos  (optional, defaults to "2") integer
//   prefix     (optional, defaults to "false") "true" enables prefix search
//
// Responds with 400 on malformed parameters, 404 when the collection does not exist,
// and 200 with the JSON search result otherwise.
void get_search(http_req & req, http_res & res) {
    const char *NUM_TYPOS = "num_typos";
    const char *PREFIX = "prefix";
    const char *FILTER = "filter_by";
    const char *SEARCH_BY = "search_by";
    const char *SORT_BY = "sort_by";
    const char *FACET_BY = "facet_by";

    if(req.params.count(NUM_TYPOS) == 0) {
        req.params[NUM_TYPOS] = "2";
    }

    if(req.params.count(PREFIX) == 0) {
        req.params[PREFIX] = "false";
    }

    if(req.params.count(SEARCH_BY) == 0) {
        return res.send_400(std::string("Parameter `") + SEARCH_BY + "` is required.");
    }

    std::string filter_str = req.params.count(FILTER) != 0 ? req.params[FILTER] : "";

    std::vector<std::string> search_fields;
    StringUtils::split(req.params[SEARCH_BY], search_fields, ",");

    std::vector<std::string> facet_fields;
    StringUtils::split(req.params[FACET_BY], facet_fields, "&&");

    std::vector<sort_field> sort_fields;
    if(req.params.count(SORT_BY) != 0) {
        std::vector<std::string> sort_field_strs;
        StringUtils::split(req.params[SORT_BY], sort_field_strs, ",");

        if(sort_field_strs.size() > 2) {
            return res.send_400("Only upto 2 sort fields are allowed.");
        }

        for(const std::string & sort_field_str: sort_field_strs) {
            std::vector<std::string> expression_parts;
            StringUtils::split(sort_field_str, expression_parts, ":");

            if(expression_parts.size() != 2) {
                return res.send_400(std::string("Parameter `") + SORT_BY + "` is malformed.");
            }

            // the order part is normalized to upper case
            StringUtils::toupper(expression_parts[1]);
            sort_fields.push_back(sort_field(expression_parts[0], expression_parts[1]));
        }
    }

    auto begin = std::chrono::high_resolution_clock::now();

    CollectionManager & collectionManager = CollectionManager::get_instance();
    Collection* collection = collectionManager.get_collection(req.params["collection"]);

    if(collection == nullptr) {
        return res.send_404();
    }

    // num_typos comes straight from the request; std::stoi throws on non-numeric or
    // out-of-range input, which must become a 400 instead of escaping the handler.
    int num_typos = 0;
    try {
        num_typos = std::stoi(req.params[NUM_TYPOS]);
    } catch(...) {
        return res.send_400(std::string("Parameter `") + NUM_TYPOS + "` must be an integer.");
    }

    bool prefix = (req.params[PREFIX] == "true");

    // for prefix searches, rank tokens by score when the collection defines a token ranking field
    token_ordering token_order = FREQUENCY;
    if(prefix && !collection->get_token_ranking_field().empty()) {
        token_order = MAX_SCORE;
    }

    nlohmann::json result = collection->search(req.params["q"], search_fields, filter_str, facet_fields,
                                               sort_fields, num_typos, 100,
                                               token_order, prefix);
    const std::string & json_str = result.dump();
    res.send_200(json_str);

    long long int timeMicros = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
    std::cout << "Time taken: " << timeMicros << "us" << std::endl;
}
|
||||
|
||||
// Handler that adds the document in the request body to the collection named by the
// "collection" parameter.
//
// Responds with 404 when the collection does not exist, 400 when the body cannot be
// processed as JSON, the error code/message reported by Collection::add when indexing
// is rejected, and 201 with {"id": "<inserted id>"} on success.
void post_add_document(http_req & req, http_res & res) {
    CollectionManager & collectionManager = CollectionManager::get_instance();
    Collection* collection = collectionManager.get_collection(req.params["collection"]);

    if(collection == nullptr) {
        return res.send_404();
    }

    try {
        // Collection::add parses the raw body itself; a malformed body makes the parser
        // throw, and that exception must not escape the request handler.
        Option<std::string> inserted_id_op = collection->add(req.body);

        if(!inserted_id_op.ok()) {
            res.send(inserted_id_op.code(), inserted_id_op.error());
        } else {
            nlohmann::json json_response;
            json_response["id"] = inserted_id_op.get();
            res.send_201(json_response.dump());
        }
    } catch(...) {
        res.send_400("Bad JSON.");
    }
}
|
||||
|
||||
// Handler that removes the document identified by the "id" parameter from the collection
// named by the "collection" parameter.
//
// Responds with 404 when the collection does not exist, the error code/message reported by
// Collection::remove on failure, and 200 with {"id": "<removed id>"} on success.
void del_remove_document(http_req & req, http_res & res) {
    std::string doc_id = req.params["id"];

    Collection* collection = CollectionManager::get_instance().get_collection(req.params["collection"]);
    if(collection == nullptr) {
        return res.send_404();
    }

    Option<std::string> deleted_id_op = collection->remove(doc_id);

    if(deleted_id_op.ok()) {
        nlohmann::json json_response;
        json_response["id"] = deleted_id_op.get();
        res.send_200(json_response.dump());
        return;
    }

    res.send(deleted_id_op.code(), deleted_id_op.error());
}
|
87
src/array_utils.cpp
Normal file
87
src/array_utils.cpp
Normal file
@ -0,0 +1,87 @@
|
||||
#include "array_utils.h"
|
||||
#include <memory.h>
|
||||
|
||||
// Writes the elements common to A and B into `out` and returns how many were written.
// Walks both arrays with one cursor each: the cursor pointing at the smaller value advances,
// and on equal values the value is emitted and both cursors advance.
// NOTE(review): this only yields a true intersection when both inputs are sorted ascending,
// and `out` must be large enough for the result - confirm with callers.
size_t ArrayUtils::and_scalar(const uint32_t *A, const size_t lenA,
                              const uint32_t *B, const size_t lenB, uint32_t *out) {
    const uint32_t *const out_begin = out;
    const uint32_t *const endA = A + lenA;
    const uint32_t *const endB = B + lenB;

    // stop as soon as either input is exhausted (also covers empty inputs)
    while (A != endA && B != endB) {
        if (*A < *B) {
            ++A;
        } else if (*A > *B) {
            ++B;
        } else {
            *out++ = *A;
            ++A;
            ++B;
        }
    }

    return (out - out_begin);
}
|
||||
|
||||
// Merges A and B into a newly allocated array stored in `*out` and returns its length.
// A value equal to the one written immediately before it is skipped, so two ascending
// sorted inputs produce a de-duplicated union.
//
// When A is nullptr the result is a plain copy of B.
// The array placed in `*out` is allocated with new[]; the caller owns it and must delete[] it.
// NOTE(review): inputs are presumably sorted ascending - confirm with callers.
size_t ArrayUtils::or_scalar(const uint32_t *A, const size_t lenA,
                             const uint32_t *B, const size_t lenB, uint32_t **out) {
    if(A == nullptr) {
        *out = new uint32_t[lenB];
        if(lenB != 0) {
            memcpy(*out, B, lenB * sizeof(uint32_t));
        }
        return lenB;
    }

    size_t indexA = 0, indexB = 0, res_index = 0;

    // worst case: no value is shared between the inputs
    uint32_t* results = new uint32_t[lenA+lenB];

    // Appends `value` unless it equals the last value written. The `res_index == 0` guard
    // matters in every loop below: when one input is empty the first merge loop never runs,
    // and looking at results[res_index-1] would read before the start of the buffer.
    auto append_unique = [&](const uint32_t value) {
        if(res_index == 0 || results[res_index-1] != value) {
            results[res_index] = value;
            res_index++;
        }
    };

    while (indexA < lenA && indexB < lenB) {
        if (A[indexA] < B[indexB]) {
            append_unique(A[indexA]);
            indexA++;
        } else {
            append_unique(B[indexB]);
            indexB++;
        }
    }

    // drain whichever input still has elements left
    while (indexA < lenA) {
        append_unique(A[indexA]);
        indexA++;
    }

    while (indexB < lenB) {
        append_unique(B[indexB]);
        indexB++;
    }

    // shrink fit
    *out = new uint32_t[res_index];
    memcpy(*out, results, res_index * sizeof(uint32_t));
    delete[] results;

    return res_index;
}
|
@ -2,16 +2,16 @@
|
||||
|
||||
#include <numeric>
|
||||
#include <chrono>
|
||||
#include <intersection.h>
|
||||
#include <array_utils.h>
|
||||
#include <match_score.h>
|
||||
#include <string_utils.h>
|
||||
#include <art.h>
|
||||
|
||||
Collection::Collection(const std::string name, const uint32_t collection_id, const uint32_t next_seq_id, Store *store,
|
||||
const std::vector<field> &search_fields, const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields, const std::string token_ordering_field):
|
||||
const std::vector<field> & sort_fields, const std::string token_ranking_field):
|
||||
name(name), collection_id(collection_id), next_seq_id(next_seq_id), store(store),
|
||||
rank_fields(rank_fields), token_ordering_field(token_ordering_field) {
|
||||
sort_fields(sort_fields), token_ranking_field(token_ranking_field) {
|
||||
|
||||
for(const field& field: search_fields) {
|
||||
art_tree *t = new art_tree;
|
||||
@ -21,33 +21,27 @@ Collection::Collection(const std::string name, const uint32_t collection_id, con
|
||||
}
|
||||
|
||||
for(const field& field: facet_fields) {
|
||||
art_tree *t = new art_tree;
|
||||
art_tree_init(t);
|
||||
facet_index.emplace(field.name, t);
|
||||
facet_value fvalue;
|
||||
facet_index.emplace(field.name, fvalue);
|
||||
facet_schema.emplace(field.name, field);
|
||||
}
|
||||
|
||||
for(const std::string & rank_field: rank_fields) {
|
||||
for(const field & sort_field: sort_fields) {
|
||||
spp::sparse_hash_map<uint32_t, int64_t> * doc_to_score = new spp::sparse_hash_map<uint32_t, int64_t>();
|
||||
rank_index.emplace(rank_field, doc_to_score);
|
||||
sort_index.emplace(sort_field.name, doc_to_score);
|
||||
}
|
||||
}
|
||||
|
||||
Collection::~Collection() {
|
||||
for(std::pair<std::string, field> name_field: search_schema) {
|
||||
for(auto & name_field: search_schema) {
|
||||
art_tree *t = search_index.at(name_field.first);
|
||||
art_tree_destroy(t);
|
||||
t = nullptr;
|
||||
}
|
||||
|
||||
for(std::pair<std::string, field> name_field: facet_schema) {
|
||||
art_tree *t = facet_index.at(name_field.first);
|
||||
art_tree_destroy(t);
|
||||
t = nullptr;
|
||||
}
|
||||
|
||||
for(std::pair<std::string, spp::sparse_hash_map<uint32_t, int64_t>*> name_map: rank_index) {
|
||||
for(auto & name_map: sort_index) {
|
||||
delete name_map.second;
|
||||
name_map.second = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -56,7 +50,7 @@ uint32_t Collection::get_next_seq_id() {
|
||||
return next_seq_id++;
|
||||
}
|
||||
|
||||
Option<std::string> Collection::add(std::string json_str) {
|
||||
Option<std::string> Collection::add(const std::string & json_str) {
|
||||
nlohmann::json document = nlohmann::json::parse(json_str);
|
||||
|
||||
uint32_t seq_id = get_next_seq_id();
|
||||
@ -80,22 +74,22 @@ Option<std::string> Collection::add(std::string json_str) {
|
||||
}
|
||||
|
||||
Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uint32_t seq_id) {
|
||||
if(!token_ordering_field.empty() && document.count(token_ordering_field) == 0) {
|
||||
return Option<>(400, "Field `" + token_ordering_field + "` has been declared as a token ordering field, "
|
||||
if(!token_ranking_field.empty() && document.count(token_ranking_field) == 0) {
|
||||
return Option<>(400, "Field `" + token_ranking_field + "` has been declared as a token ranking field, "
|
||||
"but is not found in the document.");
|
||||
}
|
||||
|
||||
if(!token_ordering_field.empty() && !document[token_ordering_field].is_number()) {
|
||||
return Option<>(400, "Token ordering field `" + token_ordering_field + "` must be an INT32.");
|
||||
if(!token_ranking_field.empty() && !document[token_ranking_field].is_number()) {
|
||||
return Option<>(400, "Token ranking field `" + token_ranking_field + "` must be an INT32.");
|
||||
}
|
||||
|
||||
if(!token_ordering_field.empty() && document[token_ordering_field].get<int64_t>() > INT32_MAX) {
|
||||
return Option<>(400, "Token ordering field `" + token_ordering_field + "` exceeds maximum value of INT32.");
|
||||
if(!token_ranking_field.empty() && document[token_ranking_field].get<int64_t>() > INT32_MAX) {
|
||||
return Option<>(400, "Token ranking field `" + token_ranking_field + "` exceeds maximum value of INT32.");
|
||||
}
|
||||
|
||||
uint32_t points = 0;
|
||||
if(!token_ordering_field.empty()) {
|
||||
points = document[token_ordering_field];
|
||||
if(!token_ranking_field.empty()) {
|
||||
points = document[token_ranking_field];
|
||||
}
|
||||
|
||||
for(const std::pair<std::string, field> & field_pair: search_schema) {
|
||||
@ -176,13 +170,13 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
"but is not found in the document.");
|
||||
}
|
||||
|
||||
art_tree *t = facet_index.at(field_name);
|
||||
facet_value & fvalue = facet_index.at(field_name);
|
||||
if(field_pair.second.type == field_types::STRING) {
|
||||
if(!document[field_name].is_string()) {
|
||||
return Option<>(400, "Facet field `" + field_name + "` must be a STRING.");
|
||||
}
|
||||
const std::string & text = document[field_name];
|
||||
index_string_field(text, points, t, seq_id, true);
|
||||
const std::string & value = document[field_name];
|
||||
fvalue.index_values(seq_id, { value });
|
||||
} else if(field_pair.second.type == field_types::STRING_ARRAY) {
|
||||
if(!document[field_name].is_array()) {
|
||||
return Option<>(400, "Facet field `" + field_name + "` must be a STRING_ARRAY.");
|
||||
@ -192,23 +186,23 @@ Option<uint32_t> Collection::index_in_memory(const nlohmann::json &document, uin
|
||||
return Option<>(400, "Facet field `" + field_name + "` must be a STRING_ARRAY.");
|
||||
}
|
||||
|
||||
std::vector<std::string> strings = document[field_name];
|
||||
index_string_array_field(strings, points, t, seq_id, true);
|
||||
const std::vector<std::string> & values = document[field_name];
|
||||
fvalue.index_values(seq_id, values);
|
||||
}
|
||||
}
|
||||
|
||||
for(const std::string & rank_field: rank_fields) {
|
||||
if(document.count(rank_field) == 0) {
|
||||
return Option<>(400, "Field `" + rank_field + "` has been declared as a rank field in the schema, "
|
||||
for(const field & sort_field: sort_fields) {
|
||||
if(document.count(sort_field.name) == 0) {
|
||||
return Option<>(400, "Field `" + sort_field.name + "` has been declared as a sort field in the schema, "
|
||||
"but is not found in the document.");
|
||||
}
|
||||
|
||||
if(!document[rank_field].is_number()) {
|
||||
return Option<>(400, "Rank field `" + rank_field + "` must be an integer.");
|
||||
if(!document[sort_field.name].is_number()) {
|
||||
return Option<>(400, "Sort field `" + sort_field.name + "` must be a number.");
|
||||
}
|
||||
|
||||
spp::sparse_hash_map<uint32_t, int64_t> *doc_to_score = rank_index.at(rank_field);
|
||||
doc_to_score->emplace(seq_id, document[rank_fields[0]].get<int64_t>());
|
||||
spp::sparse_hash_map<uint32_t, int64_t> *doc_to_score = sort_index.at(sort_field.name);
|
||||
doc_to_score->emplace(seq_id, document[sort_field.name].get<int64_t>());
|
||||
}
|
||||
|
||||
return Option<>(200);
|
||||
@ -269,9 +263,9 @@ void Collection::index_string_field(const std::string & text, const uint32_t sco
|
||||
tokens.push_back(text);
|
||||
token_to_offsets[text].push_back(0);
|
||||
} else {
|
||||
StringUtils::tokenize(text, tokens, " ", true);
|
||||
StringUtils::split(text, tokens, " ");
|
||||
for(uint32_t i=0; i<tokens.size(); i++) {
|
||||
auto token = tokens[i];
|
||||
auto & token = tokens[i];
|
||||
transform(token.begin(), token.end(), token.begin(), tolower);
|
||||
token_to_offsets[token].push_back(i);
|
||||
}
|
||||
@ -301,7 +295,8 @@ void Collection::index_string_field(const std::string & text, const uint32_t sco
|
||||
}
|
||||
|
||||
art_insert(t, key, key_len, &art_doc, num_hits);
|
||||
delete art_doc.offsets;
|
||||
delete [] art_doc.offsets;
|
||||
art_doc.offsets = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@ -330,36 +325,27 @@ void Collection::do_facets(std::vector<facet> & facets, uint32_t* result_ids, si
|
||||
for(auto & a_facet: facets) {
|
||||
// assumed that facet fields have already been validated upstream
|
||||
const field & facet_field = facet_schema.at(a_facet.field_name);
|
||||
const facet_value & fvalue = facet_index.at(facet_field.name);
|
||||
|
||||
// loop through the field, get all keys and intersect those ids with result ids
|
||||
if(facet_index.count(facet_field.name) != 0) {
|
||||
art_tree *t = facet_index.at(facet_field.name);
|
||||
std::vector<art_leaf *> leaves;
|
||||
|
||||
art_topk_iter(t->root, MAX_SCORE, 10, leaves);
|
||||
|
||||
for(const art_leaf* leaf: leaves) {
|
||||
const uint32_t* facet_ids = leaf->values->ids.uncompress();
|
||||
size_t facet_ids_size = leaf->values->ids.getLength();
|
||||
|
||||
uint32_t* facet_results = new uint32_t[std::min(facet_ids_size, results_size)];
|
||||
const size_t facet_results_size = Intersection::scalar(result_ids, results_size,
|
||||
facet_ids, facet_ids_size, facet_results);
|
||||
|
||||
const std::string facet_value((const char *)leaf->key, leaf->key_len-1); // drop trailing null
|
||||
a_facet.result_map.insert(std::pair<std::string, size_t>(facet_value, facet_results_size));
|
||||
|
||||
delete [] facet_ids;
|
||||
delete [] facet_results;
|
||||
for(auto i = 0; i < results_size; i++) {
|
||||
uint32_t doc_seq_id = result_ids[i];
|
||||
if(fvalue.doc_values.count(doc_seq_id) != 0) {
|
||||
// for every result document, get the values associated and increment counter
|
||||
const std::vector<uint32_t> & value_indices = fvalue.doc_values.at(doc_seq_id);
|
||||
for(auto j = 0; j < value_indices.size(); j++) {
|
||||
const std::string & facet_value = fvalue.index_value.at(value_indices.at(j));
|
||||
a_facet.result_map[facet_value] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Collection::search_candidates(uint32_t* filter_ids, size_t filter_ids_length, std::vector<facet> & facets,
|
||||
const std::vector<std::string> & rank_fields, int & token_rank,
|
||||
const std::vector<sort_field> & sort_fields, int & candidate_rank,
|
||||
std::vector<std::vector<art_leaf*>> & token_leaves, Topster<100> & topster,
|
||||
size_t & total_results, size_t & num_found, const size_t & max_results) {
|
||||
size_t & total_results, uint32_t** all_result_ids, size_t & all_result_ids_len,
|
||||
const size_t & max_results) {
|
||||
const size_t combination_limit = 10;
|
||||
auto product = []( long long a, std::vector<art_leaf*>& b ) { return a*b.size(); };
|
||||
long long int N = std::accumulate(token_leaves.begin(), token_leaves.end(), 1LL, product);
|
||||
@ -367,7 +353,11 @@ void Collection::search_candidates(uint32_t* filter_ids, size_t filter_ids_lengt
|
||||
for(long long n=0; n<N && n<combination_limit; ++n) {
|
||||
// every element in `query_suggestion` contains a token and its associated hits
|
||||
std::vector<art_leaf *> query_suggestion = next_suggestion(token_leaves, n);
|
||||
token_rank++;
|
||||
candidate_rank++;
|
||||
|
||||
/*for(auto i=0; i < query_suggestion.size(); i++) {
|
||||
std::cout << "i: " << i << " - " << query_suggestion[i]->key << std::endl;
|
||||
}*/
|
||||
|
||||
// initialize results with the starting element (for further intersection)
|
||||
uint32_t* result_ids = query_suggestion[0]->values->ids.uncompress();
|
||||
@ -386,22 +376,32 @@ void Collection::search_candidates(uint32_t* filter_ids, size_t filter_ids_lengt
|
||||
if(filter_ids != nullptr) {
|
||||
// intersect once again with filter ids
|
||||
uint32_t* filtered_result_ids = new uint32_t[std::min(filter_ids_length, result_size)];
|
||||
size_t filtered_results_size =
|
||||
Intersection::scalar(filter_ids, filter_ids_length, result_ids, result_size, filtered_result_ids);
|
||||
size_t filtered_results_size = ArrayUtils::and_scalar(filter_ids, filter_ids_length, result_ids,
|
||||
result_size, filtered_result_ids);
|
||||
|
||||
uint32_t* new_all_result_ids;
|
||||
all_result_ids_len = ArrayUtils::or_scalar(*all_result_ids, all_result_ids_len, filtered_result_ids,
|
||||
filtered_results_size, &new_all_result_ids);
|
||||
delete [] *all_result_ids;
|
||||
*all_result_ids = new_all_result_ids;
|
||||
|
||||
do_facets(facets, filtered_result_ids, filtered_results_size);
|
||||
|
||||
// go through each matching document id and calculate match score
|
||||
score_results(rank_fields, token_rank, topster, query_suggestion, filtered_result_ids, filtered_results_size);
|
||||
num_found += filtered_results_size;
|
||||
score_results(sort_fields, candidate_rank, topster, query_suggestion, filtered_result_ids, filtered_results_size);
|
||||
|
||||
delete[] filtered_result_ids;
|
||||
delete[] result_ids;
|
||||
} else {
|
||||
do_facets(facets, result_ids, result_size);
|
||||
|
||||
score_results(rank_fields, token_rank, topster, query_suggestion, result_ids, result_size);
|
||||
num_found += result_size;
|
||||
uint32_t* new_all_result_ids;
|
||||
all_result_ids_len = ArrayUtils::or_scalar(*all_result_ids, all_result_ids_len, result_ids,
|
||||
result_size, &new_all_result_ids);
|
||||
delete [] *all_result_ids;
|
||||
*all_result_ids = new_all_result_ids;
|
||||
|
||||
score_results(sort_fields, candidate_rank, topster, query_suggestion, result_ids, result_size);
|
||||
delete[] result_ids;
|
||||
}
|
||||
|
||||
@ -544,7 +544,8 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
filter_ids_length = result_ids_length;
|
||||
} else {
|
||||
uint32_t* filtered_results = new uint32_t[std::min((size_t)filter_ids_length, result_ids_length)];
|
||||
filter_ids_length = Intersection::scalar(filter_ids, filter_ids_length, result_ids, result_ids_length, filtered_results);
|
||||
filter_ids_length = ArrayUtils::and_scalar(filter_ids, filter_ids_length, result_ids,
|
||||
result_ids_length, filtered_results);
|
||||
delete [] filter_ids;
|
||||
delete [] result_ids;
|
||||
filter_ids = filtered_results;
|
||||
@ -558,9 +559,8 @@ Option<uint32_t> Collection::do_filtering(uint32_t** filter_ids_out, const std::
|
||||
|
||||
nlohmann::json Collection::search(std::string query, const std::vector<std::string> search_fields,
|
||||
const std::string & simple_filter_query, const std::vector<std::string> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields, const int num_typos,
|
||||
const std::vector<sort_field> & sort_fields, const int num_typos,
|
||||
const size_t num_results, const token_ordering token_order, const bool prefix) {
|
||||
size_t num_found = 0;
|
||||
nlohmann::json result = nlohmann::json::object();
|
||||
std::vector<facet> facets;
|
||||
|
||||
@ -587,10 +587,15 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
facets.push_back(facet(field_name));
|
||||
}
|
||||
|
||||
// validate rank fields
|
||||
for(const std::string & field_name: rank_fields) {
|
||||
if(rank_index.count(field_name) == 0) {
|
||||
result["error"] = "Could not find a rank field named `" + field_name + "` in the schema.";
|
||||
// validate sort fields
|
||||
for(const sort_field & _sort_field: sort_fields) {
|
||||
if(sort_index.count(_sort_field.name) == 0) {
|
||||
result["error"] = "Could not find a sort field named `" + _sort_field.name + "` in the schema.";
|
||||
return result;
|
||||
}
|
||||
|
||||
if(_sort_field.order != sort_field_const::asc && _sort_field.order != sort_field_const::desc) {
|
||||
result["error"] = "Order for sort field` " + _sort_field.name + "` should be either ASC or DESC.";
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@ -605,20 +610,23 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
|
||||
const uint32_t filter_ids_length = op_filter_ids_length.get();
|
||||
|
||||
// Order of `fields` are used to rank results
|
||||
// Order of `fields` are used to sort results
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
std::vector<std::pair<int, Topster<100>::KV>> field_order_kvs;
|
||||
uint32_t* all_result_ids = nullptr;
|
||||
size_t all_result_ids_len = 0;
|
||||
|
||||
for(int i = 0; i < search_fields.size(); i++) {
|
||||
Topster<100> topster;
|
||||
const std::string & field = search_fields[i];
|
||||
// proceed to query search only when no filters are provided or when filtering produces results
|
||||
if(simple_filter_query.size() == 0 || filter_ids_length > 0) {
|
||||
search_field(query, field, filter_ids, filter_ids_length, facets, rank_fields, num_typos, num_results,
|
||||
topster, num_found, token_order, prefix);
|
||||
search_field(query, field, filter_ids, filter_ids_length, facets, sort_fields, num_typos, num_results,
|
||||
topster, &all_result_ids, all_result_ids_len, token_order, prefix);
|
||||
topster.sort();
|
||||
}
|
||||
|
||||
// order of fields specified matter: matching docs from earlier fields are more important
|
||||
for(auto t = 0; t < topster.size && t < num_results; t++) {
|
||||
field_order_kvs.push_back(std::make_pair(search_fields.size() - i, topster.getKV(t)));
|
||||
}
|
||||
@ -626,18 +634,19 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
|
||||
delete [] filter_ids;
|
||||
|
||||
// All fields are sorted descending
|
||||
std::sort(field_order_kvs.begin(), field_order_kvs.end(),
|
||||
[](const std::pair<int, Topster<100>::KV> & a, const std::pair<int, Topster<100>::KV> & b) {
|
||||
if(a.second.match_score != b.second.match_score) return a.second.match_score > b.second.match_score;
|
||||
if(a.second.primary_attr != b.second.primary_attr) return a.second.primary_attr > b.second.primary_attr;
|
||||
if(a.second.secondary_attr != b.second.secondary_attr) return a.second.secondary_attr > b.second.secondary_attr;
|
||||
if(a.first != b.first) return a.first > b.first;
|
||||
if(a.first != b.first) return a.first > b.first; // field position
|
||||
return a.second.key > b.second.key;
|
||||
});
|
||||
|
||||
result["hits"] = nlohmann::json::array();
|
||||
|
||||
for(auto field_order_kv: field_order_kvs) {
|
||||
for(auto & field_order_kv: field_order_kvs) {
|
||||
std::string value;
|
||||
const std::string &seq_id_key = get_seq_id_key((uint32_t) field_order_kv.second.key);
|
||||
store->get(seq_id_key, value);
|
||||
@ -645,7 +654,7 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
result["hits"].push_back(document);
|
||||
}
|
||||
|
||||
result["found"] = num_found;
|
||||
result["found"] = all_result_ids_len;
|
||||
|
||||
result["facet_counts"] = nlohmann::json::array();
|
||||
|
||||
@ -655,7 +664,19 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
facet_result["field_name"] = a_facet.field_name;
|
||||
facet_result["counts"] = nlohmann::json::array();
|
||||
|
||||
for(auto kv: a_facet.result_map) {
|
||||
// keep only top 10 facets
|
||||
std::vector<std::pair<std::string, size_t>> value_to_count;
|
||||
for (auto itr = a_facet.result_map.begin(); itr != a_facet.result_map.end(); ++itr) {
|
||||
value_to_count.push_back(*itr);
|
||||
}
|
||||
|
||||
std::sort(value_to_count.begin(), value_to_count.end(),
|
||||
[=](std::pair<std::string, size_t>& a, std::pair<std::string, size_t>& b) {
|
||||
return a.second > b.second;
|
||||
});
|
||||
|
||||
for(auto i = 0; i < std::min((size_t)10, value_to_count.size()); i++) {
|
||||
auto & kv = value_to_count[i];
|
||||
nlohmann::json facet_value_count = nlohmann::json::object();
|
||||
facet_value_count["value"] = kv.first;
|
||||
facet_value_count["count"] = kv.second;
|
||||
@ -681,11 +702,11 @@ nlohmann::json Collection::search(std::string query, const std::vector<std::stri
|
||||
5. Sort the docs based on some ranking criteria
|
||||
*/
|
||||
void Collection::search_field(std::string & query, const std::string & field, uint32_t *filter_ids, size_t filter_ids_length,
|
||||
std::vector<facet> & facets, const std::vector<std::string> & rank_fields, const int num_typos,
|
||||
const size_t num_results, Topster<100> &topster, size_t & num_found,
|
||||
const token_ordering token_order, const bool prefix) {
|
||||
std::vector<facet> & facets, const std::vector<sort_field> & sort_fields, const int num_typos,
|
||||
const size_t num_results, Topster<100> &topster, uint32_t** all_result_ids,
|
||||
size_t & all_result_ids_len, const token_ordering token_order, const bool prefix) {
|
||||
std::vector<std::string> tokens;
|
||||
StringUtils::tokenize(query, tokens, " ", true);
|
||||
StringUtils::split(query, tokens, " ");
|
||||
|
||||
const int max_cost = (num_typos < 0 || num_typos > 2) ? 2 : num_typos;
|
||||
const size_t max_results = std::min(num_results, (size_t) Collection::MAX_RESULTS);
|
||||
@ -714,7 +735,7 @@ void Collection::search_field(std::string & query, const std::string & field, ui
|
||||
|
||||
const size_t combination_limit = 10;
|
||||
auto product = []( long long a, std::vector<int>& b ) { return a*b.size(); };
|
||||
int token_rank = 0;
|
||||
int candidate_rank = 0;
|
||||
long long n = 0;
|
||||
long long int N = std::accumulate(token_to_costs.begin(), token_to_costs.end(), 1LL, product);
|
||||
|
||||
@ -737,8 +758,8 @@ void Collection::search_field(std::string & query, const std::string & field, ui
|
||||
const std::string token_cost_hash = token + std::to_string(costs[token_index]);
|
||||
|
||||
std::vector<art_leaf*> leaves;
|
||||
/*std::cout << "\nSearching for: " << token << " - cost: " << costs[token_index] << ", token_rank: "
|
||||
<< token_rank << std::endl;*/
|
||||
/*std::cout << "\nSearching for: " << token << " - cost: " << costs[token_index] << ", candidate_rank: "
|
||||
<< candidate_rank << std::endl;*/
|
||||
|
||||
if(token_cost_cache.count(token_cost_hash) != 0) {
|
||||
leaves = token_cost_cache[token_cost_hash];
|
||||
@ -758,9 +779,9 @@ void Collection::search_field(std::string & query, const std::string & field, ui
|
||||
if(!leaves.empty()) {
|
||||
//!log_leaves(costs[token_index], token, leaves);
|
||||
token_leaves.push_back(leaves);
|
||||
token_to_count[token] = leaves.at(0)->values->ids.getLength();
|
||||
token_to_count[token] = std::max(token_to_count[token], leaves.at(0)->values->ids.getLength());
|
||||
} else {
|
||||
// No result at `cost = costs[token_index]` => remove cost for token and re-do combinations
|
||||
// No result at `cost = costs[token_index]`. Remove costs until `cost` for token and re-do combinations
|
||||
auto it = std::find(token_to_costs[token_index].begin(), token_to_costs[token_index].end(), costs[token_index]);
|
||||
if(it != token_to_costs[token_index].end()) {
|
||||
token_to_costs[token_index].erase(it);
|
||||
@ -773,9 +794,9 @@ void Collection::search_field(std::string & query, const std::string & field, ui
|
||||
}
|
||||
}
|
||||
|
||||
// To continue outerloop on new cost combination
|
||||
n = -1;
|
||||
N = std::accumulate(token_to_costs.begin(), token_to_costs.end(), 1LL, product);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
@ -784,8 +805,8 @@ void Collection::search_field(std::string & query, const std::string & field, ui
|
||||
|
||||
if(token_leaves.size() != 0 && token_leaves.size() == tokens.size()) {
|
||||
// If all tokens were found, go ahead and search for candidates with what we have so far
|
||||
search_candidates(filter_ids, filter_ids_length, facets, rank_fields, token_rank, token_leaves, topster,
|
||||
total_results, num_found, max_results);
|
||||
search_candidates(filter_ids, filter_ids_length, facets, sort_fields, candidate_rank, token_leaves, topster,
|
||||
total_results, all_result_ids, all_result_ids_len, max_results);
|
||||
|
||||
if (total_results >= max_results) {
|
||||
// If we don't find enough results, we continue outerloop (looking at tokens with greater cost)
|
||||
@ -818,8 +839,8 @@ void Collection::search_field(std::string & query, const std::string & field, ui
|
||||
}
|
||||
}
|
||||
|
||||
return search_field(truncated_query, field, filter_ids, filter_ids_length, facets, rank_fields, num_typos,
|
||||
num_results, topster, num_found, token_order, prefix);
|
||||
return search_field(truncated_query, field, filter_ids, filter_ids_length, facets, sort_fields, num_typos,
|
||||
num_results, topster, all_result_ids, all_result_ids_len, token_order, prefix);
|
||||
}
|
||||
}
|
||||
|
||||
@ -834,11 +855,11 @@ void Collection::log_leaves(const int cost, const std::string &token, const std:
|
||||
}
|
||||
}
|
||||
|
||||
void Collection::score_results(const std::vector<std::string> & rank_fields, const int & token_rank,
|
||||
void Collection::score_results(const std::vector<sort_field> & sort_fields, const int & candidate_rank,
|
||||
Topster<100> & topster, const std::vector<art_leaf *> &query_suggestion,
|
||||
const uint32_t *result_ids, const size_t result_size) const {
|
||||
|
||||
const int max_token_rank = 250;
|
||||
const int max_candidate_rank = 250;
|
||||
spp::sparse_hash_map<art_leaf*, uint32_t*> leaf_to_indices;
|
||||
|
||||
if(query_suggestion.size() != 1) {
|
||||
@ -853,13 +874,23 @@ void Collection::score_results(const std::vector<std::string> & rank_fields, con
|
||||
spp::sparse_hash_map<uint32_t, int64_t> * primary_rank_scores = nullptr;
|
||||
spp::sparse_hash_map<uint32_t, int64_t> * secondary_rank_scores = nullptr;
|
||||
|
||||
if(rank_fields.size() > 0) {
|
||||
// Used for asc/desc ordering. NOTE: Topster keeps biggest keys (i.e. it's desc in nature)
|
||||
int64_t primary_rank_factor = 1;
|
||||
int64_t secondary_rank_factor = 1;
|
||||
|
||||
if(sort_fields.size() > 0) {
|
||||
// assumed that rank field exists in the index - checked earlier in the chain
|
||||
primary_rank_scores = rank_index.at(rank_fields[0]);
|
||||
primary_rank_scores = sort_index.at(sort_fields[0].name);
|
||||
if(sort_fields[0].order == sort_field_const::asc) {
|
||||
primary_rank_factor = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if(rank_fields.size() > 1) {
|
||||
secondary_rank_scores = rank_index.at(rank_fields[1]);
|
||||
if(sort_fields.size() > 1) {
|
||||
secondary_rank_scores = sort_index.at(sort_fields[1].name);
|
||||
if(sort_fields[1].order == sort_field_const::asc) {
|
||||
secondary_rank_factor = -1;
|
||||
}
|
||||
}
|
||||
|
||||
for(auto i=0; i<result_size; i++) {
|
||||
@ -895,18 +926,22 @@ void Collection::score_results(const std::vector<std::string> & rank_fields, con
|
||||
mscore = MatchScore::match_score(seq_id, token_positions);
|
||||
}
|
||||
|
||||
int token_rank_score = max_token_rank - token_rank;
|
||||
int candidate_rank_score = max_candidate_rank - candidate_rank;
|
||||
|
||||
// Construct a single match_score from individual components (for multi-field sort)
|
||||
const uint64_t match_score = (token_rank_score << 16) +
|
||||
((uint64_t)(mscore.words_present) << 8) +
|
||||
const uint64_t match_score = ((uint64_t)(mscore.words_present) << 16) +
|
||||
(candidate_rank_score << 8) +
|
||||
(MAX_SEARCH_TOKENS - mscore.distance);
|
||||
|
||||
int64_t primary_rank_score = primary_rank_scores->count(seq_id) > 0 ? primary_rank_scores->at(seq_id) : 0;
|
||||
int64_t primary_rank_score = (primary_rank_scores && primary_rank_scores->count(seq_id) > 0) ?
|
||||
primary_rank_scores->at(seq_id) : 0;
|
||||
int64_t secondary_rank_score = (secondary_rank_scores && secondary_rank_scores->count(seq_id) > 0) ?
|
||||
secondary_rank_scores->at(seq_id) : 0;
|
||||
topster.add(seq_id, match_score, primary_rank_score, secondary_rank_score);
|
||||
/*std::cout << "token_rank_score: " << token_rank_score << ", match_score: "
|
||||
topster.add(seq_id, match_score,
|
||||
primary_rank_factor * primary_rank_score,
|
||||
secondary_rank_factor * secondary_rank_score);
|
||||
|
||||
/*std::cout << "candidate_rank_score: " << candidate_rank_score << ", match_score: "
|
||||
<< match_score << ", primary_rank_score: " << primary_rank_score << ", seq_id: " << seq_id << std::endl;*/
|
||||
}
|
||||
|
||||
@ -968,9 +1003,13 @@ void Collection::remove_and_shift_offset_index(sorted_array &offset_index, const
|
||||
delete[] new_array;
|
||||
}
|
||||
|
||||
void Collection::remove(std::string id) {
|
||||
Option<std::string> Collection::remove(const std::string & id) {
|
||||
std::string seq_id_str;
|
||||
store->get(get_doc_id_key(id), seq_id_str);
|
||||
StoreStatus status = store->get(get_doc_id_key(id), seq_id_str);
|
||||
|
||||
if(status == StoreStatus::NOT_FOUND) {
|
||||
return Option<std::string>(404, "Could not find a document with id: " + id);
|
||||
}
|
||||
|
||||
uint32_t seq_id = (uint32_t) std::stol(seq_id_str);
|
||||
|
||||
@ -979,48 +1018,108 @@ void Collection::remove(std::string id) {
|
||||
|
||||
nlohmann::json document = nlohmann::json::parse(parsed_document);
|
||||
|
||||
std::vector<std::string> tokens;
|
||||
StringUtils::tokenize(document["title"], tokens, " ", true);
|
||||
|
||||
for(auto token: tokens) {
|
||||
std::transform(token.begin(), token.end(), token.begin(), ::tolower);
|
||||
|
||||
const unsigned char *key = (const unsigned char *) token.c_str();
|
||||
int key_len = (int) (token.length() + 1);
|
||||
|
||||
art_leaf* leaf = (art_leaf *) art_search(search_index.at("title"), key, key_len);
|
||||
if(leaf != NULL) {
|
||||
uint32_t seq_id_values[1] = {seq_id};
|
||||
|
||||
uint32_t doc_index = leaf->values->ids.indexOf(seq_id);
|
||||
uint32_t start_offset = leaf->values->offset_index.at(doc_index);
|
||||
uint32_t end_offset = (doc_index == leaf->values->ids.getLength() - 1) ?
|
||||
leaf->values->offsets.getLength() :
|
||||
leaf->values->offset_index.at(doc_index+1);
|
||||
|
||||
uint32_t doc_indices[1] = {doc_index};
|
||||
remove_and_shift_offset_index(leaf->values->offset_index, doc_indices, 1);
|
||||
|
||||
leaf->values->offsets.remove_index(start_offset, end_offset);
|
||||
leaf->values->ids.remove_values(seq_id_values, 1);
|
||||
|
||||
/*len = leaf->values->offset_index.getLength();
|
||||
for(auto i=0; i<len; i++) {
|
||||
std::cout << "i: " << i << ", val: " << leaf->values->offset_index.at(i) << std::endl;
|
||||
for(auto & name_field: search_schema) {
|
||||
std::vector<std::string> tokens;
|
||||
if(name_field.second.type == field_types::STRING) {
|
||||
StringUtils::split(document[name_field.first], tokens, " ");
|
||||
} else if(name_field.second.type == field_types::STRING_ARRAY) {
|
||||
tokens = document[name_field.first].get<std::vector<std::string>>();
|
||||
} else if(name_field.second.type == field_types::INT32) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
int32_t value = document[name_field.first].get<int32_t>();
|
||||
encode_int32(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::INT32_ARRAY) {
|
||||
std::vector<int32_t> values = document[name_field.first].get<std::vector<int32_t>>();
|
||||
for(const int32_t value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_int32(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
std::cout << "----" << std::endl;*/
|
||||
} else if(name_field.second.type == field_types::INT64) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
int64_t value = document[name_field.first].get<int64_t>();
|
||||
encode_int64(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
} else if(name_field.second.type == field_types::INT64_ARRAY) {
|
||||
std::vector<int64_t> values = document[name_field.first].get<std::vector<int64_t>>();
|
||||
for(const int64_t value: values) {
|
||||
const int KEY_LEN = 8;
|
||||
unsigned char key[KEY_LEN];
|
||||
encode_int64(value, key);
|
||||
tokens.push_back(std::string((char*)key, KEY_LEN));
|
||||
}
|
||||
}
|
||||
|
||||
if(leaf->values->ids.getLength() == 0) {
|
||||
art_delete(search_index.at("title"), key, key_len);
|
||||
for(auto & token: tokens) {
|
||||
const unsigned char *key;
|
||||
int key_len;
|
||||
|
||||
if(name_field.second.type == field_types::STRING_ARRAY || name_field.second.type == field_types::STRING) {
|
||||
std::transform(token.begin(), token.end(), token.begin(), ::tolower);
|
||||
key = (const unsigned char *) token.c_str();
|
||||
key_len = (int) (token.length() + 1);
|
||||
} else {
|
||||
key = (const unsigned char *) token.c_str();
|
||||
key_len = (int) (token.length());
|
||||
}
|
||||
|
||||
art_leaf* leaf = (art_leaf *) art_search(search_index.at(name_field.first), key, key_len);
|
||||
if(leaf != NULL) {
|
||||
uint32_t seq_id_values[1] = {seq_id};
|
||||
uint32_t doc_index = leaf->values->ids.indexOf(seq_id);
|
||||
|
||||
if(doc_index == leaf->values->ids.getLength()) {
|
||||
// not found - happens when 2 tokens repeat in a field, e.g "is it or is is not?"
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t start_offset = leaf->values->offset_index.at(doc_index);
|
||||
uint32_t end_offset = (doc_index == leaf->values->ids.getLength() - 1) ?
|
||||
leaf->values->offsets.getLength() :
|
||||
leaf->values->offset_index.at(doc_index+1);
|
||||
|
||||
uint32_t doc_indices[1] = {doc_index};
|
||||
remove_and_shift_offset_index(leaf->values->offset_index, doc_indices, 1);
|
||||
|
||||
leaf->values->offsets.remove_index(start_offset, end_offset);
|
||||
leaf->values->ids.remove_values(seq_id_values, 1);
|
||||
|
||||
/*len = leaf->values->offset_index.getLength();
|
||||
for(auto i=0; i<len; i++) {
|
||||
std::cout << "i: " << i << ", val: " << leaf->values->offset_index.at(i) << std::endl;
|
||||
}
|
||||
std::cout << "----" << std::endl;*/
|
||||
|
||||
if(leaf->values->ids.getLength() == 0) {
|
||||
art_values* values = (art_values*) art_delete(search_index.at(name_field.first), key, key_len);
|
||||
delete values;
|
||||
values = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// remove facets if any
|
||||
for(auto & field_facet_value: facet_index) {
|
||||
field_facet_value.second.doc_values.erase(seq_id);
|
||||
}
|
||||
|
||||
// remove sort index if any
|
||||
for(auto & field_doc_value_map: sort_index) {
|
||||
field_doc_value_map.second->erase(seq_id);
|
||||
}
|
||||
|
||||
store->remove(get_doc_id_key(id));
|
||||
store->remove(get_seq_id_key(seq_id));
|
||||
|
||||
return Option<std::string>(id);
|
||||
}
|
||||
|
||||
std::string Collection::get_next_seq_id_key(std::string collection_name) {
|
||||
std::string Collection::get_next_seq_id_key(const std::string & collection_name) {
|
||||
return std::string(COLLECTION_NEXT_SEQ_PREFIX) + "_" + collection_name;
|
||||
}
|
||||
|
||||
@ -1035,7 +1134,7 @@ std::string Collection::get_seq_id_key(uint32_t seq_id) {
|
||||
return get_seq_id_collection_prefix() + "_" + std::string(bytes, bytes+4);
|
||||
}
|
||||
|
||||
std::string Collection::get_doc_id_key(std::string doc_id) {
|
||||
std::string Collection::get_doc_id_key(const std::string & doc_id) {
|
||||
return std::to_string(collection_id) + "_" + DOC_ID_PREFIX + doc_id;
|
||||
}
|
||||
|
||||
@ -1059,15 +1158,15 @@ std::vector<std::string> Collection::get_facet_fields() {
|
||||
return facet_fields_copy;
|
||||
}
|
||||
|
||||
std::vector<std::string> Collection::get_rank_fields() {
|
||||
return rank_fields;
|
||||
std::vector<field> Collection::get_sort_fields() {
|
||||
return sort_fields;
|
||||
}
|
||||
|
||||
// Returns `search_schema`, the map from field name to `field` definition.
// The return type is a value, not a reference, so each call hands the caller its own copy of the map.
spp::sparse_hash_map<std::string, field> Collection::get_schema() {
|
||||
return search_schema;
|
||||
};
|
||||
|
||||
std::string Collection::get_meta_key(std::string collection_name) {
|
||||
std::string Collection::get_meta_key(const std::string & collection_name) {
|
||||
return COLLECTION_META_PREFIX + collection_name;
|
||||
}
|
||||
|
||||
@ -1075,6 +1174,6 @@ std::string Collection::get_seq_id_collection_prefix() {
|
||||
return std::to_string(collection_id) + "_" + std::string(SEQ_ID_PREFIX);
|
||||
}
|
||||
|
||||
std::string Collection::get_token_ordering_field() {
|
||||
return token_ordering_field;
|
||||
std::string Collection::get_token_ranking_field() {
|
||||
return token_ranking_field;
|
||||
}
|
@ -23,7 +23,7 @@ void CollectionManager::init(Store *store) {
|
||||
std::vector<std::string> collection_meta_jsons;
|
||||
store->scan_fill(Collection::COLLECTION_META_PREFIX, collection_meta_jsons);
|
||||
|
||||
for(auto collection_meta_json: collection_meta_jsons) {
|
||||
for(auto & collection_meta_json: collection_meta_jsons) {
|
||||
nlohmann::json collection_meta = nlohmann::json::parse(collection_meta_json);
|
||||
std::string this_collection_name = collection_meta[COLLECTION_NAME_KEY].get<std::string>();
|
||||
|
||||
@ -45,10 +45,15 @@ void CollectionManager::init(Store *store) {
|
||||
store->get(Collection::get_next_seq_id_key(this_collection_name), collection_next_seq_id_str);
|
||||
|
||||
uint32_t collection_next_seq_id = (const uint32_t) std::stoi(collection_next_seq_id_str);
|
||||
std::vector<std::string> collection_rank_fields =
|
||||
collection_meta[COLLECTION_RANK_FIELDS_KEY].get<std::vector<std::string>>();
|
||||
|
||||
std::string token_ordering_field = collection_meta[COLLECTION_TOKEN_ORDERING_FIELD_KEY].get<std::string>();
|
||||
std::vector<field> collection_sort_fields;
|
||||
nlohmann::json sort_fields_map = collection_meta[COLLECTION_SORT_FIELDS_KEY];
|
||||
|
||||
for (nlohmann::json::iterator it = sort_fields_map.begin(); it != sort_fields_map.end(); ++it) {
|
||||
collection_sort_fields.push_back({it.value()[fields::name], it.value()[fields::type]});
|
||||
}
|
||||
|
||||
std::string token_ranking_field = collection_meta[COLLECTION_TOKEN_ORDERING_FIELD_KEY].get<std::string>();
|
||||
|
||||
Collection* collection = new Collection(this_collection_name,
|
||||
collection_meta[COLLECTION_ID_KEY].get<uint32_t>(),
|
||||
@ -56,8 +61,8 @@ void CollectionManager::init(Store *store) {
|
||||
store,
|
||||
search_fields,
|
||||
facet_fields,
|
||||
collection_rank_fields,
|
||||
token_ordering_field);
|
||||
collection_sort_fields,
|
||||
token_ranking_field);
|
||||
|
||||
// Fetch records from the store and re-create memory index
|
||||
std::vector<std::string> documents;
|
||||
@ -82,8 +87,8 @@ void CollectionManager::init(Store *store) {
|
||||
|
||||
Collection* CollectionManager::create_collection(std::string name, const std::vector<field> & search_fields,
|
||||
const std::vector<field> & facet_fields,
|
||||
const std::vector<std::string> & rank_fields,
|
||||
const std::string & token_ordering_field) {
|
||||
const std::vector<field> & sort_fields,
|
||||
const std::string & token_ranking_field) {
|
||||
if(store->contains(Collection::get_meta_key(name))) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -91,7 +96,7 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
|
||||
nlohmann::json collection_meta;
|
||||
|
||||
nlohmann::json search_fields_json = nlohmann::json::array();;
|
||||
for(const field& search_field: search_fields) {
|
||||
for(const field & search_field: search_fields) {
|
||||
nlohmann::json field_val;
|
||||
field_val[fields::name] = search_field.name;
|
||||
field_val[fields::type] = search_field.type;
|
||||
@ -99,22 +104,30 @@ Collection* CollectionManager::create_collection(std::string name, const std::ve
|
||||
}
|
||||
|
||||
nlohmann::json facet_fields_json = nlohmann::json::array();;
|
||||
for(const field& facet_field: facet_fields) {
|
||||
for(const field & facet_field: facet_fields) {
|
||||
nlohmann::json field_val;
|
||||
field_val[fields::name] = facet_field.name;
|
||||
field_val[fields::type] = facet_field.type;
|
||||
facet_fields_json.push_back(field_val);
|
||||
}
|
||||
|
||||
nlohmann::json sort_fields_json = nlohmann::json::array();;
|
||||
for(const field & sort_field: sort_fields) {
|
||||
nlohmann::json sort_field_val;
|
||||
sort_field_val[fields::name] = sort_field.name;
|
||||
sort_field_val[fields::type] = sort_field.type;
|
||||
sort_fields_json.push_back(sort_field_val);
|
||||
}
|
||||
|
||||
collection_meta[COLLECTION_NAME_KEY] = name;
|
||||
collection_meta[COLLECTION_ID_KEY] = next_collection_id;
|
||||
collection_meta[COLLECTION_SEARCH_FIELDS_KEY] = search_fields_json;
|
||||
collection_meta[COLLECTION_FACET_FIELDS_KEY] = facet_fields_json;
|
||||
collection_meta[COLLECTION_RANK_FIELDS_KEY] = rank_fields;
|
||||
collection_meta[COLLECTION_TOKEN_ORDERING_FIELD_KEY] = token_ordering_field;
|
||||
collection_meta[COLLECTION_SORT_FIELDS_KEY] = sort_fields_json;
|
||||
collection_meta[COLLECTION_TOKEN_ORDERING_FIELD_KEY] = token_ranking_field;
|
||||
|
||||
Collection* new_collection = new Collection(name, next_collection_id, 0, store, search_fields, facet_fields,
|
||||
rank_fields, token_ordering_field);
|
||||
sort_fields, token_ranking_field);
|
||||
|
||||
store->insert(Collection::get_meta_key(name), collection_meta.dump());
|
||||
store->insert(Collection::get_next_seq_id_key(name), std::to_string(0));
|
||||
@ -135,12 +148,6 @@ Collection* CollectionManager::get_collection(std::string collection_name) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
CollectionManager::~CollectionManager() {
|
||||
for(auto kv: collections) {
|
||||
drop_collection(kv.first);
|
||||
}
|
||||
}
|
||||
|
||||
bool CollectionManager::drop_collection(std::string collection_name) {
|
||||
Collection* collection = get_collection(collection_name);
|
||||
if(collection == nullptr) {
|
||||
|
219
src/http_server.cpp
Normal file
219
src/http_server.cpp
Normal file
@ -0,0 +1,219 @@
|
||||
#include "http_server.h"
#include "string_utils.h"
#include <regex>
#include <signal.h>
#include <unistd.h>
|
||||
|
||||
h2o_globalconf_t HttpServer::config;
|
||||
h2o_context_t HttpServer::ctx;
|
||||
h2o_accept_ctx_t HttpServer::accept_ctx;
|
||||
std::vector<route_path> HttpServer::routes;
|
||||
|
||||
/**
 * Stores the bind address/port and prepares the shared h2o configuration.
 *
 * A single host entry named "default" is registered, and the root path "/"
 * is wired to catch_all_handler so that every request reaches the router.
 */
HttpServer::HttpServer(std::string listen_address, uint32_t listen_port):
        listen_address(listen_address), listen_port(listen_port) {
    h2o_config_init(&config);

    const h2o_iovec_t default_host_name = h2o_iovec_init(H2O_STRLIT("default"));
    hostconf = h2o_config_register_host(&config, default_host_name, 65535);

    register_handler(hostconf, "/", catch_all_handler);
}
|
||||
|
||||
/**
 * Read callback installed on the listening socket.
 *
 * Does nothing when h2o reports an error or when no connection could be
 * accepted; otherwise hands the accepted socket to h2o_accept.
 */
void HttpServer::on_accept(h2o_socket_t *listener, const char *err) {
    if (err != NULL) {
        return;
    }

    h2o_socket_t *accepted = h2o_evloop_socket_accept(listener);
    if (accepted != NULL) {
        h2o_accept(&accept_ctx, accepted);
    }
}
|
||||
|
||||
/**
 * Creates the IPv4 listening socket for listen_address:listen_port and
 * registers it with the h2o event loop.
 *
 * @return 0 on success, -1 on failure. On failure errno describes the cause
 *         (EINVAL for an unparsable address or an out-of-range port).
 */
int HttpServer::create_listener(void) {
    // htons() takes a 16-bit value: reject ports that would be silently truncated
    if (listen_port > 65535) {
        errno = EINVAL;
        return -1;
    }

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(listen_port);

    // inet_pton returns 1 only when the text was a valid IPv4 address; on 0 it does
    // not set errno and would leave the zeroed address (0.0.0.0) in place.
    const int pton_result = inet_pton(AF_INET, listen_address.c_str(), &(addr.sin_addr));
    if (pton_result != 1) {
        if (pton_result == 0) {
            errno = EINVAL;
        }
        return -1;
    }

    const int fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd == -1) {
        return -1;
    }

    int reuseaddr_flag = 1;
    if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr_flag, sizeof(reuseaddr_flag)) != 0 ||
        bind(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0 ||
        listen(fd, SOMAXCONN) != 0) {
        // release the descriptor, but keep the errno of the call that failed
        const int saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }

    ctx.globalconf->server_name = h2o_strdup(NULL, "", SIZE_MAX);
    h2o_socket_t *sock = h2o_evloop_socket_create(ctx.loop, fd, H2O_SOCKET_FLAG_DONT_READ);
    h2o_socket_read_start(sock, on_accept);

    return 0;
}
|
||||
|
||||
int HttpServer::run() {
|
||||
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
h2o_context_init(&ctx, h2o_evloop_create(), &config);
|
||||
|
||||
accept_ctx.ctx = &ctx;
|
||||
accept_ctx.hosts = config.hosts;
|
||||
|
||||
if (create_listener() != 0) {
|
||||
std::cerr << "Failed to listen on " << listen_address << ":" << listen_port << std::endl
|
||||
<< "Error: " << strerror(errno) << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (h2o_evloop_run(ctx.loop) == 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Registers `path` on the given host configuration and attaches a handler
 * whose on_req callback is `on_req`.
 *
 * @return the path configuration created for `path`.
 */
h2o_pathconf_t* HttpServer::register_handler(h2o_hostconf_t *hostconf, const char *path,
                                             int (*on_req)(h2o_handler_t *, h2o_req_t *)) {
    h2o_pathconf_t *registered_path = h2o_config_register_path(hostconf, path, 0);

    h2o_handler_t *path_handler = h2o_create_handler(registered_path, sizeof(h2o_handler_t));
    path_handler->on_req = on_req;

    return registered_path;
}
|
||||
|
||||
/**
 * Maps an HTTP status code to its reason phrase.
 *
 * @return the phrase for 200, 201, 400, 404, 409 and 500; an empty string for
 *         every other code.
 */
const char* HttpServer::get_status_reason(uint32_t status_code) {
    if (status_code == 200) return "OK";
    if (status_code == 201) return "Created";
    if (status_code == 400) return "Bad Request";
    if (status_code == 404) return "Not Found";
    if (status_code == 409) return "Conflict";
    if (status_code == 500) return "Internal Server Error";
    return "";
}
|
||||
|
||||
|
||||
std::map<std::string, std::string> HttpServer::parse_query(const std::string& query) {
|
||||
std::map<std::string, std::string> query_map;
|
||||
std::regex pattern("([\\w+%]+)=([^&]*)");
|
||||
|
||||
auto words_begin = std::sregex_iterator(query.begin(), query.end(), pattern);
|
||||
auto words_end = std::sregex_iterator();
|
||||
|
||||
for (std::sregex_iterator i = words_begin; i != words_end; i++) {
|
||||
std::string key = (*i)[1].str();
|
||||
std::string raw_value = (*i)[2].str();
|
||||
std::string value = StringUtils::url_decode(raw_value);
|
||||
if(query_map.count(value) == 0) {
|
||||
query_map[key] = value;
|
||||
} else {
|
||||
query_map[key] = query_map[key] + "&&" + value;
|
||||
}
|
||||
}
|
||||
|
||||
return query_map;
|
||||
}
|
||||
|
||||
/**
 * Single h2o request handler that dispatches to the registered routes.
 *
 * The request path is split on "/" and compared against every route with the
 * same HTTP method and the same number of path parts. A route part starting
 * with ':' is a placeholder that matches any value; its value is added to the
 * parameter map under the placeholder name (an existing query parameter with
 * that name is left untouched, since emplace does not overwrite).
 *
 * The first matching route's handler produces the response; when no route
 * matches, a 404 JSON response is sent.
 *
 * @return always 0.
 */
int HttpServer::catch_all_handler(h2o_handler_t *self, h2o_req_t *req) {
    const std::string http_method(req->method.base, req->method.len);
    const std::string path(req->path.base, req->path.len);
    h2o_generator_t generator = {NULL, NULL};

    std::vector<std::string> path_with_query_parts;
    StringUtils::split(path, path_with_query_parts, "?");

    // guard against an empty split result instead of indexing element 0 blindly
    const std::string path_without_query =
            path_with_query_parts.empty() ? std::string() : path_with_query_parts[0];

    std::vector<std::string> path_parts;
    StringUtils::split(path_without_query, path_parts, "/");

    std::string query_str;
    if (req->query_at != SIZE_MAX) {
        query_str.assign(req->path.base + req->query_at, req->path.len - req->query_at);
    }

    std::map<std::string, std::string> query_map = parse_query(query_str);

    // do not build a string from a null base pointer when there is no entity
    const std::string req_body = (req->entity.base != NULL) ?
                                 std::string(req->entity.base, req->entity.len) : std::string();

    for (const route_path & rpath: routes) {
        if (rpath.path_parts.size() != path_parts.size() || rpath.http_method != http_method) {
            continue;
        }

        bool matched = true;
        for (size_t i = 0; i < rpath.path_parts.size(); i++) {
            const std::string & rpart = rpath.path_parts[i];
            if (rpart != path_parts[i] && rpart[0] != ':') {
                matched = false;
                break;
            }
        }

        if (!matched) {
            continue;
        }

        // routes match - extract the values of the ':' placeholders
        for (size_t i = 0; i < rpath.path_parts.size(); i++) {
            const std::string & path_part = rpath.path_parts[i];
            if (path_part[0] == ':') {
                query_map.emplace(path_part.substr(1), path_parts[i]);
            }
        }

        http_req request = {query_map, req_body};
        http_res response;
        (rpath.handler)(request, response);

        // pass the real length so the body is copied as-is, without a strlen() scan
        h2o_iovec_t body = h2o_strdup(&req->pool, response.body.c_str(), response.body.size());
        req->res.status = response.status_code;
        req->res.reason = get_status_reason(response.status_code);
        h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, H2O_STRLIT("application/json; charset=utf-8"));
        h2o_start_response(req, &generator);
        h2o_send(req, &body, 1, 1);

        return 0;
    }

    h2o_iovec_t res_body = h2o_strdup(&req->pool, "{ \"message\": \"Not Found\"}", SIZE_MAX);
    req->res.status = 404;
    req->res.reason = get_status_reason(404);
    h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, H2O_STRLIT("application/json; charset=utf-8"));
    h2o_start_response(req, &generator);
    h2o_send(req, &res_body, 1, 1);

    return 0;
}
|
||||
|
||||
/**
 * Registers `handler` for GET requests on `path`.
 * The path is split on "/" and stored with the route for later matching.
 */
void HttpServer::get(const std::string & path, void (*handler)(http_req &, http_res &)) {
    std::vector<std::string> segments;
    StringUtils::split(path, segments, "/");
    routes.push_back(route_path{"GET", segments, handler});
}
|
||||
|
||||
/**
 * Registers `handler` for POST requests on `path`.
 * The path is split on "/" and stored with the route for later matching.
 */
void HttpServer::post(const std::string & path, void (*handler)(http_req &, http_res &)) {
    std::vector<std::string> segments;
    StringUtils::split(path, segments, "/");
    routes.push_back(route_path{"POST", segments, handler});
}
|
||||
|
||||
/**
 * Registers `handler` for PUT requests on `path`.
 * The path is split on "/" and stored with the route for later matching.
 */
void HttpServer::put(const std::string & path, void (*handler)(http_req &, http_res &)) {
    std::vector<std::string> segments;
    StringUtils::split(path, segments, "/");
    routes.push_back(route_path{"PUT", segments, handler});
}
|
||||
|
||||
/**
 * Registers `handler` for DELETE requests on `path`.
 * The path is split on "/" and stored with the route for later matching.
 */
void HttpServer::del(const std::string & path, void (*handler)(http_req &, http_res &)) {
    std::vector<std::string> segments;
    StringUtils::split(path, segments, "/");
    routes.push_back(route_path{"DELETE", segments, handler});
}
|
||||
|
||||
// The destructor body is intentionally empty: nothing is released here.
HttpServer::~HttpServer() {}
|
@ -1,32 +0,0 @@
|
||||
#include "intersection.h"
|
||||
|
||||
/**
 * Scalar intersection of two uint32_t arrays, walked in lock-step.
 *
 * At every step the current elements are compared: the cursor pointing at the
 * smaller one advances, and on equality the value is written to `out` and
 * both cursors advance. The walk stops as soon as either array is exhausted.
 *
 * @param A,lenA first array and its length
 * @param B,lenB second array and its length
 * @param out    destination buffer for the common values
 * @return number of values written to `out`.
 */
size_t Intersection::scalar(const uint32_t *A, const size_t lenA,
                            const uint32_t *B, const size_t lenB, uint32_t *out) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }

    const uint32_t *const out_begin = out;
    const uint32_t *const endA = A + lenA;
    const uint32_t *const endB = B + lenB;

    while (A != endA && B != endB) {
        if (*A < *B) {
            ++A;
        } else if (*A > *B) {
            ++B;
        } else {
            *out++ = *A;
            ++A;
            ++B;
        }
    }

    return (out - out_begin);
}
|
@ -17,14 +17,14 @@ int main(int argc, char* argv[]) {
|
||||
system("rm -rf /tmp/typesense-data && mkdir -p /tmp/typesense-data");
|
||||
|
||||
std::vector<field> fields_to_index = {field("title", field_types::STRING)};
|
||||
std::vector<std::string> rank_fields = {"points"};
|
||||
std::vector<field> sort_fields = { field("points", "INT32")};
|
||||
Store *store = new Store("/tmp/typesense-data");
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
collectionManager.init(store);
|
||||
|
||||
Collection *collection = collectionManager.get_collection("collection");
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("collection", fields_to_index, {}, rank_fields);
|
||||
collection = collectionManager.create_collection("collection", fields_to_index, {}, sort_fields);
|
||||
}
|
||||
|
||||
std::ifstream infile("/Users/kishore/Downloads/hnstories_small.jsonl");
|
||||
@ -48,7 +48,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
while(counter < 3000) {
|
||||
auto i = counter % 5;
|
||||
auto results = collection->search(queries[i], search_fields, "", { }, {"points"}, 1, 100, MAX_SCORE, 0);
|
||||
auto results = collection->search(queries[i], search_fields, "", { }, {sort_field("points", "DESC")}, 1, 100, MAX_SCORE, 0);
|
||||
results_total += results.size();
|
||||
counter++;
|
||||
}
|
||||
|
@ -8,72 +8,95 @@
|
||||
#include <unordered_map>
|
||||
#include <queue>
|
||||
#include "string_utils.h"
|
||||
#include <sys/resource.h>
|
||||
#include "collection.h"
|
||||
#include "collection_manager.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Copies result_ids[low..high] into the same positions of `results`,
 * visiting the range by recursive bisection (midpoint first, then the left
 * and right halves).
 *
 * @param result_ids source array; must be readable for indices low..high
 * @param low        first index of the range (inclusive)
 * @param high       last index of the range (inclusive); nothing happens when high < low
 * @param results    destination; std::vector::at throws std::out_of_range if an
 *                   index in the range is outside the vector
 */
void find_indices(const uint32_t *result_ids, int low, int high, std::vector<uint32_t> & results) {
    if(high < low) {
        return;
    }

    // Keep the pivot signed: with an unsigned pivot, `pivot - 1` wraps around when
    // pivot is 0 and only works by being narrowed back to -1. `low + (high - low) / 2`
    // also avoids overflowing `low + high`.
    const int pivot = low + (high - low) / 2;

    results.at(static_cast<size_t>(pivot)) = result_ids[pivot];
    find_indices(result_ids, low, pivot - 1, results);
    find_indices(result_ids, pivot + 1, high, results);
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
std::vector<uint32_t> results(3);
|
||||
uint32_t *result_ids = new uint32_t[3];
|
||||
/*for(auto i = 0; i < 100; i++) {
|
||||
result_ids[i] = i;
|
||||
}*/
|
||||
result_ids[0] = 6;
|
||||
result_ids[1] = 19;
|
||||
result_ids[2] = 21;
|
||||
|
||||
find_indices(result_ids, 0, 2, results);
|
||||
//std::sort(results.begin(), results.end());
|
||||
for(auto i : results) {
|
||||
std::cout << i << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
|
||||
const std::string state_dir_path = "/tmp/typesense-data";
|
||||
|
||||
std::vector<field> fields_to_index = {field("title", field_types::STRING)};
|
||||
std::vector<std::string> rank_fields = {"points"};
|
||||
Store *store = new Store("/tmp/typesense-data");
|
||||
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
collectionManager.init(store);
|
||||
|
||||
Collection *collection = collectionManager.get_collection("collection");
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("collection", fields_to_index, {}, rank_fields);
|
||||
std::ifstream infile(std::string(ROOT_DIR)+"test/documents.jsonl");
|
||||
//std::ifstream infile(argv[1]);
|
||||
std::vector<field> fields_to_index = {
|
||||
field("lang", field_types::STRING),
|
||||
field("description", field_types::STRING),
|
||||
field("topics", field_types::STRING_ARRAY),
|
||||
field("stars", field_types::INT32),
|
||||
field("repo_name", field_types::STRING),
|
||||
field("org", field_types::STRING)
|
||||
};
|
||||
|
||||
std::vector<field> facet_fields_index = {
|
||||
field("lang", field_types::STRING),
|
||||
field("org", field_types::STRING),
|
||||
field("topics", field_types::STRING_ARRAY)
|
||||
};
|
||||
|
||||
std::vector<field> sort_fields = {
|
||||
field("stars", "INT32")
|
||||
};
|
||||
|
||||
Collection *collection = collectionManager.get_collection("github_top1k");
|
||||
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("github_top1k", fields_to_index, facet_fields_index, sort_fields);
|
||||
}
|
||||
|
||||
int j = 0;
|
||||
while(j < 1000) {
|
||||
j++;
|
||||
|
||||
std::ifstream infile(argv[1]);
|
||||
std::string json_line;
|
||||
|
||||
cout << "BEGINNING Iteration: " << j << endl << flush;
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
int doc_id = 0;
|
||||
|
||||
while (std::getline(infile, json_line)) {
|
||||
collection->add(json_line);
|
||||
nlohmann::json document = nlohmann::json::parse(json_line);
|
||||
//document["id"] = std::to_string(doc_id);
|
||||
document["id"] = document["org"].get<std::string>() + ":" + document["repo_name"].get<std::string>();
|
||||
collection->add(document.dump());
|
||||
doc_id++;
|
||||
}
|
||||
|
||||
infile.close();
|
||||
cout << "FINISHED INDEXING!" << endl << flush;
|
||||
|
||||
long long int timeMillis =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
|
||||
std::cout << "Time taken for insertion: " << timeMillis << "ms" << std::endl;
|
||||
begin = std::chrono::high_resolution_clock::now();
|
||||
|
||||
std::ifstream infile2(argv[1]);
|
||||
|
||||
doc_id = 0;
|
||||
|
||||
while (std::getline(infile2, json_line)) {
|
||||
nlohmann::json document = nlohmann::json::parse(json_line);
|
||||
//document["id"] = std::to_string(doc_id);
|
||||
document["id"] = document["org"].get<std::string>() + ":" + document["repo_name"].get<std::string>();
|
||||
collection->remove(document["id"]);
|
||||
doc_id++;
|
||||
}
|
||||
|
||||
infile2.close();
|
||||
|
||||
timeMillis =
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
|
||||
struct rusage r_usage;
|
||||
getrusage(RUSAGE_SELF,&r_usage);
|
||||
std::cout << "Memory usage: " << r_usage.ru_maxrss << std::endl;
|
||||
std::cout << "Time taken for deletion: " << timeMillis << "ms" << std::endl;
|
||||
}
|
||||
|
||||
//collection->remove("foo");
|
||||
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
std::vector<std::string> search_fields = {"title"};
|
||||
collection->search("the", search_fields, "", {}, {"points"}, 1, 100, MAX_SCORE, 0);
|
||||
long long int timeMillis =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
|
||||
cout << "Time taken: " << timeMillis << "us" << endl;
|
||||
delete collection;
|
||||
delete store;
|
||||
return 0;
|
||||
}
|
@ -1,264 +0,0 @@
|
||||
#define H2O_USE_LIBUV 0
|
||||
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <netinet/in.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include "string_utils.h"
|
||||
#include "collection.h"
|
||||
#include "collection_manager.h"
|
||||
#include "option.h"
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include "h2o.h"
|
||||
#include "h2o/http1.h"
|
||||
#include "h2o/http2.h"
|
||||
#include "h2o/memcached.h"
|
||||
|
||||
static h2o_globalconf_t config;
|
||||
static h2o_context_t ctx;
|
||||
static h2o_accept_ctx_t accept_ctx;
|
||||
std::vector<field> search_fields = {field("title", field_types::STRING), field("points", field_types::INT32)};
|
||||
std::vector<std::string> rank_fields = {"points"};
|
||||
Collection *collection;
|
||||
|
||||
/**
 * Registers `path` on the given host configuration and attaches a handler
 * whose on_req callback is `on_req`.
 *
 * @return the path configuration created for `path`.
 */
static h2o_pathconf_t *register_handler(h2o_hostconf_t *hostconf, const char *path,
                                        int (*on_req)(h2o_handler_t *, h2o_req_t *)) {
    h2o_pathconf_t *registered_path = h2o_config_register_path(hostconf, path, 0);

    h2o_handler_t *path_handler = h2o_create_handler(registered_path, sizeof(h2o_handler_t));
    path_handler->on_req = on_req;

    return registered_path;
}
|
||||
|
||||
std::map<std::string, std::string> parse_query(const std::string& query) {
|
||||
std::map<std::string, std::string> query_map;
|
||||
std::regex pattern("([\\w+%]+)=([^&]*)");
|
||||
|
||||
auto words_begin = std::sregex_iterator(query.begin(), query.end(), pattern);
|
||||
auto words_end = std::sregex_iterator();
|
||||
|
||||
for (std::sregex_iterator i = words_begin; i != words_end; i++) {
|
||||
std::string key = (*i)[1].str();
|
||||
std::string raw_value = (*i)[2].str();
|
||||
std::string value = StringUtils::url_decode(raw_value);
|
||||
if(query_map.count(value) == 0) {
|
||||
query_map[key] = value;
|
||||
} else {
|
||||
query_map[key] = query_map[key] + "&&" + value;
|
||||
}
|
||||
}
|
||||
|
||||
return query_map;
|
||||
}
|
||||
|
||||
/**
 * h2o handler for the search endpoint.
 *
 * Reads `q`, `filters`, `num_typos` (default "2"), `prefix` (default "false")
 * and `token_ordering` (default "FREQUENCY") from the query string, runs the
 * search on the global collection over the "title" field and sends the JSON
 * result with status 200.
 *
 * When `num_typos` cannot be converted to an int, a 400 JSON response is sent
 * instead of letting a conversion exception escape from the handler.
 *
 * @return always 0.
 */
static int get_search(h2o_handler_t *self, h2o_req_t *req) {
    static h2o_generator_t generator = {NULL, NULL};

    const char *NUM_TYPOS = "num_typos";
    const char *PREFIX = "prefix";
    const char *TOKEN_ORDERING = "token_ordering";
    const char *FILTERS = "filters";

    std::string query_str;
    if(req->query_at != SIZE_MAX) {
        query_str.assign(req->path.base + req->query_at, req->path.len - req->query_at);
    }

    std::map<std::string, std::string> query_map = parse_query(query_str);

    if(query_map.count(NUM_TYPOS) == 0) {
        query_map[NUM_TYPOS] = "2";
    }

    if(query_map.count(PREFIX) == 0) {
        query_map[PREFIX] = "false";
    }

    if(query_map.count(TOKEN_ORDERING) == 0) {
        query_map[TOKEN_ORDERING] = "FREQUENCY";
    }

    // Convert num_typos without exceptions. Only the inputs std::stoi would have
    // thrown on are rejected (no digits at all, or a value outside the int range).
    const std::string & num_typos_str = query_map[NUM_TYPOS];
    char *parse_end = NULL;
    errno = 0;
    const long num_typos = strtol(num_typos_str.c_str(), &parse_end, 10);
    const bool num_typos_ok = (parse_end != num_typos_str.c_str()) && (errno != ERANGE) &&
                              (num_typos >= INT_MIN) && (num_typos <= INT_MAX);

    if(!num_typos_ok) {
        h2o_iovec_t error_body = h2o_strdup(&req->pool,
                                            "{\"message\": \"Parameter `num_typos` must be an integer.\"}", SIZE_MAX);
        req->res.status = 400;
        req->res.reason = "Bad Request";
        h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, H2O_STRLIT("application/json; charset=utf-8"));
        h2o_start_response(req, &generator);
        h2o_send(req, &error_body, 1, 1);
        return 0;
    }

    std::string filter_str = query_map.count(FILTERS) != 0 ? query_map[FILTERS] : "";

    token_ordering token_order = (query_map[TOKEN_ORDERING] == "MAX_SCORE") ? MAX_SCORE : FREQUENCY;

    auto begin = std::chrono::high_resolution_clock::now();

    std::vector<std::string> search_fields = {"title"};

    nlohmann::json result = collection->search(query_map["q"], search_fields, filter_str, { },
                                               {"points"}, static_cast<int>(num_typos), 100, token_order, false);
    std::string json_str = result.dump();

    h2o_iovec_t body = h2o_strdup(&req->pool, json_str.c_str(), SIZE_MAX);
    req->res.status = 200;
    req->res.reason = "OK";
    h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, H2O_STRLIT("application/json; charset=utf-8"));
    h2o_start_response(req, &generator);
    h2o_send(req, &body, 1, 1);

    long long int timeMillis = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
    std::cout << "Time taken: " << timeMillis << "us" << std::endl;
    return 0;
}
|
||||
|
||||
/**
 * h2o handler that adds the request body as a document to the global
 * collection.
 *
 * Responds with 201 and the inserted id on success, or with 400 and the error
 * message reported by the collection otherwise.
 *
 * @return always 0.
 */
static int post_add_document(h2o_handler_t *self, h2o_req_t *req) {
    static h2o_generator_t generator = {NULL, NULL};

    std::string document(req->entity.base, req->entity.len);
    Option<std::string> inserted_id_op = collection->add(document);

    nlohmann::json json_response;

    if(inserted_id_op.ok()) {
        req->res.status = 201;
        req->res.reason = "CREATED";
        json_response["id"] = inserted_id_op.get();
    } else {
        req->res.status = 400;
        req->res.reason = "BAD REQUEST";
        json_response["message"] = inserted_id_op.error();
    }

    h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, H2O_STRLIT("application/json; charset=utf-8"));
    h2o_start_response(req, &generator);

    const std::string payload = json_response.dump();
    h2o_iovec_t body = h2o_strdup(&req->pool, payload.c_str(), SIZE_MAX);
    h2o_send(req, &body, 1, 1);
    return 0;
}
|
||||
|
||||
/**
 * h2o handler that removes the document named by the `id` query parameter
 * from the global collection, prints how long the removal took and replies
 * with a 200 JSON body containing the id and the status "SUCCESS".
 *
 * @return always 0.
 */
static int delete_remove_document(h2o_handler_t *self, h2o_req_t *req) {
    std::string query_str;
    if(req->query_at != SIZE_MAX) {
        query_str = std::string(req->path.base + req->query_at, req->path.len - req->query_at);
    }

    std::map<std::string, std::string> query_map = parse_query(query_str);
    std::string doc_id = query_map["id"];

    const auto started_at = std::chrono::high_resolution_clock::now();
    collection->remove(doc_id);
    const auto elapsed = std::chrono::high_resolution_clock::now() - started_at;
    long long int time_micro = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
    std::cout << "Time taken: " << time_micro << "us" << std::endl;

    nlohmann::json json_response;
    json_response["id"] = doc_id;
    json_response["status"] = "SUCCESS";

    static h2o_generator_t generator = {NULL, NULL};
    req->res.status = 200;
    req->res.reason = "OK";
    h2o_add_header(&req->pool, &req->res.headers, H2O_TOKEN_CONTENT_TYPE, H2O_STRLIT("application/json; charset=utf-8"));
    h2o_start_response(req, &generator);

    const std::string payload = json_response.dump();
    h2o_iovec_t body = h2o_strdup(&req->pool, payload.c_str(), SIZE_MAX);
    h2o_send(req, &body, 1, 1);
    return 0;
}
|
||||
|
||||
/**
 * Read callback installed on the listening socket.
 *
 * Does nothing when h2o reports an error or when no connection could be
 * accepted; otherwise hands the accepted socket to h2o_accept.
 */
static void on_accept(h2o_socket_t *listener, const char *err) {
    if (err != NULL) {
        return;
    }

    h2o_socket_t *accepted = h2o_evloop_socket_accept(listener);
    if (accepted != NULL) {
        h2o_accept(&accept_ctx, accepted);
    }
}
|
||||
|
||||
static int create_listener(void) {
|
||||
struct sockaddr_in addr;
|
||||
int fd, reuseaddr_flag = 1;
|
||||
h2o_socket_t *sock;
|
||||
|
||||
memset(&addr, 0, sizeof(addr));
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_addr.s_addr = htonl(0x7f000001);
|
||||
addr.sin_port = htons(1088);
|
||||
|
||||
if ((fd = socket(AF_INET, SOCK_STREAM, 0)) == -1 ||
|
||||
setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr_flag, sizeof(reuseaddr_flag)) != 0 ||
|
||||
bind(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0 || listen(fd, SOMAXCONN) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
sock = h2o_evloop_socket_create(ctx.loop, fd, H2O_SOCKET_FLAG_DONT_READ);
|
||||
h2o_socket_read_start(sock, on_accept);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void index_documents(std::string path_to_docs) {
|
||||
std::ifstream infile(path_to_docs);
|
||||
// std::ifstream infile(path_to_docs);
|
||||
|
||||
std::string json_line;
|
||||
|
||||
while (std::getline(infile, json_line)) {
|
||||
collection->add(json_line);
|
||||
}
|
||||
|
||||
infile.close();
|
||||
std::cout << "FINISHED INDEXING!" << std::endl << std::flush;
|
||||
struct rusage r_usage;
|
||||
getrusage(RUSAGE_SELF,&r_usage);
|
||||
|
||||
std::cout << "Memory usage: " << r_usage.ru_maxrss << std::endl;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
|
||||
Store *store = new Store("/tmp/typesense-data");
|
||||
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
collectionManager.init(store);
|
||||
|
||||
collection = collectionManager.get_collection("collection");
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("collection", search_fields, {}, rank_fields);
|
||||
//index_documents(std::string(ROOT_DIR)+"test/documents.jsonl");
|
||||
if(argc > 1) {
|
||||
index_documents(argv[1]);
|
||||
}
|
||||
}
|
||||
|
||||
h2o_config_init(&config);
|
||||
h2o_hostconf_t *hostconf = h2o_config_register_host(&config, h2o_iovec_init(H2O_STRLIT("default")), 65535);
|
||||
register_handler(hostconf, "/add", post_add_document);
|
||||
register_handler(hostconf, "/delete", delete_remove_document);
|
||||
register_handler(hostconf, "/search", get_search);
|
||||
|
||||
h2o_context_init(&ctx, h2o_evloop_create(), &config);
|
||||
|
||||
accept_ctx.ctx = &ctx;
|
||||
accept_ctx.hosts = config.hosts;
|
||||
|
||||
if (create_listener() != 0) {
|
||||
fprintf(stderr, "failed to listen to 127.0.0.1:1088:%s\n", strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (h2o_evloop_run(ctx.loop) == 0);
|
||||
|
||||
return 0;
|
||||
}
|
25
src/main/typesense_server.cpp
Normal file
25
src/main/typesense_server.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
#include <cmdline.h>
|
||||
#include "http_server.h"
|
||||
#include "api.h"
|
||||
|
||||
/**
 * Entry point of the Typesense server.
 *
 * Parses the command line (data-dir is required; listen-address defaults to
 * 0.0.0.0 and listen-port to 8080), initialises the collection manager on the
 * store, registers the API routes and runs the HTTP server.
 *
 * @return the value returned by HttpServer::run(), so that a failure to start
 *         listening is reflected in the process exit status.
 */
int main(int argc, char **argv) {
    cmdline::parser options;
    options.add<std::string>("data-dir", 'd', "Directory where data will be stored.", true);
    options.add<std::string>("listen-address", 'a', "Address to which Typesense server binds.", false, "0.0.0.0");
    options.add<uint32_t>("listen-port", 'p', "Port on which Typesense server listens.", false, 8080);
    options.parse_check(argc, argv);

    Store store(options.get<std::string>("data-dir"));
    CollectionManager & collectionManager = CollectionManager::get_instance();
    collectionManager.init(&store);

    HttpServer server(options.get<std::string>("listen-address"), options.get<uint32_t>("listen-port"));

    server.post("/collection", post_create_collection);
    server.post("/collection/:collection", post_add_document);
    server.get("/collection/:collection/search", get_search);
    server.del("/collection/:collection/:id", del_remove_document);

    // propagate the server's status instead of always reporting success
    return server.run();
}
|
@ -1,8 +1,8 @@
|
||||
#include "sorted_array.h"
|
||||
#include "intersection.h"
|
||||
#include "array_utils.h"
|
||||
|
||||
void sorted_array::load(const uint32_t *sorted_array, const uint32_t array_length) {
|
||||
min = sorted_array[0];
|
||||
min = array_length != 0 ? sorted_array[0] : 0;
|
||||
max = array_length > 1 ? sorted_array[array_length-1] : min;
|
||||
|
||||
uint32_t size_required = (uint32_t) (sorted_append_size_required(max, array_length) * FOR_GROWTH_FACTOR);
|
||||
@ -55,6 +55,10 @@ bool sorted_array::contains(uint32_t value) {
|
||||
}
|
||||
|
||||
uint32_t sorted_array::indexOf(uint32_t value) {
|
||||
if(length == 0) {
|
||||
return length;
|
||||
}
|
||||
|
||||
uint32_t actual;
|
||||
uint32_t index = for_lower_bound_search(in, length, value, &actual);
|
||||
if(actual == value) return index;
|
||||
@ -173,7 +177,7 @@ size_t sorted_array::intersect(uint32_t* arr, const size_t arr_length, uint32_t*
|
||||
uint32_t* curr = uncompress();
|
||||
uint32_t* results = new uint32_t[std::min(arr_length, (size_t) length)];
|
||||
|
||||
size_t results_length = Intersection::scalar(arr, arr_length, curr, length, results);
|
||||
size_t results_length = ArrayUtils::and_scalar(arr, arr_length, curr, length, results);
|
||||
delete[] curr;
|
||||
|
||||
*results_out = results;
|
||||
|
@ -12,7 +12,9 @@ protected:
|
||||
Collection *collection1;
|
||||
std::vector<field> search_fields;
|
||||
std::vector<field> facet_fields;
|
||||
std::vector<std::string> rank_fields;
|
||||
std::vector<field> sort_fields_index;
|
||||
|
||||
std::vector<sort_field> sort_fields;
|
||||
|
||||
void setupCollection() {
|
||||
std::string state_dir_path = "/tmp/typesense_test/coll_manager_test_db";
|
||||
@ -24,10 +26,11 @@ protected:
|
||||
|
||||
search_fields = {field("title", field_types::STRING), field("starring", field_types::STRING)};
|
||||
facet_fields = {field("starring", field_types::STRING)};
|
||||
rank_fields = {"points"};
|
||||
sort_fields = { sort_field("points", "DESC") };
|
||||
sort_fields_index = { field("points", "INT32") };
|
||||
|
||||
collection1 = collectionManager.create_collection("collection1", search_fields, facet_fields,
|
||||
rank_fields, "points");
|
||||
sort_fields_index, "points");
|
||||
}
|
||||
|
||||
virtual void SetUp() {
|
||||
@ -53,7 +56,7 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
std::vector<std::string> search_fields = {"starring", "title"};
|
||||
std::vector<std::string> facets;
|
||||
|
||||
nlohmann::json results = collection1->search("thomas", search_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = collection1->search("thomas", search_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
spp::sparse_hash_map<std::string, field> schema = collection1->get_schema();
|
||||
@ -70,11 +73,12 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
|
||||
ASSERT_EQ(0, collection1->get_collection_id());
|
||||
ASSERT_EQ(18, collection1->get_next_seq_id());
|
||||
ASSERT_EQ(facet_fields_expected, collection1->get_facet_fields());
|
||||
ASSERT_EQ(rank_fields, collection1->get_rank_fields());
|
||||
ASSERT_EQ(1, collection1->get_sort_fields().size());
|
||||
ASSERT_EQ(sort_fields[0].name, collection1->get_sort_fields()[0].name);
|
||||
ASSERT_EQ(schema.size(), collection1->get_schema().size());
|
||||
ASSERT_EQ("points", collection1->get_token_ordering_field());
|
||||
ASSERT_EQ("points", collection1->get_token_ranking_field());
|
||||
|
||||
results = collection1->search("thomas", search_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = collection1->search("thomas", search_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <collection_manager.h>
|
||||
#include "collection.h"
|
||||
|
||||
@ -11,8 +12,9 @@ protected:
|
||||
std::vector<std::string> query_fields;
|
||||
Store *store;
|
||||
CollectionManager & collectionManager = CollectionManager::get_instance();
|
||||
std::vector<std::string> rank_fields;
|
||||
std::vector<field> facet_fields;
|
||||
std::vector<field> sort_fields_index;
|
||||
std::vector<sort_field> sort_fields;
|
||||
|
||||
void setupCollection() {
|
||||
std::string state_dir_path = "/tmp/typesense_test/collection";
|
||||
@ -27,12 +29,13 @@ protected:
|
||||
|
||||
query_fields = {"title"};
|
||||
facet_fields = { };
|
||||
rank_fields = {"points"};
|
||||
sort_fields = { sort_field("points", "DESC") };
|
||||
sort_fields_index = { field("points", "INT32") };
|
||||
|
||||
collection = collectionManager.get_collection("collection");
|
||||
if(collection == nullptr) {
|
||||
collection = collectionManager.create_collection("collection", search_fields, facet_fields,
|
||||
rank_fields, "points");
|
||||
sort_fields_index, "points");
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -60,7 +63,7 @@ protected:
|
||||
|
||||
TEST_F(CollectionTest, ExactSearchShouldBeStable) {
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("the", query_fields, "", facets, rank_fields, 0, 10);
|
||||
nlohmann::json results = collection->search("the", query_fields, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(7, results["hits"].size());
|
||||
ASSERT_EQ(7, results["found"].get<int>());
|
||||
|
||||
@ -73,12 +76,29 @@ TEST_F(CollectionTest, ExactSearchShouldBeStable) {
|
||||
std::string result_id = result["id"];
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// Check ASC sorting: repeat the "the" query, this time passing a sort_field on
// "points" with direction "ASC" instead of the fixture's "DESC" sort_fields.
|
||||
std::vector<sort_field> sort_fields_asc = { sort_field("points", "ASC") };
|
||||
|
||||
results = collection->search("the", query_fields, "", facets, sort_fields_asc, 0, 10);
|
||||
// Both the returned hits and the reported total must still be 7.
ASSERT_EQ(7, results["hits"].size());
|
||||
ASSERT_EQ(7, results["found"].get<int>());
|
||||
|
||||
// Expected document ids, in the order the hits must come back for the ASC sort.
ids = {"16", "13", "10", "8", "6", "foo", "1"};
|
||||
|
||||
// Compare hit i against expected id i, so the assertion pins the ordering and not just membership.
for(size_t i = 0; i < results["hits"].size(); i++) {
|
||||
nlohmann::json result = results["hits"].at(i);
|
||||
std::string id = ids.at(i);
|
||||
std::string result_id = result["id"];
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, ExactPhraseSearch) {
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("rocket launch", query_fields, "", facets, rank_fields, 0, 10);
|
||||
nlohmann::json results = collection->search("rocket launch", query_fields, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(5, results["found"].get<uint32_t>());
|
||||
|
||||
/*
|
||||
Sort by (match, diff, score)
|
||||
@ -98,9 +118,28 @@ TEST_F(CollectionTest, ExactPhraseSearch) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// Check ASC sort order: run the same "rocket launch" query with a sort_field on
// "points" whose direction is "ASC".
|
||||
std::vector<sort_field> sort_fields_asc = { sort_field("points", "ASC") };
|
||||
results = collection->search("rocket launch", query_fields, "", facets, sort_fields_asc, 0, 10);
|
||||
// Hit count and reported total must both be 5 for this query.
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(5, results["found"].get<uint32_t>());
|
||||
|
||||
// Expected document ids, in the order the hits must come back for the ASC sort.
ids = {"8", "17", "1", "16", "13"};
|
||||
|
||||
// Position-by-position comparison so the test fails on any reordering.
for(size_t i = 0; i < results["hits"].size(); i++) {
|
||||
nlohmann::json result = results["hits"].at(i);
|
||||
std::string id = ids.at(i);
|
||||
std::string result_id = result["id"];
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// Check pagination
|
||||
results = collection->search("rocket launch", query_fields, "", facets, rank_fields, 0, 3);
|
||||
results = collection->search("rocket launch", query_fields, "", facets, sort_fields, 0, 3);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
ASSERT_EQ(4, results["found"].get<uint32_t>());
|
||||
|
||||
ids = {"8", "1", "17", "16", "13"};
|
||||
|
||||
for(size_t i = 0; i < 3; i++) {
|
||||
nlohmann::json result = results["hits"].at(i);
|
||||
std::string id = ids.at(i);
|
||||
@ -112,7 +151,7 @@ TEST_F(CollectionTest, ExactPhraseSearch) {
|
||||
TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
|
||||
// Tokens that are not found in the index should be skipped
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("DoesNotExist from", query_fields, "", facets, rank_fields, 0, 10);
|
||||
nlohmann::json results = collection->search("DoesNotExist from", query_fields, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"2", "17"};
|
||||
@ -125,7 +164,7 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
|
||||
}
|
||||
|
||||
// with non-zero cost
|
||||
results = collection->search("DoesNotExist from", query_fields, "", facets, rank_fields, 1, 10);
|
||||
results = collection->search("DoesNotExist from", query_fields, "", facets, sort_fields, 1, 10);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
for(size_t i = 0; i < results["hits"].size(); i++) {
|
||||
@ -136,7 +175,7 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
|
||||
}
|
||||
|
||||
// with 2 indexed words
|
||||
results = collection->search("from DoesNotExist insTruments", query_fields, "", facets, rank_fields, 1, 10);
|
||||
results = collection->search("from DoesNotExist insTruments", query_fields, "", facets, sort_fields, 1, 10);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ids = {"2", "17"};
|
||||
|
||||
@ -148,17 +187,17 @@ TEST_F(CollectionTest, SkipUnindexedTokensDuringPhraseSearch) {
|
||||
}
|
||||
|
||||
results.clear();
|
||||
results = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, rank_fields, 0, 10);
|
||||
results = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
results.clear();
|
||||
results = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, rank_fields, 2, 10);
|
||||
results = collection->search("DoesNotExist1 DoesNotExist2", query_fields, "", facets, sort_fields, 2, 10);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, PartialPhraseSearch) {
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("rocket research", query_fields, "", facets, rank_fields, 0, 10);
|
||||
nlohmann::json results = collection->search("rocket research", query_fields, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"1", "8", "16", "17"};
|
||||
@ -173,7 +212,7 @@ TEST_F(CollectionTest, PartialPhraseSearch) {
|
||||
|
||||
TEST_F(CollectionTest, QueryWithTypo) {
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("kind biologcal", query_fields, "", facets, rank_fields, 2, 3);
|
||||
nlohmann::json results = collection->search("kind biologcal", query_fields, "", facets, sort_fields, 2, 3);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"19", "20", "21"};
|
||||
@ -186,7 +225,7 @@ TEST_F(CollectionTest, QueryWithTypo) {
|
||||
}
|
||||
|
||||
results.clear();
|
||||
results = collection->search("fer thx", query_fields, "", facets, rank_fields, 1, 3);
|
||||
results = collection->search("fer thx", query_fields, "", facets, sort_fields, 1, 3);
|
||||
ids = {"1", "10", "13"};
|
||||
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
@ -201,7 +240,7 @@ TEST_F(CollectionTest, QueryWithTypo) {
|
||||
|
||||
TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("loox", query_fields, "", facets, rank_fields, 1, 2, MAX_SCORE, false);
|
||||
nlohmann::json results = collection->search("loox", query_fields, "", facets, sort_fields, 1, 2, MAX_SCORE, false);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
std::vector<std::string> ids = {"22", "23"};
|
||||
|
||||
@ -212,7 +251,7 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
results = collection->search("loox", query_fields, "", facets, rank_fields, 1, 3, FREQUENCY, false);
|
||||
results = collection->search("loox", query_fields, "", facets, sort_fields, 1, 3, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
ids = {"3", "12", "24"};
|
||||
|
||||
@ -224,19 +263,19 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
|
||||
}
|
||||
|
||||
// Check pagination
|
||||
results = collection->search("loox", query_fields, "", facets, rank_fields, 1, 1, FREQUENCY, false);
|
||||
results = collection->search("loox", query_fields, "", facets, sort_fields, 1, 1, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["found"].get<int>());
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
std::string solo_id = results["hits"].at(0)["id"];
|
||||
ASSERT_STREQ("3", solo_id.c_str());
|
||||
|
||||
results = collection->search("loox", query_fields, "", facets, rank_fields, 1, 2, FREQUENCY, false);
|
||||
results = collection->search("loox", query_fields, "", facets, sort_fields, 1, 2, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["found"].get<int>());
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
// Check total ordering
|
||||
|
||||
results = collection->search("loox", query_fields, "", facets, rank_fields, 1, 10, FREQUENCY, false);
|
||||
results = collection->search("loox", query_fields, "", facets, sort_fields, 1, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ids = {"3", "12", "24", "22", "23"};
|
||||
|
||||
@ -247,7 +286,7 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
results = collection->search("loox", query_fields, "", facets, rank_fields, 1, 10, MAX_SCORE, false);
|
||||
results = collection->search("loox", query_fields, "", facets, sort_fields, 1, 10, MAX_SCORE, false);
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ids = {"22", "23", "3", "12", "24"};
|
||||
|
||||
@ -262,8 +301,9 @@ TEST_F(CollectionTest, TypoTokenRankedByScoreAndFrequency) {
|
||||
TEST_F(CollectionTest, TextContainingAnActualTypo) {
|
||||
// A line contains "ISX" but not "what" - need to ensure that correction to "ISS what" happens
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("ISX what", query_fields, "", facets, rank_fields, 1, 4, FREQUENCY, false);
|
||||
nlohmann::json results = collection->search("ISX what", query_fields, "", facets, sort_fields, 1, 4, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
ASSERT_EQ(4, results["found"].get<uint32_t>());
|
||||
|
||||
std::vector<std::string> ids = {"19", "6", "21", "8"};
|
||||
|
||||
@ -275,8 +315,9 @@ TEST_F(CollectionTest, TextContainingAnActualTypo) {
|
||||
}
|
||||
|
||||
// Record containing exact token match should appear first
|
||||
results = collection->search("ISX", query_fields, "", facets, rank_fields, 1, 10, FREQUENCY, false);
|
||||
results = collection->search("ISX", query_fields, "", facets, sort_fields, 1, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(8, results["hits"].size());
|
||||
ASSERT_EQ(8, results["found"].get<uint32_t>());
|
||||
|
||||
ids = {"20", "19", "6", "3", "21", "4", "10", "8"};
|
||||
|
||||
@ -290,7 +331,7 @@ TEST_F(CollectionTest, TextContainingAnActualTypo) {
|
||||
|
||||
TEST_F(CollectionTest, PrefixSearching) {
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = collection->search("ex", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, true);
|
||||
nlohmann::json results = collection->search("ex", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, true);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
std::vector<std::string> ids = {"12", "6"};
|
||||
|
||||
@ -301,7 +342,7 @@ TEST_F(CollectionTest, PrefixSearching) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
results = collection->search("ex", query_fields, "", facets, rank_fields, 0, 10, MAX_SCORE, true);
|
||||
results = collection->search("ex", query_fields, "", facets, sort_fields, 0, 10, MAX_SCORE, true);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
ids = {"6", "12"};
|
||||
|
||||
@ -311,6 +352,19 @@ TEST_F(CollectionTest, PrefixSearching) {
|
||||
std::string id = ids.at(i);
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
// A multi-token query must also return its hits in the expected order.
|
||||
|
||||
results = collection->search("what ex", query_fields, "", facets, sort_fields, 0, 10, MAX_SCORE, true);
|
||||
ASSERT_EQ(9, results["hits"].size());
|
||||
ids = {"6", "12", "19", "22", "13", "8", "15", "24", "21"};
|
||||
|
||||
for(size_t i = 0; i < results["hits"].size(); i++) {
|
||||
nlohmann::json result = results["hits"].at(i);
|
||||
std::string result_id = result["id"];
|
||||
std::string id = ids.at(i);
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CollectionTest, MultipleFields) {
|
||||
@ -319,11 +373,10 @@ TEST_F(CollectionTest, MultipleFields) {
|
||||
std::ifstream infile(std::string(ROOT_DIR)+"test/multi_field_documents.jsonl");
|
||||
std::vector<field> fields = {field("title", field_types::STRING), field("starring", field_types::STRING),
|
||||
field("cast", field_types::STRING_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"points"};
|
||||
|
||||
coll_mul_fields = collectionManager.get_collection("coll_mul_fields");
|
||||
if(coll_mul_fields == nullptr) {
|
||||
coll_mul_fields = collectionManager.create_collection("coll_mul_fields", fields, facet_fields, rank_fields);
|
||||
coll_mul_fields = collectionManager.create_collection("coll_mul_fields", fields, facet_fields, sort_fields_index);
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -336,7 +389,7 @@ TEST_F(CollectionTest, MultipleFields) {
|
||||
|
||||
query_fields = {"title", "starring"};
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = coll_mul_fields->search("Will", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_mul_fields->search("Will", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"3", "2", "1", "0"};
|
||||
@ -351,7 +404,7 @@ TEST_F(CollectionTest, MultipleFields) {
|
||||
// when "starring" takes higher priority than "title"
|
||||
|
||||
query_fields = {"starring", "title"};
|
||||
results = coll_mul_fields->search("thomas", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_mul_fields->search("thomas", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"15", "14", "12", "13"};
|
||||
@ -364,11 +417,11 @@ TEST_F(CollectionTest, MultipleFields) {
|
||||
}
|
||||
|
||||
query_fields = {"starring", "title", "cast"};
|
||||
results = coll_mul_fields->search("ben affleck", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_mul_fields->search("ben affleck", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
query_fields = {"cast"};
|
||||
results = coll_mul_fields->search("chris", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_mul_fields->search("chris", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"6", "1", "7"};
|
||||
@ -380,7 +433,7 @@ TEST_F(CollectionTest, MultipleFields) {
|
||||
}
|
||||
|
||||
query_fields = {"cast"};
|
||||
results = coll_mul_fields->search("chris pine", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_mul_fields->search("chris pine", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"7", "6", "1"};
|
||||
@ -399,11 +452,12 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32),
|
||||
field("years", field_types::INT32_ARRAY),
|
||||
field("timestamps", field_types::INT64_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"age"};
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC") };
|
||||
std::vector<field> sort_fields_index = { field("age", "INT32") };
|
||||
|
||||
coll_array_fields = collectionManager.get_collection("coll_array_fields");
|
||||
if(coll_array_fields == nullptr) {
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, rank_fields);
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -417,7 +471,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
// Plain search with no filters - results should be sorted by rank fields
|
||||
query_fields = {"name"};
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"3", "1", "4", "0", "2"};
|
||||
@ -430,7 +484,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// Searching on an int32 field
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:>24", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:>24", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"3", "1", "4"};
|
||||
@ -442,14 +496,14 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:>=24", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:>=24", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:24", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:24", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
// Searching a number against an int32 array field
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years:>2002", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years:>2002", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"1", "0", "2"};
|
||||
@ -460,7 +514,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years:<1989", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years:<1989", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ids = {"3"};
|
||||
@ -472,7 +526,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// multiple filters
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years:<2005 && years:>1987", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years:<2005 && years:>1987", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(1, results["hits"].size());
|
||||
|
||||
ids = {"4"};
|
||||
@ -484,7 +538,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// multiple search values (works like SQL's IN operator) against a single int field
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:[21, 24, 63]", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age:[21, 24, 63]", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
|
||||
ids = {"3", "0", "2"};
|
||||
@ -496,7 +550,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// multiple search values against an int32 array field - also use extra padding between symbols
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years : [ 2015, 1985 , 1999]", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "years : [ 2015, 1985 , 1999]", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"3", "1", "4", "0"};
|
||||
@ -508,7 +562,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// searching on an int64 array field - also ensure that padded space causes no issues
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps : > 475205222", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps : > 475205222", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"1", "4", "0", "2"};
|
||||
@ -521,7 +575,7 @@ TEST_F(CollectionTest, FilterOnNumericFields) {
|
||||
}
|
||||
|
||||
// when filters don't match any record, no results should be returned
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps:<1", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps:<1", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
@ -534,11 +588,13 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
std::vector<field> fields = {field("name", field_types::STRING), field("age", field_types::INT32),
|
||||
field("years", field_types::INT32_ARRAY),
|
||||
field("tags", field_types::STRING_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"age"};
|
||||
|
||||
std::vector<field> sort_fields_index = { field("age", "INT32") };
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC") };
|
||||
|
||||
coll_array_fields = collectionManager.get_collection("coll_array_fields");
|
||||
if(coll_array_fields == nullptr) {
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, rank_fields);
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -551,7 +607,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
|
||||
query_fields = {"name"};
|
||||
std::vector<std::string> facets;
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "tags: gold", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "tags: gold", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
std::vector<std::string> ids = {"1", "4", "0", "2"};
|
||||
@ -563,7 +619,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
ASSERT_STREQ(id.c_str(), result_id.c_str());
|
||||
}
|
||||
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags : bronze", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags : bronze", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(2, results["hits"].size());
|
||||
|
||||
ids = {"4", "2"};
|
||||
@ -576,7 +632,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
}
|
||||
|
||||
// search with a list of tags, also testing extra padding of space
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags: [bronze, silver]", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags: [bronze, silver]", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(4, results["hits"].size());
|
||||
|
||||
ids = {"3", "4", "0", "2"};
|
||||
@ -589,7 +645,7 @@ TEST_F(CollectionTest, FilterOnTextFields) {
|
||||
}
|
||||
|
||||
// should be exact matches (no normalization or fuzzy searching should happen)
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags: BRONZE", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "tags: BRONZE", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
@ -604,11 +660,13 @@ TEST_F(CollectionTest, HandleBadlyFormedFilterQuery) {
|
||||
field("years", field_types::INT32_ARRAY),
|
||||
field("timestamps", field_types::INT64_ARRAY),
|
||||
field("tags", field_types::STRING_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"age"};
|
||||
|
||||
std::vector<field> sort_fields_index = { field("age", "INT32") };
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC") };
|
||||
|
||||
coll_array_fields = collectionManager.get_collection("coll_array_fields");
|
||||
if(coll_array_fields == nullptr) {
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, rank_fields);
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -623,27 +681,27 @@ TEST_F(CollectionTest, HandleBadlyFormedFilterQuery) {
|
||||
std::vector<std::string> facets;
|
||||
|
||||
// when filter field does not exist in the schema
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "tagzz: gold", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "tagzz: gold", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// searching using a string for a numeric field
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age: abcdef", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age: abcdef", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// searching using a string for a numeric array field
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps: abcdef", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps: abcdef", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// malformed k:v syntax
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps abcdef", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "timestamps abcdef", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// just empty spaces
|
||||
results = coll_array_fields->search("Jeremy", query_fields, " ", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, " ", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
// wrapping number with quotes
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age: '21'", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age: '21'", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
@ -658,11 +716,13 @@ TEST_F(CollectionTest, FacetCounts) {
|
||||
field("timestamps", field_types::INT64_ARRAY),
|
||||
field("tags", field_types::STRING_ARRAY)};
|
||||
facet_fields = {field("tags", field_types::STRING_ARRAY), field("name", field_types::STRING)};
|
||||
std::vector<std::string> rank_fields = {"age"};
|
||||
|
||||
// Fields registered with the collection for sorting. The second argument of field() is the
// field's type ("age" is declared as INT32 in the schema); a direction such as "DESC" is only
// meaningful for the query-time sort_field.
std::vector<field> sort_fields_index = { field("age", "INT32") };
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC") };
|
||||
|
||||
coll_array_fields = collectionManager.get_collection("coll_array_fields");
|
||||
if(coll_array_fields == nullptr) {
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, rank_fields);
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -677,27 +737,27 @@ TEST_F(CollectionTest, FacetCounts) {
|
||||
std::vector<std::string> facets = {"tags"};
|
||||
|
||||
// single facet with no filters
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"][0].size());
|
||||
ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
|
||||
|
||||
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][1]["value"]);
|
||||
ASSERT_EQ(4, (int) results["facet_counts"][0]["counts"][1]["count"]);
|
||||
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]);
|
||||
ASSERT_EQ(4, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
|
||||
ASSERT_EQ("silver", results["facet_counts"][0]["counts"][2]["value"]);
|
||||
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][2]["count"]);
|
||||
ASSERT_EQ("silver", results["facet_counts"][0]["counts"][1]["value"]);
|
||||
ASSERT_EQ(3, (int) results["facet_counts"][0]["counts"][1]["count"]);
|
||||
|
||||
ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][0]["value"]);
|
||||
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]);
|
||||
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][2]["count"]);
|
||||
|
||||
// 2 facets, 1 text filter with no filters
|
||||
facets.clear();
|
||||
facets.push_back("tags");
|
||||
facets.push_back("name");
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
|
||||
ASSERT_EQ(5, results["hits"].size());
|
||||
ASSERT_EQ(2, results["facet_counts"].size());
|
||||
@ -712,19 +772,19 @@ TEST_F(CollectionTest, FacetCounts) {
|
||||
// facet with filters
|
||||
facets.clear();
|
||||
facets.push_back("tags");
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age: >24", facets, rank_fields, 0, 10, FREQUENCY, false);
|
||||
results = coll_array_fields->search("Jeremy", query_fields, "age: >24", facets, sort_fields, 0, 10, FREQUENCY, false);
|
||||
|
||||
ASSERT_EQ(3, results["hits"].size());
|
||||
ASSERT_EQ(1, results["facet_counts"].size());
|
||||
|
||||
ASSERT_EQ("tags", results["facet_counts"][0]["field_name"]);
|
||||
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][0]["count"]);
|
||||
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][1]["count"]);
|
||||
ASSERT_EQ(2, (int) results["facet_counts"][0]["counts"][2]["count"]);
|
||||
ASSERT_EQ(1, (int) results["facet_counts"][0]["counts"][2]["count"]);
|
||||
|
||||
ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][0]["value"]);
|
||||
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][1]["value"]);
|
||||
ASSERT_EQ("silver", results["facet_counts"][0]["counts"][2]["value"]);
|
||||
ASSERT_EQ("gold", results["facet_counts"][0]["counts"][0]["value"]);
|
||||
ASSERT_EQ("silver", results["facet_counts"][0]["counts"][1]["value"]);
|
||||
ASSERT_EQ("bronze", results["facet_counts"][0]["counts"][2]["value"]);
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
}
|
||||
@ -739,11 +799,12 @@ TEST_F(CollectionTest, SearchingWithMissingFields) {
|
||||
field("timestamps", field_types::INT64_ARRAY),
|
||||
field("tags", field_types::STRING_ARRAY)};
|
||||
facet_fields = {field("tags", field_types::STRING_ARRAY), field("name", field_types::STRING)};
|
||||
std::vector<std::string> rank_fields = {"age"};
|
||||
std::vector<field> sort_fields_index = { field("age", "DESC") };
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC") };
|
||||
|
||||
coll_array_fields = collectionManager.get_collection("coll_array_fields");
|
||||
if(coll_array_fields == nullptr) {
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, rank_fields);
|
||||
coll_array_fields = collectionManager.create_collection("coll_array_fields", fields, facet_fields, sort_fields_index);
|
||||
}
|
||||
|
||||
std::string json_line;
|
||||
@ -758,28 +819,28 @@ TEST_F(CollectionTest, SearchingWithMissingFields) {
|
||||
std::vector<std::string> facets;
|
||||
std::vector<std::string> query_fields_not_found = {"titlez"};
|
||||
|
||||
nlohmann::json res = coll_array_fields->search("the", query_fields_not_found, "", facets, rank_fields, 0, 10);
|
||||
nlohmann::json res = coll_array_fields->search("the", query_fields_not_found, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
ASSERT_STREQ("Could not find a search field named `titlez` in the schema.",res["error"].get<std::string>().c_str());
|
||||
|
||||
// when a query field is an integer field
|
||||
res = coll_array_fields->search("the", {"age"}, "", facets, rank_fields, 0, 10);
|
||||
res = coll_array_fields->search("the", {"age"}, "", facets, sort_fields, 0, 10);
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
ASSERT_STREQ("Search field `age` should be a string or a string array.", res["error"].get<std::string>().c_str());
|
||||
|
||||
// when a facet field is not defined in the schema
|
||||
res = coll_array_fields->search("the", {"name"}, "", {"timestamps"}, rank_fields, 0, 10);
|
||||
res = coll_array_fields->search("the", {"name"}, "", {"timestamps"}, sort_fields, 0, 10);
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
ASSERT_STREQ("Could not find a facet field named `timestamps` in the schema.", res["error"].get<std::string>().c_str());
|
||||
|
||||
// when a rank field is not defined in the schema
|
||||
res = coll_array_fields->search("the", {"name"}, "", {}, {"timestamps"}, 0, 10);
|
||||
res = coll_array_fields->search("the", {"name"}, "", {}, { sort_field("timestamps", "ASC") }, 0, 10);
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
ASSERT_STREQ("Could not find a rank field named `timestamps` in the schema.", res["error"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("Could not find a sort field named `timestamps` in the schema.", res["error"].get<std::string>().c_str());
|
||||
|
||||
res = coll_array_fields->search("the", {"name"}, "", {}, {"_rank"}, 0, 10);
|
||||
res = coll_array_fields->search("the", {"name"}, "", {}, { sort_field("_rank", "ASC") }, 0, 10);
|
||||
ASSERT_EQ(0, res["hits"].size());
|
||||
ASSERT_STREQ("Could not find a rank field named `_rank` in the schema.", res["error"].get<std::string>().c_str());
|
||||
ASSERT_STREQ("Could not find a sort field named `_rank` in the schema.", res["error"].get<std::string>().c_str());
|
||||
|
||||
collectionManager.drop_collection("coll_array_fields");
|
||||
}
|
||||
@ -790,12 +851,14 @@ TEST_F(CollectionTest, IndexingWithBadData) {
|
||||
|
||||
std::vector<field> fields = {field("name", field_types::STRING)};
|
||||
facet_fields = {field("tags", field_types::STRING_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"age", "average"};
|
||||
|
||||
std::vector<field> sort_fields_index = { field("age", "INT32"), field("average", "INT32") };
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC"), sort_field("average", "DESC") };
|
||||
|
||||
sample_collection = collectionManager.get_collection("sample_collection");
|
||||
if(sample_collection == nullptr) {
|
||||
sample_collection = collectionManager.create_collection("sample_collection", fields, facet_fields,
|
||||
rank_fields, "age");
|
||||
sort_fields_index, "age");
|
||||
}
|
||||
|
||||
const Option<std::string> & search_fields_missing_op1 = sample_collection->add("{\"namezz\": \"foo\", \"age\": 29}");
|
||||
@ -814,10 +877,10 @@ TEST_F(CollectionTest, IndexingWithBadData) {
|
||||
facet_fields_missing_op1.error().c_str());
|
||||
|
||||
const char *doc_str = "{\"name\": \"foo\", \"age\": 34, \"tags\": [\"red\", \"blue\"]}";
|
||||
const Option<std::string> & rank_fields_missing_op1 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(rank_fields_missing_op1.ok());
|
||||
ASSERT_STREQ("Field `average` has been declared as a rank field in the schema, but is not found in the document.",
|
||||
rank_fields_missing_op1.error().c_str());
|
||||
const Option<std::string> & sort_fields_missing_op1 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(sort_fields_missing_op1.ok());
|
||||
ASSERT_STREQ("Field `average` has been declared as a sort field in the schema, but is not found in the document.",
|
||||
sort_fields_missing_op1.error().c_str());
|
||||
|
||||
// Handle type errors
|
||||
|
||||
@ -832,19 +895,25 @@ TEST_F(CollectionTest, IndexingWithBadData) {
|
||||
ASSERT_TRUE(empty_facet_field_op.ok());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"age\": \"34\", \"tags\": [], \"average\": 34 }";
|
||||
const Option<std::string> & bad_token_ordering_field_op1 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ordering_field_op1.ok());
|
||||
ASSERT_STREQ("Token ordering field `age` must be an INT32.", bad_token_ordering_field_op1.error().c_str());
|
||||
const Option<std::string> & bad_token_ranking_field_op1 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ranking_field_op1.ok());
|
||||
ASSERT_STREQ("Token ranking field `age` must be an INT32.", bad_token_ranking_field_op1.error().c_str());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"age\": 343234324234233234, \"tags\": [], \"average\": 34 }";
|
||||
const Option<std::string> & bad_token_ordering_field_op2 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ordering_field_op2.ok());
|
||||
ASSERT_STREQ("Token ordering field `age` exceeds maximum value of INT32.", bad_token_ordering_field_op2.error().c_str());
|
||||
const Option<std::string> & bad_token_ranking_field_op2 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ranking_field_op2.ok());
|
||||
ASSERT_STREQ("Token ranking field `age` exceeds maximum value of INT32.", bad_token_ranking_field_op2.error().c_str());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"tags\": [], \"average\": 34 }";
|
||||
const Option<std::string> & bad_token_ranking_field_op3 = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_token_ranking_field_op3.ok());
|
||||
ASSERT_STREQ("Field `age` has been declared as a token ranking field, but is not found in the document.",
|
||||
bad_token_ranking_field_op3.error().c_str());
|
||||
|
||||
doc_str = "{\"name\": \"foo\", \"age\": 34, \"tags\": [], \"average\": \"34\"}";
|
||||
const Option<std::string> & bad_rank_field_op = sample_collection->add(doc_str);
|
||||
ASSERT_FALSE(bad_rank_field_op.ok());
|
||||
ASSERT_STREQ("Rank field `average` must be an integer.", bad_rank_field_op.error().c_str());
|
||||
ASSERT_STREQ("Sort field `average` must be a number.", bad_rank_field_op.error().c_str());
|
||||
|
||||
collectionManager.drop_collection("sample_collection");
|
||||
}
|
||||
@ -854,13 +923,15 @@ TEST_F(CollectionTest, EmptyIndexShouldNotCrash) {
|
||||
|
||||
std::vector<field> fields = {field("name", field_types::STRING)};
|
||||
facet_fields = {field("tags", field_types::STRING_ARRAY)};
|
||||
std::vector<std::string> rank_fields = {"age", "average"};
|
||||
|
||||
std::vector<field> sort_fields_index = { field("age", "INT32"), field("average", "INT32") };
|
||||
std::vector<sort_field> sort_fields = { sort_field("age", "DESC"), sort_field("average", "DESC") };
|
||||
|
||||
empty_coll = collectionManager.get_collection("empty_coll");
|
||||
if(empty_coll == nullptr) {
|
||||
empty_coll = collectionManager.create_collection("empty_coll", fields, facet_fields, rank_fields, "age");
|
||||
empty_coll = collectionManager.create_collection("empty_coll", fields, facet_fields, sort_fields_index, "age");
|
||||
}
|
||||
|
||||
nlohmann::json results = empty_coll->search("a", {"name"}, "", {}, rank_fields, 0, 10, FREQUENCY, false);
|
||||
nlohmann::json results = empty_coll->search("a", {"name"}, "", {}, sort_fields, 0, 10, FREQUENCY, false);
|
||||
ASSERT_EQ(0, results["hits"].size());
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user