typesense/src/main.cpp
Kishore Nallan ba33da1d51 Lots of code clean up.
* Move stuff out of main to classes
* Standardize naming conventions.
2016-08-07 14:55:26 -07:00

44 lines
1.3 KiB
C++

#include <stdlib.h>

#include <chrono>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include <art.h>

#include "string_utils.h"
#include "crow_all.h"
#include "search_index.h"
using namespace std;
int main() {
SearchIndex *index = new SearchIndex();
//std::ifstream infile("/Users/kishore/others/wreally/typesense/test/documents.txt");
std::ifstream infile("/Users/kishore/Downloads/hnstories.tsv");
std::string line;
uint32_t doc_id = 1;
while (std::getline(infile, line)) {
vector<string> parts;
StringUtils::tokenize(line, parts, "\t", true);
line = StringUtils::replace_all(line, "\"", "");
vector<string> tokens;
StringUtils::tokenize(parts[0], tokens, " ", true);
if(parts.size() != 2) continue;
index->add(doc_id, tokens, stoi(parts[1]));
doc_id++;
}
cout << "FINISHED INDEXING!" << endl << flush;
auto begin = std::chrono::high_resolution_clock::now();
index->search("thei rserch", 100);
long long int timeMillis = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - begin).count();
cout << "Time taken: " << timeMillis << "us" << endl;
delete index;
return 0;
}