Use unordered_map for low-volume meta data structures.

The iteration order of spp::sparse_hash_map differs between clang and gcc.
This commit is contained in:
Kishore Nallan 2017-12-20 06:45:21 +05:30
parent 8d5f7c18a3
commit 01275c38f2
6 changed files with 29 additions and 21 deletions

View File

@ -3,6 +3,7 @@
#include <string>
#include <vector>
#include <string>
#include <unordered_map>
#include <thread>
#include <mutex>
#include <condition_variable>
@ -35,11 +36,11 @@ private:
std::vector<field> fields;
spp::sparse_hash_map<std::string, field> search_schema;
std::unordered_map<std::string, field> search_schema;
spp::sparse_hash_map<std::string, field> facet_schema;
std::unordered_map<std::string, field> facet_schema;
spp::sparse_hash_map<std::string, field> sort_schema;
std::unordered_map<std::string, field> sort_schema;
Store* store;
@ -87,7 +88,7 @@ public:
std::vector<field> get_fields();
spp::sparse_hash_map<std::string, field> get_schema();
std::unordered_map<std::string, field> get_schema();
std::string get_token_ranking_field();

View File

@ -1,6 +1,7 @@
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include <mutex>
#include <condition_variable>
@ -57,11 +58,11 @@ private:
size_t num_documents;
spp::sparse_hash_map<std::string, field> search_schema;
std::unordered_map<std::string, field> search_schema;
spp::sparse_hash_map<std::string, field> facet_schema;
std::unordered_map<std::string, field> facet_schema;
spp::sparse_hash_map<std::string, field> sort_schema;
std::unordered_map<std::string, field> sort_schema;
spp::sparse_hash_map<std::string, art_tree*> search_index;
@ -125,8 +126,8 @@ private:
public:
Index() = delete;
Index(const std::string name, spp::sparse_hash_map<std::string, field> search_schema,
spp::sparse_hash_map<std::string, field> facet_schema, spp::sparse_hash_map<std::string, field> sort_schema);
Index(const std::string name, std::unordered_map<std::string, field> search_schema,
std::unordered_map<std::string, field> facet_schema, std::unordered_map<std::string, field> sort_schema);
~Index();

View File

@ -6,6 +6,8 @@
#include <match_score.h>
#include <string_utils.h>
#include <art.h>
#include <thread>
#include <chrono>
Collection::Collection(const std::string name, const uint32_t collection_id, const uint32_t next_seq_id, Store *store,
const std::vector<field> &fields, const std::string & token_ranking_field):
@ -417,6 +419,7 @@ Option<nlohmann::json> Collection::search(std::string query, const std::vector<s
index->processed = false;
}
index->cv.notify_one();
//std::this_thread::sleep_for(std::chrono::milliseconds(400));
}
Option<nlohmann::json> index_search_op({}); // stores the last error across all index threads
@ -739,7 +742,7 @@ std::vector<field> Collection::get_fields() {
return fields;
}
spp::sparse_hash_map<std::string, field> Collection::get_schema() {
std::unordered_map<std::string, field> Collection::get_schema() {
return search_schema;
};

View File

@ -2,13 +2,14 @@
#include <numeric>
#include <chrono>
#include <unordered_map>
#include <array_utils.h>
#include <match_score.h>
#include <string_utils.h>
#include <art.h>
Index::Index(const std::string name, spp::sparse_hash_map<std::string, field> search_schema,
spp::sparse_hash_map<std::string, field> facet_schema, spp::sparse_hash_map<std::string, field> sort_schema):
Index::Index(const std::string name, std::unordered_map<std::string, field> search_schema,
std::unordered_map<std::string, field> facet_schema, std::unordered_map<std::string, field> sort_schema):
name(name), search_schema(search_schema), facet_schema(facet_schema), sort_schema(sort_schema) {
for(const auto pair: search_schema) {
@ -635,7 +636,7 @@ void Index::search_field(std::string & query, const std::string & field, uint32_
spp::sparse_hash_map<std::string, std::vector<art_leaf*>> token_cost_cache;
// Used to drop the least occurring token(s) for partial searches
spp::sparse_hash_map<std::string, uint32_t> token_to_count;
std::unordered_map<std::string, uint32_t> token_to_count;
std::vector<std::vector<int>> token_to_costs;
@ -877,10 +878,12 @@ void Index::score_results(const std::vector<sort_by> & sort_fields, const int &
const number_t & secondary_rank_value = secondary_rank_score * secondary_rank_factor;
topster.add(seq_id, query_index, match_score, primary_rank_value, secondary_rank_value);
/*std::cout << name << ", total_cost: " << total_cost
<< ", words_present: " << mscore.words_present << ", match_score: " << match_score
<< ", primary_rank_score: " << primary_rank_score.intval << ", distance: " << mscore.distance
<< ", seq_id: " << seq_id << std::endl;*/
/*std::ostringstream os;
os << name << ", total_cost: " << (255 - total_cost)
<< ", words_present: " << mscore.words_present << ", match_score: " << match_score
<< ", primary_rank_score: " << primary_rank_score.intval << ", distance: " << (MAX_SEARCH_TOKENS - mscore.distance)
<< ", seq_id: " << seq_id << std::endl;
std::cout << os.str();*/
}
//long long int timeNanos = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - begin).count();

View File

@ -47,7 +47,7 @@ TEST_F(CollectionManagerTest, CollectionCreation) {
collection1 = collectionManager2.get_collection("collection1");
ASSERT_NE(nullptr, collection1);
spp::sparse_hash_map<std::string, field> schema = collection1->get_schema();
std::unordered_map<std::string, field> schema = collection1->get_schema();
std::vector<std::string> facet_fields_expected = {"cast"};
ASSERT_EQ(0, collection1->get_collection_id());
@ -119,7 +119,7 @@ TEST_F(CollectionManagerTest, RestoreRecordsOnRestart) {
nlohmann::json results = collection1->search("thomas", search_fields, "", facets, sort_fields, 0, 10, 1, FREQUENCY, false).get();
ASSERT_EQ(4, results["hits"].size());
spp::sparse_hash_map<std::string, field> schema = collection1->get_schema();
std::unordered_map<std::string, field> schema = collection1->get_schema();
// create a new collection manager to ensure that it restores the records from the disk backed store
CollectionManager & collectionManager2 = CollectionManager::get_instance();

View File

@ -1424,10 +1424,10 @@ TEST_F(CollectionTest, IndexingWithBadData) {
const Option<std::string> & search_fields_missing_op1 = sample_collection->add("{\"namezz\": \"foo\", \"age\": 29, \"average\": 78}");
ASSERT_FALSE(search_fields_missing_op1.ok());
ASSERT_STREQ("Field `name` has been declared in the schema, but is not found in the document.",
ASSERT_STREQ("Field `tags` has been declared in the schema, but is not found in the document.",
search_fields_missing_op1.error().c_str());
const Option<std::string> & search_fields_missing_op2 = sample_collection->add("{\"namez\": \"foo\", \"age\": 34, \"average\": 78}");
const Option<std::string> & search_fields_missing_op2 = sample_collection->add("{\"namez\": \"foo\", \"tags\": [], \"age\": 34, \"average\": 78}");
ASSERT_FALSE(search_fields_missing_op2.ok());
ASSERT_STREQ("Field `name` has been declared in the schema, but is not found in the document.",
search_fields_missing_op2.error().c_str());