2020-07-21 17:36:27 +05:30

110 lines
2.7 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Compact Variable Trie
================================================================================================================
ates, at, as, bet, to, tk
[ * ]
|
a b t
\
s t etØ k o
/ / /
Ø esØ Ø Ø Ø
BASIC DESIGN
============
* All siblings in the same block.
* Pointer to ONLY FIRST child node of each sibling.
* Each sibling node's children represented by their character/byte
* For root there are no siblings, so pointer only to `a` child.
ROOT -> [0|0|PTRA][3][a][b][t]
PTRA -> [0|PTRS][2|PTRE][4|PTRK[2][s][t][2][e][t][2][k][o]
PTRS -> [0|PTRØ][1|PTRT][1][Ø][2][e][Ø]
PTRT -> [0|L_PTRE][3|PTRØ][3][e][s][Ø][1][Ø] (path compression)
PTRØ -> [0|LEAF]
[TYPE+OFFSET][PTR_1]..[NUM_CHILDREN][A][B]..[X]
2 bytes for type+offset
6 bytes for address
1 byte for num_children
x bytes for bytes
Actual offset to the Nth node's children: (8*NUM_CHILDREN) + N + offset
if num_children >= 32:
Use bitset to represent children present
Read 32 bytes and do bitset operations to extract matched index
else:
Use array to represent children
Read `num_children` bytes and do sequential search
Removal of [be]
1. Realloc contents of PTR1 by removing "e" from the nodes list
2. Free PTR3
3. Realloc contents of ROOT by removing "b" from the nodes list
*/
#include <cstdint>
#include <cstddef>
class CVTrie {
private:
size_t size;
// [TYPE+OFFSET][PTR_1]... (8 bytes each)
// [NUM_PREFIX][A][B]..[X] (upto 33 bytes)
uint8_t* root;
const uintptr_t PTR_MASK = ~(1ULL << 48ULL);
public:
uint8_t* get_ptr(const uint8_t* tagged_ptr) {
// Unfortunately, right shifting of signed integer for sign extension is implementation-defined
// but works on all major compilers
return (uint8_t*)( (intptr_t)((uintptr_t)tagged_ptr << 16ULL) >> 16ULL );
}
uint8_t* tag_ptr(const uint8_t* ptr, const uint64_t data) {
return (uint8_t*)(((uintptr_t)ptr & PTR_MASK) | (data << 48ULL));
}
uint16_t get_data(const uint16_t* ptr) {
return (uintptr_t)(ptr) >> 56ULL;;
}
void add(const unsigned char* key, const size_t length, void* value) {
// If the key exists, augment the node, otherwise insert a new node
size_t num_siblings = 1;
uint8_t* buf = root;
size_t key_index = 0;
if(root == nullptr) {
// trie is empty
root = new uint8_t[8+1+1];
}
while(true) {
// for each sibling
for(auto sindex = 0; sindex < num_siblings; sindex++) {
unsigned char c = key[key_index];
}
}
}
};