|
Data Structures |
class | bvector |
| bitvector with runtime compression of bits. More...
|
struct | gap_len_table |
| Default GAP lengths table. More...
|
struct | gap_len_table_min |
| Alternative GAP lengths table. Good for memory saver mode and very sparse bitsets. More...
|
struct | bit_count_table |
| Structure to aid in counting bits: the table contains the bit counts for numbers in the 0-255 range. More...
|
struct | block_set_table |
| Structure keeps all-left/right ON bits masks. More...
|
struct | first_bit_table |
| Structure keeps index of first ON bit for every byte. More...
|
struct | all_set |
| Structure carries a pointer to a bit block with all bits set to 1. More...
|
struct | _copyright |
| Internal structure. More...
|
struct | globals |
| Internal structure. More...
|
class | miniset |
| Template class implements memory saving set functionality. More...
|
class | bvmini |
| Mini bitvector used in bvector template to keep block type flags. More...
|
class | bvector_mini |
| Bitvector class with very limited functionality. More...
|
class | encoder |
| Memory encoding. More...
|
class | decoder_base |
| Base class for all decoding functionality. More...
|
class | decoder |
| Class for decoding data from memory buffer. More...
|
class | decoder_little_endian |
| Class for decoding data from memory buffer. More...
|
class | sse2_empty_guard |
| SSE2 reinitialization guard class. More...
|
struct | distance_metric_descriptor |
| Distance metric descriptor, holds metric code and result. More...
|
Typedefs |
typedef bm::miniset< bm::block_allocator,
bm::set_total_blocks > | mem_save_set |
typedef decoder | decoder_big_endian |
| Class for decoding data from memory buffer.
|
typedef unsigned long long | id64_t |
typedef unsigned int | id_t |
typedef unsigned int | word_t |
typedef unsigned short | short_t |
typedef unsigned short | gap_word_t |
typedef id64_t | wordop_t |
Enumerations |
enum | operation { BM_AND = 0,
BM_OR,
BM_SUB,
BM_XOR
} |
| Bit operations enumeration. More...
|
enum | ByteOrder { BigEndian = 0,
LittleEndian = 1
} |
| Byte orders recognized by the library. More...
|
enum | strategy { BM_BIT = 0,
BM_GAP = 1
} |
| Block allocation strategies. More...
|
enum | distance_metric {
COUNT_AND,
COUNT_XOR,
COUNT_OR,
COUNT_SUB_AB,
COUNT_SUB_BA,
COUNT_A,
COUNT_B
} |
| Distance metrics codes defined for vectors A and B. More...
|
Functions |
template<class Alloc, class MS> |
bvector< Alloc, MS > | operator & (const bvector< Alloc, MS > &v1, const bvector< Alloc, MS > &v2) |
template<class Alloc, class MS> |
bvector< Alloc, MS > | operator| (const bvector< Alloc, MS > &v1, const bvector< Alloc > &v2) |
template<class Alloc, class MS> |
bvector< Alloc, MS > | operator^ (const bvector< Alloc, MS > &v1, const bvector< Alloc, MS > &v2) |
template<class Alloc, class MS> |
bvector< Alloc, MS > | operator- (const bvector< Alloc, MS > &v1, const bvector< Alloc, MS > &v2) |
bm::id_t | word_bitcount64 (bm::id64_t w) |
template<typename W> |
void | xor_swap (W &x, W &y) |
| XOR swap two scalar variables.
|
template<typename T> |
int | wordcmp0 (T w1, T w2) |
| Lexicographical comparison of two words as bit strings. Auxiliary implementation for testing and reference purposes.
|
template<typename T> |
int | wordcmp (T a, T b) |
| Lexicographical comparison of two words as bit strings. Auxiliary implementation for testing and reference purposes.
|
template<typename T> |
unsigned | gap_bfind (const T *buf, unsigned pos, unsigned *is_set) |
template<typename T> |
unsigned | gap_test (const T *buf, unsigned pos) |
| Tests if bit = pos is true.
|
template<class T, class F> |
void | for_each_nzblock (T ***root, unsigned size1, unsigned size2, F &f) |
template<class T, class F> |
bool | for_each_nzblock_if (T ***root, unsigned size1, unsigned size2, F &f) |
template<class T, class F> |
void | for_each_block (T ***root, unsigned size1, unsigned size2, F &f) |
template<class T, class F> |
F | bmfor_each (T first, T last, F f) |
template<class T> |
T | sum_arr (T *first, T *last) |
template<typename T> |
unsigned | gap_bit_count (const T *buf) |
| Calculates number of bits ON in GAP buffer.
|
template<typename T> |
unsigned | gap_bit_count_range (const T *buf, T left, T right) |
| Counts 1 bits in GAP buffer in the closed [left, right] range.
|
template<typename T> |
int | gapcmp (const T *buf1, const T *buf2) |
| Lexicographical comparison of GAP buffers.
|
template<typename T, class F> |
void | gap_buff_op (T *BMRESTRICT dest, const T *BMRESTRICT vect1, unsigned vect1_mask, const T *BMRESTRICT vect2, unsigned vect2_mask, F f) |
| Abstract operation for GAP buffers. Receives functor F as a template argument.
|
template<typename T> |
unsigned | gap_set_value (unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) |
| Abstract distance(similarity) operation for GAP buffers. Receives functor F as a template argument. Sets or clears bit in the GAP buffer.
|
template<typename T> |
int | gap_find_in_block (const T *buf, unsigned nbit, bm::id_t *prev) |
| Searches for the next 1 bit in the GAP block.
|
void | or_bit_block (unsigned *dest, unsigned bitpos, unsigned bitcount) |
| Sets bits to 1 in the bitblock.
|
void | sub_bit_block (unsigned *dest, unsigned bitpos, unsigned bitcount) |
| SUB (AND NOT) bit interval to 1 in the bitblock.
|
void | xor_bit_block (unsigned *dest, unsigned bitpos, unsigned bitcount) |
| XOR bit interval to 1 in the bitblock.
|
template<typename T> |
void | gap_sub_to_bitset (unsigned *dest, const T *buf) |
| SUB (AND NOT) GAP block to bitblock.
|
template<typename T> |
void | gap_xor_to_bitset (unsigned *dest, const T *buf) |
| XOR GAP block to bitblock.
|
template<typename T> |
void | gap_add_to_bitset (unsigned *dest, const T *buf) |
| Adds(OR) GAP block to bitblock.
|
template<typename T> |
void | gap_and_to_bitset (unsigned *dest, const T *buf) |
| ANDs GAP block to bitblock.
|
template<typename T> |
bm::id_t | gap_bitset_and_count (const unsigned *block, const T *buf) |
| Compute bitcount of bit block AND masked by GAP block.
|
template<typename T> |
bm::id_t | gap_bitset_sub_count (const unsigned *block, const T *buf) |
| Compute bitcount of bit block SUB masked by GAP block.
|
template<typename T> |
bm::id_t | gap_bitset_xor_count (const unsigned *block, const T *buf) |
| Compute bitcount of bit block XOR masked by GAP block.
|
template<typename T> |
bm::id_t | gap_bitset_or_count (const unsigned *block, const T *buf) |
| Compute bitcount of bit block OR masked by GAP block.
|
void | bit_block_set (bm::word_t *BMRESTRICT dst, bm::word_t value) |
| Bitblock memset operation.
|
template<typename T> |
void | gap_convert_to_bitset (unsigned *dest, const T *buf) |
| GAP block to bitblock conversion.
|
template<typename T> |
void | gap_convert_to_bitset (unsigned *dest, const T *buf, unsigned dest_len) |
| GAP block to bitblock conversion.
|
template<typename T> |
unsigned * | gap_convert_to_bitset_smart (unsigned *dest, const T *buf, id_t set_max) |
| Smart GAP block to bitblock conversion.
|
template<typename T> |
unsigned | gap_control_sum (const T *buf) |
| Calculates sum of all words in GAP block. (For debugging purposes).
|
template<class T> |
void | gap_set_all (T *buf, unsigned set_max, unsigned value) |
| Sets all bits to 0 or 1 (GAP).
|
template<class T> |
void | gap_init_range_block (T *buf, unsigned from, unsigned to, unsigned value, unsigned set_max) |
| Init gap block so it has block in it (can be whole block).
|
template<typename T> |
void | gap_invert (T *buf) |
| Inverts all bits in the GAP buffer.
|
template<typename T> |
bool | gap_is_all_zero (const T *buf, unsigned set_max) |
| Temporarily inverts all bits in the GAP buffer. Checks if GAP block is all-zero.
|
template<typename T> |
bool | gap_is_all_one (const T *buf, unsigned set_max) |
| Checks if GAP block is all-one.
|
template<typename T> |
unsigned | gap_length (const T *buf) |
| Returns GAP block length.
|
template<typename T> |
unsigned | gap_capacity (const T *buf, const T *glevel_len) |
| Returns GAP block capacity.
|
template<typename T> |
unsigned | gap_limit (const T *buf, const T *glevel_len) |
| Returns GAP block capacity limit.
|
template<typename T> |
unsigned | gap_level (const T *buf) |
| Returns GAP blocks capacity level.
|
template<typename T> |
void | set_gap_level (T *buf, unsigned level) |
| Sets GAP block capacity level.
|
template<typename T> |
int | gap_calc_level (int len, const T *glevel_len) |
| Calculates GAP block capacity level.
|
template<typename T> |
unsigned | gap_free_elements (const T *buf, const T *glevel_len) |
| Returns number of free elements in GAP block array. Difference between GAP block capacity on this level and actual GAP length.
|
template<typename T> |
int | bitcmp (const T *buf1, const T *buf2, unsigned len) |
| Lexicographical comparison of BIT buffers.
|
template<typename T> |
unsigned | bit_convert_to_gap (T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len) |
| Converts bit block to GAP.
|
template<typename D, typename T> |
D | gap_convert_to_arr (D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len) |
| Convert gap block into array of ints corresponding to 1 bits.
|
template<typename T> |
T | bit_convert_to_arr (T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len) |
| Convert bit block into an array of ints corresponding to 1 bits.
|
bm::id_t | bit_block_calc_count (const bm::word_t *block, const bm::word_t *block_end) |
| Bitcount for bit string.
|
bm::id_t | bit_count_change (bm::word_t w) |
bm::id_t | bit_block_calc_count_change (const bm::word_t *block, const bm::word_t *block_end) |
bm::id_t | bit_block_calc_count_range (const bm::word_t *block, bm::word_t left, bm::word_t right) |
template<typename T> |
void | bit_invert (T *start, T *end) |
bool | is_bits_one (const bm::wordop_t *start, const bm::wordop_t *end) |
| Returns "true" if all bits in the block are 1.
|
bool | bit_is_all_zero (const bm::wordop_t *start, const bm::wordop_t *end) |
| Returns "true" if all bits in the block are 0.
|
unsigned | and_op (unsigned v1, unsigned v2) |
| GAP and functor.
|
unsigned | xor_op (unsigned v1, unsigned v2) |
| GAP xor functor.
|
gap_word_t * | gap_operation_and (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| GAP AND operation.
|
gap_word_t * | gap_operation_xor (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| GAP XOR operation.
|
gap_word_t * | gap_operation_or (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| GAP OR operation.
|
gap_word_t * | gap_operation_sub (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| GAP SUB (AND NOT) operation.
|
void | bit_block_copy (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| Bitblock copy operation.
|
void | bit_block_and (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| Plain bitblock AND operation. Function does not analyse availability of source and destination blocks.
|
unsigned | bit_block_and_count (const bm::word_t *src1, const bm::word_t *src1_end, const bm::word_t *src2) |
| Function ANDs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
unsigned | bit_block_xor_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| Function XORs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
unsigned | bit_block_sub_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| Function SUBs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
unsigned | bit_block_or_count (const bm::word_t *src1, const bm::word_t *src1_end, const bm::word_t *src2) |
| Function ORs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
bm::word_t * | bit_operation_and (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| bitblock AND operation.
|
bm::id_t | bit_operation_and_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| Performs bitblock AND operation and calculates bitcount of the result.
|
bm::id_t | bit_operation_sub_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| Performs bitblock SUB operation and calculates bitcount of the result.
|
bm::id_t | bit_operation_or_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| Performs bitblock OR operation and calculates bitcount of the result.
|
void | bit_block_or (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
|
bm::word_t * | bit_operation_or (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| Block OR operation. Makes analysis if block is 0 or FULL.
|
void | bit_block_sub (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destination blocks.
|
bm::word_t * | bit_operation_sub (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| bitblock SUB operation.
|
void | bit_block_xor (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks.
|
bm::word_t * | bit_operation_xor (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| bitblock XOR operation.
|
bm::id_t | bit_operation_xor_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| Performs bitblock XOR operation and calculates bitcount of the result.
|
void | bit_find_head_tail (const bm::word_t *data, unsigned *head_idx, unsigned *tail_idx) |
| Inspects bit block for zero words at the head and at the end.
|
int | bit_find_in_block (const bm::word_t *data, unsigned nbit, bm::id_t *prev) |
| Searches for the next 1 bit in the BIT block.
|
template<typename T, typename B> |
unsigned | bit_list (T w, B *bits) |
| Unpacks word into list of ON bit indexes.
|
template<typename T> |
unsigned | gap_overhead (const T *length, const T *length_end, const T *glevel_len) |
| Calculates memory overhead for number of gap blocks sharing the same memory allocation table (level lengths table).
|
template<typename T> |
bool | improve_gap_levels (const T *length, const T *length_end, T *glevel_len) |
| Finds optimal gap blocks lengths.
|
BMFORCEINLINE void | sse2_xor_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) |
| XOR array elements to specified mask dst = *src ^ mask.
|
BMFORCEINLINE void | sse2_andnot_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) |
| Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask.
|
BMFORCEINLINE void | sse2_and_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| AND array elements against another array dst &= *src.
|
BMFORCEINLINE void | sse2_or_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| OR array elements against another array dst |= *src.
|
BMFORCEINLINE void | sse2_xor_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| XOR array elements against another array dst ^= *src.
|
BMFORCEINLINE void | sse2_sub_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| AND-NOT (SUB) array elements against another array dst &= ~*src.
|
BMFORCEINLINE void | sse2_set_block (__m128i *BMRESTRICT dst, __m128i *BMRESTRICT dst_end, bm::word_t value) |
| SSE2 block memset dst = value.
|
BMFORCEINLINE void | sse2_copy_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| SSE2 block copy dst = *src.
|
BMFORCEINLINE void | sse2_invert_arr (bm::word_t *first, bm::word_t *last) |
| Invert array elements dst = ~*dst or dst ^= *dst.
|
bm::id_t | sse2_bit_count (const __m128i *block, const __m128i *block_end) |
BMFORCEINLINE __m128i | sse2_and (__m128i a, __m128i b) |
BMFORCEINLINE __m128i | sse2_or (__m128i a, __m128i b) |
BMFORCEINLINE __m128i | sse2_xor (__m128i a, __m128i b) |
BMFORCEINLINE __m128i | sse2_sub (__m128i a, __m128i b) |
template<class Func> |
bm::id_t | sse2_bit_count_op (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT block_end, const __m128i *BMRESTRICT mask_block, Func sse2_func) |
void | combine_count_operation_with_block (const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, int arg_gap, bm::word_t *temp_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) |
| Internal function computes different distance metrics.
|
template<class BV> |
void | distance_operation (const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) |
| Distance computing template function.
|
template<class BV> |
bm::id_t | count_and (const BV &bv1, const BV &bv2) |
| Computes bitcount of AND operation of two bitsets.
|
template<class BV> |
bm::id_t | count_xor (const BV &bv1, const BV &bv2) |
| Computes bitcount of XOR operation of two bitsets.
|
template<class BV> |
bm::id_t | count_sub (const BV &bv1, const BV &bv2) |
| Computes bitcount of SUB operation of two bitsets.
|
template<class BV> |
bm::id_t | count_or (const BV &bv1, const BV &bv2) |
| Computes bitcount of OR operation of two bitsets.
|
template<class It> |
It | block_range_scan (It first, It last, unsigned nblock, unsigned *max_id) |
| Internal algorithm scans the input for the block range limit.
|
template<class BV, class It> |
void | combine_or (BV &bv, It first, It last) |
| OR Combine bitvector and the iterable sequence.
|
template<class BV, class It> |
void | combine_xor (BV &bv, It first, It last) |
| XOR Combine bitvector and the iterable sequence.
|
template<class BV, class It> |
void | combine_sub (BV &bv, It first, It last) |
| SUB Combine bitvector and the iterable sequence.
|
template<class BV, class It> |
void | combine_and (BV &bv, It first, It last) |
| AND Combine bitvector and the iterable sequence.
|
template<class BV> |
bm::id_t | count_intervals (const BV &bv) |
| Compute number of bit intervals (GAPs) in the bitvector.
|
template<class BV, class It> |
void | export_array (BV &bv, It first, It last) |
| Export bitset from an array of binary data representing the bit vector.
|
Variables |
const unsigned | id_max = 0xFFFFFFFF |
const unsigned | set_block_size = 2048u |
const unsigned | set_block_shift = 16u |
const unsigned | set_block_mask = 0xFFFFu |
const unsigned | set_blkblk_mask = 0xFFFFFFu |
const unsigned | set_word_shift = 5u |
const unsigned | set_word_mask = 0x1Fu |
const unsigned | gap_max_buff_len = 1280 |
const unsigned | gap_max_bits = 65536 |
const unsigned | gap_equiv_len |
const unsigned | gap_levels = 4 |
const unsigned | gap_max_level = bm::gap_levels - 1 |
const unsigned | set_array_size = 256u |
const unsigned | set_array_shift = 8u |
const unsigned | set_array_mask = 0xFFu |
const unsigned | set_total_blocks = (bm::set_array_size * bm::set_array_size) |
const unsigned | bits_in_block = bm::set_block_size * sizeof(bm::word_t) * 8 |
const unsigned | bits_in_array = bm::bits_in_block * bm::set_array_size |
const id64_t | all_bits_mask = 0xffffffffffffffff |
const unsigned | set_block_size_op = bm::set_block_size / 2 |