00001 #ifndef BMSERIAL__H__INCLUDED__
00002 #define BMSERIAL__H__INCLUDED__
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036 #ifndef BM__H__INCLUDED__
00037 #define BM__H__INCLUDED__
00038
00039 #include "bm.h"
00040
00041 #endif
00042
00043 #ifdef _MSC_VER
00044 #pragma warning( disable : 4311 4312)
00045 #endif
00046
00047
00048
00049 #include "encoding.h"
00050 #include "bmdef.h"
00051 #include "bmfunc.h"
00052 #include "bmtrans.h"
00053 #include "bmalgo_impl.h"
00054 #include "bmutil.h"
00055
00056
00057
00058
00059 namespace bm
00060 {
00061
00062
00063
00064
00065
00066 const unsigned char set_block_end = 0;
00067 const unsigned char set_block_1zero = 1;
00068 const unsigned char set_block_1one = 2;
00069 const unsigned char set_block_8zero = 3;
00070 const unsigned char set_block_8one = 4;
00071 const unsigned char set_block_16zero = 5;
00072 const unsigned char set_block_16one = 6;
00073 const unsigned char set_block_32zero = 7;
00074 const unsigned char set_block_32one = 8;
00075 const unsigned char set_block_azero = 9;
00076 const unsigned char set_block_aone = 10;
00077 const unsigned char set_block_bit = 11;
00078 const unsigned char set_block_sgapbit = 12;
00079 const unsigned char set_block_sgapgap = 13;
00080 const unsigned char set_block_gap = 14;
00081 const unsigned char set_block_gapbit = 15;
00082 const unsigned char set_block_arrbit = 16;
00083 const unsigned char set_block_bit_interval = 17;
00084 const unsigned char set_block_arrgap = 18;
00085 const unsigned char set_block_bit_1bit = 19;
00086 const unsigned char set_block_gap_egamma = 20;
00087 const unsigned char set_block_arrgap_egamma = 21;
00088 const unsigned char set_block_bit_0runs = 22;
00089 const unsigned char set_block_arrgap_egamma_inv = 23;
00090 const unsigned char set_block_arrgap_inv = 24;
00091
00092
00093
00094
00095 enum serialization_header_mask {
00096 BM_HM_DEFAULT = 1,
00097 BM_HM_RESIZE = (1 << 1),
00098 BM_HM_ID_LIST = (1 << 2),
00099 BM_HM_NO_BO = (1 << 3),
00100 BM_HM_NO_GAPL = (1 << 4)
00101 };
00102
00103
00104
00105 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \
00106 if (nb == 1) \
00107 enc.put_8(B_1ZERO); \
00108 else if (nb < 256) \
00109 { \
00110 enc.put_8(B_8ZERO); \
00111 enc.put_8((unsigned char)nb); \
00112 } \
00113 else if (nb < 65536) \
00114 { \
00115 enc.put_8(B_16ZERO); \
00116 enc.put_16((unsigned short)nb); \
00117 } \
00118 else \
00119 {\
00120 enc.put_8(B_32ZERO); \
00121 enc.put_32(nb); \
00122 }
00123
00124
00125 #define BM_SET_ONE_BLOCKS(x) \
00126 {\
00127 unsigned end_block = i + x; \
00128 for (;i < end_block; ++i) \
00129 bman.set_block_all_set(i); \
00130 } \
00131 --i
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 template<class BV>
00145 class serializer
00146 {
00147 public:
00148 typedef typename BV::allocator_type allocator_type;
00149 typedef typename BV::blocks_manager_type blocks_manager_type;
00150 public:
00151 serializer(const allocator_type& alloc = allocator_type());
00152 ~serializer();
00153
00154
00155
00156
00157
00158
00159 void set_compression_level(unsigned clevel);
00160
00161
00162
00163
00164 unsigned get_compression_level() const;
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180 unsigned serialize(const BV& bv,
00181 unsigned char* buf, unsigned buf_size);
00182
00183
00184
00185
00186
00187
00188
00189 void gap_length_serialization(bool value);
00190
00191
00192
00193
00194
00195 void byte_order_serialization(bool value);
00196
00197 protected:
00198
00199
00200
00201 void encode_header(const BV& bv, bm::encoder& enc);
00202
00203
00204
00205
00206 void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00207
00208
00209
00210
00211 void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00212
00213
00214
00215
00216 void gamma_gap_array(const bm::gap_word_t* gap_block,
00217 unsigned arr_len,
00218 bm::encoder& enc,
00219 bool inverted = false);
00220
00221
00222
00223
00224 void encode_bit_interval(const bm::word_t* blk,
00225 bm::encoder& enc,
00226 unsigned size_control);
00227
00228 private:
00229 serializer(const serializer&);
00230 serializer& operator=(const serializer&);
00231
00232 private:
00233
00234 typedef bm::bit_out<bm::encoder> bit_out_type;
00235 typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> gamma_encoder_func;
00236
00237 private:
00238 allocator_type alloc_;
00239 bool gap_serial_;
00240 bool byte_order_serial_;
00241 bm::word_t* temp_block_;
00242 unsigned compression_level_;
00243 };
00244
00245
00246
00247
00248
00249 template<class DEC>
00250 class deseriaizer_base
00251 {
00252 public:
00253 typedef DEC decoder_type;
00254 protected:
00255 deseriaizer_base(){}
00256
00257
00258 void read_gap_block(decoder_type& decoder,
00259 unsigned block_type,
00260 bm::gap_word_t* dst_block,
00261 bm::gap_word_t& gap_head);
00262 };
00263
00264
00265
00266
00267
00268 template<class BV, class DEC>
00269 class deserializer : protected deseriaizer_base<DEC>
00270 {
00271 public:
00272 typedef BV bvector_type;
00273 typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00274
00275 public:
00276 deserializer() : temp_block_(0) {}
00277
00278 unsigned deserialize(bvector_type& bv,
00279 const unsigned char* buf,
00280 bm::word_t* temp_block);
00281 protected:
00282 typedef typename BV::blocks_manager_type blocks_manager_type;
00283 typedef typename BV::allocator_type allocator_type;
00284
00285 protected:
00286 void deserialize_gap(unsigned char btype, decoder_type& dec,
00287 bvector_type& bv, blocks_manager_type& bman,
00288 unsigned i,
00289 bm::word_t* blk);
00290 protected:
00291 bm::gap_word_t gap_temp_block_[bm::gap_equiv_len * 3];
00292 bm::word_t* temp_block_;
00293 };
00294
00295
00296
00297
00298
00299
00300
00301
00302 template<class BV, class SerialIterator>
00303 class iterator_deserializer
00304 {
00305 public:
00306 typedef BV bvector_type;
00307 typedef SerialIterator serial_iterator_type;
00308 public:
00309 static
00310 unsigned deserialize(bvector_type& bv,
00311 serial_iterator_type& sit,
00312 bm::word_t* temp_block,
00313 set_operation op = bm::set_OR);
00314 private:
00315 typedef typename BV::blocks_manager_type blocks_manager_type;
00316
00317
00318 static
00319 void load_id_list(bvector_type& bv,
00320 serial_iterator_type& sit,
00321 unsigned id_count,
00322 bool set_clear);
00323 };
00324
00325
00326
00327
00328
00329
00330
00331 template<class DEC>
00332 class serial_stream_iterator : protected deseriaizer_base<DEC>
00333 {
00334 public:
00335 typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00336 public:
00337 serial_stream_iterator(const unsigned char* buf);
00338
00339
00340 unsigned bv_size() const { return bv_size_; }
00341
00342
00343 bool is_eof() const { return end_of_stream_; }
00344
00345
00346 void next();
00347
00348
00349 unsigned get_bit_block(bm::word_t* dst_block,
00350 bm::word_t* tmp_block,
00351 set_operation op);
00352
00353
00354
00355 void get_gap_block(bm::gap_word_t* dst_block);
00356
00357
00358 unsigned dec_size() const { return decoder_.size(); }
00359
00360
00361 decoder_type& decoder() { return decoder_; }
00362
00363
00364
00365
00366 enum iterator_state
00367 {
00368 e_unknown = 0,
00369 e_list_ids,
00370 e_blocks,
00371 e_zero_blocks,
00372 e_one_blocks,
00373 e_bit_block,
00374 e_gap_block
00375
00376 };
00377
00378
00379 iterator_state state() const { return this->state_; }
00380
00381 iterator_state get_state() const { return this->state_; }
00382
00383 unsigned get_id_count() const { return this->id_cnt_; }
00384
00385
00386 bm::id_t get_id() const { return this->last_id_; }
00387
00388
00389 unsigned block_idx() const { return this->block_idx_; }
00390
00391 public:
00392
00393
00394 typedef
00395 unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
00396 (bm::word_t*,bm::word_t*);
00397
00398 unsigned
00399 get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
00400 unsigned
00401 get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
00402 unsigned
00403 get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
00404 unsigned
00405 get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
00406 unsigned
00407 get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
00408 unsigned
00409 get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
00410 unsigned
00411 get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
00412 unsigned
00413 get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
00414 unsigned
00415 get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
00416 unsigned
00417 get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
00418 unsigned
00419 get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
00420 unsigned
00421 get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
00422 unsigned
00423 get_bit_block_COUNT_B(bm::word_t* dst_block, bm::word_t* tmp_block)
00424 {
00425 return get_bit_block_COUNT(dst_block, tmp_block);
00426 }
00427
00428
00429
00430
00431 unsigned get_arr_bit(bm::word_t* dst_block,
00432 bool clear_target=true);
00433
00434 protected:
00435 get_bit_func_type bit_func_table_[bm::set_END];
00436
00437 decoder_type decoder_;
00438 bool end_of_stream_;
00439 unsigned bv_size_;
00440 iterator_state state_;
00441 unsigned id_cnt_;
00442 bm::id_t last_id_;
00443 gap_word_t glevels_[bm::gap_levels];
00444
00445 unsigned block_type_;
00446 unsigned block_idx_;
00447 unsigned mono_block_cnt_;
00448
00449 gap_word_t gap_head_;
00450 };
00451
00452
00453
00454
00455
00456
00457
00458 template<class BV>
00459 class operation_deserializer
00460 {
00461 public:
00462 typedef BV bvector_type;
00463 public:
00464 static
00465 unsigned deserialize(bvector_type& bv,
00466 const unsigned char* buf,
00467 bm::word_t* temp_block,
00468 set_operation op = bm::set_OR);
00469 private:
00470 typedef
00471 typename BV::blocks_manager_type blocks_manager_type;
00472 typedef
00473 serial_stream_iterator<bm::decoder> serial_stream_current;
00474 typedef
00475 serial_stream_iterator<bm::decoder_big_endian> serial_stream_be;
00476 typedef
00477 serial_stream_iterator<bm::decoder_little_endian> serial_stream_le;
00478
00479 };
00480
00481
00482
00483
00484
00485
00486
00487 template<class BV>
00488 serializer<BV>::serializer(const allocator_type& alloc)
00489 : alloc_(alloc),
00490 gap_serial_(false),
00491 byte_order_serial_(true),
00492 temp_block_(0),
00493 compression_level_(3)
00494 {
00495 temp_block_ = alloc_.alloc_bit_block();
00496 }
00497
00498 template<class BV>
00499 void serializer<BV>::set_compression_level(unsigned clevel)
00500 {
00501 compression_level_ = clevel;
00502 }
00503
00504 template<class BV>
00505 unsigned serializer<BV>::get_compression_level() const
00506 {
00507 return compression_level_;
00508 }
00509
00510 template<class BV>
00511 serializer<BV>::~serializer()
00512 {
00513 alloc_.free_bit_block(temp_block_);
00514 }
00515
00516
00517 template<class BV>
00518 void serializer<BV>::gap_length_serialization(bool value)
00519 {
00520 gap_serial_ = value;
00521 }
00522
00523 template<class BV>
00524 void serializer<BV>::byte_order_serialization(bool value)
00525 {
00526 byte_order_serial_ = value;
00527 }
00528
00529 template<class BV>
00530 void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
00531 {
00532 const blocks_manager_type& bman = bv.get_blocks_manager();
00533
00534 unsigned char header_flag = 0;
00535 if (bv.size() == bm::id_max)
00536 header_flag |= BM_HM_DEFAULT;
00537 else
00538 header_flag |= BM_HM_RESIZE;
00539
00540 if (!byte_order_serial_)
00541 header_flag |= BM_HM_NO_BO;
00542
00543 if (!gap_serial_)
00544 header_flag |= BM_HM_NO_GAPL;
00545
00546 enc.put_8(header_flag);
00547
00548 if (byte_order_serial_)
00549 {
00550 ByteOrder bo = globals<true>::byte_order();
00551 enc.put_8((unsigned char)bo);
00552 }
00553
00554
00555 if (gap_serial_)
00556 {
00557 enc.put_16(bman.glen(), bm::gap_levels);
00558 }
00559
00560
00561 if (header_flag & BM_HM_RESIZE)
00562 {
00563 enc.put_32(bv.size());
00564 }
00565
00566 }
00567
00568 template<class BV>
00569 void serializer<BV>::gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00570 {
00571 unsigned len = gap_length(gap_block);
00572
00573
00574 if (len > 6 && (compression_level_ > 3))
00575 {
00576 encoder::position_type enc_pos0 = enc.get_pos();
00577 {
00578 bit_out_type bout(enc);
00579 gamma_encoder_func gamma(bout);
00580
00581 enc.put_8(set_block_gap_egamma);
00582 enc.put_16(gap_block[0]);
00583
00584 for_each_dgap(gap_block, gamma);
00585 }
00586
00587
00588 encoder::position_type enc_pos1 = enc.get_pos();
00589 unsigned gamma_size = enc_pos1 - enc_pos0;
00590 if (gamma_size > (len-1)*sizeof(gap_word_t))
00591 {
00592 enc.set_pos(enc_pos0);
00593 }
00594 else
00595 {
00596 return;
00597 }
00598 }
00599
00600
00601 enc.put_8(set_block_gap);
00602 enc.put_16(gap_block, len-1);
00603 }
00604
00605 template<class BV>
00606 void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array,
00607 unsigned arr_len,
00608 bm::encoder& enc,
00609 bool inverted)
00610 {
00611 if (compression_level_ > 3 && arr_len > 25)
00612 {
00613 encoder::position_type enc_pos0 = enc.get_pos();
00614 {
00615 bit_out_type bout(enc);
00616
00617 enc.put_8(
00618 inverted ? set_block_arrgap_egamma_inv
00619 : set_block_arrgap_egamma);
00620
00621 bout.gamma(arr_len);
00622
00623 gap_word_t prev = gap_array[0];
00624 bout.gamma(prev + 1);
00625
00626 for (unsigned i = 1; i < arr_len; ++i)
00627 {
00628 gap_word_t curr = gap_array[i];
00629 bout.gamma(curr - prev);
00630 prev = curr;
00631 }
00632 }
00633
00634 encoder::position_type enc_pos1 = enc.get_pos();
00635 unsigned gamma_size = enc_pos1 - enc_pos0;
00636 if (gamma_size > (arr_len)*sizeof(gap_word_t))
00637 {
00638 enc.set_pos(enc_pos0);
00639 }
00640 else
00641 {
00642 return;
00643 }
00644 }
00645
00646
00647 enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap,
00648 gap_array, arr_len, true);
00649 }
00650
00651
00652 template<class BV>
00653 void serializer<BV>::encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00654 {
00655 if (compression_level_ > 2)
00656 {
00657 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
00658 gap_word_t arr_len;
00659
00660 unsigned bc = gap_bit_count(gap_block);
00661 if (bc == 1)
00662 {
00663 arr_len = gap_convert_to_arr(gap_temp_block,
00664 gap_block,
00665 bm::gap_equiv_len-10);
00666 BM_ASSERT(arr_len == 1);
00667 enc.put_8(set_block_bit_1bit);
00668 enc.put_16(gap_temp_block[0]);
00669 return;
00670 }
00671
00672 unsigned len = gap_length(gap_block);
00673 bool invert, use_array;
00674 invert = use_array = false;
00675
00676 if (bc < len-1)
00677 {
00678 use_array = true;
00679 }
00680 else
00681 {
00682 unsigned inverted_bc = bm::gap_max_bits - bc;
00683 if (inverted_bc < len-1)
00684 {
00685 use_array = invert = true;
00686 }
00687 }
00688 if (use_array)
00689 {
00690 arr_len = gap_convert_to_arr(gap_temp_block,
00691 gap_block,
00692 bm::gap_equiv_len-10,
00693 invert);
00694 if (arr_len)
00695 {
00696 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
00697 return;
00698 }
00699 }
00700 }
00701
00702 gamma_gap_block(gap_block, enc);
00703 }
00704
00705 template<class BV>
00706 void serializer<BV>::encode_bit_interval(const bm::word_t* blk,
00707 bm::encoder& enc,
00708 unsigned
00709 )
00710 {
00711 enc.put_8(set_block_bit_0runs);
00712 enc.put_8((blk[0]==0) ? 0 : 1);
00713
00714 unsigned i,j;
00715
00716 for (i = 0; i < bm::set_block_size; ++i)
00717 {
00718 if (blk[i] == 0)
00719 {
00720
00721 for (j = i+1; j < bm::set_block_size; ++j)
00722 {
00723 if (blk[j] != 0)
00724 break;
00725 }
00726 enc.put_16(j-i);
00727 i = j - 1;
00728 }
00729 else
00730 {
00731
00732 for (j = i+1; j < bm::set_block_size; ++j)
00733 {
00734 if (blk[j] == 0)
00735 {
00736
00737 if (((j+1 < bm::set_block_size) && blk[j+1]) ||
00738 ((j+2 < bm::set_block_size) && blk[j+2])
00739 )
00740 {
00741 ++j;
00742 continue;
00743 }
00744 break;
00745 }
00746 }
00747 enc.put_16(j-i);
00748
00749 BM_ASSERT(i < j);
00750 enc.put_32(blk + i, j - i);
00751 i = j - 1;
00752 }
00753 }
00754 }
00755
00756
00757 template<class BV>
00758 unsigned serializer<BV>::serialize(const BV& bv,
00759 unsigned char* buf, unsigned buf_size)
00760 {
00761 BM_ASSERT(temp_block_);
00762
00763 const blocks_manager_type& bman = bv.get_blocks_manager();
00764
00765 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
00766
00767 bm::encoder enc(buf, buf_size);
00768 encode_header(bv, enc);
00769
00770 unsigned i,j;
00771
00772
00773
00774 for (i = 0; i < bm::set_total_blocks; ++i)
00775 {
00776 bm::word_t* blk = bman.get_block(i);
00777
00778
00779
00780 bool flag;
00781 flag = bman.is_block_zero(i, blk, false);
00782 if (flag)
00783 {
00784 zero_block:
00785 unsigned next_nb = bman.find_next_nz_block(i+1, false);
00786 if (next_nb == bm::set_total_blocks)
00787 {
00788 enc.put_8(set_block_azero);
00789 return enc.size();
00790 }
00791 unsigned nb = next_nb - i;
00792
00793 if (nb > 1 && nb < 128)
00794 {
00795
00796 unsigned char c = (1 << 7) | nb;
00797 enc.put_8(c);
00798 }
00799 else
00800 {
00801 SER_NEXT_GRP(enc, nb, set_block_1zero,
00802 set_block_8zero,
00803 set_block_16zero,
00804 set_block_32zero)
00805 }
00806 i = next_nb - 1;
00807 continue;
00808 }
00809 else
00810 {
00811 flag = bman.is_block_one(i, blk, false);
00812 if (flag)
00813 {
00814
00815 for(j = i+1; j < bm::set_total_blocks; ++j)
00816 {
00817 bm::word_t* blk_next = bman.get_block(j);
00818 if (flag != bman.is_block_one(j, blk_next, false))
00819 break;
00820 }
00821 if (j == bm::set_total_blocks)
00822 {
00823 enc.put_8(set_block_aone);
00824 break;
00825 }
00826 else
00827 {
00828 unsigned nb = j - i;
00829 SER_NEXT_GRP(enc, nb, set_block_1one,
00830 set_block_8one,
00831 set_block_16one,
00832 set_block_32one)
00833 }
00834 i = j - 1;
00835 continue;
00836 }
00837 }
00838
00839
00840
00841
00842 if (BM_IS_GAP(bman, blk, i))
00843 {
00844 gap_word_t* gblk = BMGAP_PTR(blk);
00845 encode_gap_block(gblk, enc);
00846 continue;
00847 }
00848
00849
00850
00851
00852 {
00853 if (compression_level_ <= 1)
00854 {
00855 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00856 continue;
00857 }
00858
00859
00860 unsigned block_bc = 0;
00861 bm::id_t bit_gaps =
00862 bit_block_calc_count_change(blk, blk + bm::set_block_size,
00863 &block_bc);
00864 unsigned block_bc_inv = bm::gap_max_bits - block_bc;
00865 switch (block_bc)
00866 {
00867 case 1:
00868 {
00869 bm::id_t bit_idx = 0;
00870 bit_find_in_block(blk, bit_idx, &bit_idx);
00871 enc.put_8(set_block_bit_1bit); enc.put_16((short)bit_idx);
00872 continue;
00873 }
00874 case 0: goto zero_block;
00875 }
00876
00877
00878
00879
00880 unsigned arr_block_size = sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t));
00881 unsigned arr_block_size_inv = sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t));
00882 unsigned gap_block_size = sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t));
00883 unsigned interval_block_size;
00884 interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size);
00885 bool inverted = false;
00886
00887 if (arr_block_size_inv < arr_block_size &&
00888 arr_block_size_inv < gap_block_size &&
00889 arr_block_size_inv < interval_block_size)
00890 {
00891 inverted = true;
00892 goto bit_as_array;
00893 }
00894
00895
00896 if ((interval_block_size > arr_block_size) ||
00897 (interval_block_size > gap_block_size))
00898 {
00899 if (gap_block_size < (bm::gap_equiv_len-64) &&
00900 gap_block_size < arr_block_size)
00901 {
00902 unsigned len = bit_convert_to_gap(gap_temp_block,
00903 blk,
00904 bm::gap_max_bits,
00905 bm::gap_equiv_len-64);
00906 if (len)
00907 {
00908 gamma_gap_block(gap_temp_block, enc);
00909 continue;
00910 }
00911 }
00912
00913 if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t)))
00914 {
00915 bit_as_array:
00916 gap_word_t arr_len;
00917 unsigned mask = inverted ? ~0 : 0;
00918 arr_len = bit_convert_to_arr(gap_temp_block,
00919 blk,
00920 bm::gap_max_bits,
00921 bm::gap_equiv_len-64,
00922 mask);
00923 if (arr_len)
00924 {
00925 gamma_gap_array(gap_temp_block, arr_len, enc, inverted);
00926 continue;
00927 }
00928
00929 }
00930
00931 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00932 continue;
00933 }
00934
00935
00936 if (interval_block_size < arr_block_size &&
00937 interval_block_size < gap_block_size)
00938 {
00939 encode_bit_interval(blk, enc, interval_block_size);
00940 continue;
00941 }
00942
00943 if (gap_block_size < bm::gap_equiv_len &&
00944 gap_block_size < arr_block_size)
00945 {
00946 unsigned len = bit_convert_to_gap(gap_temp_block,
00947 blk,
00948 bm::gap_max_bits,
00949 bm::gap_equiv_len-64);
00950 if (len)
00951 {
00952 gamma_gap_block(gap_temp_block, enc);
00953 continue;
00954 }
00955 }
00956
00957
00958
00959 if (arr_block_size < bm::gap_equiv_len-64)
00960 {
00961 goto bit_as_array;
00962 }
00963
00964
00965 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00966 continue;
00967 }
00968 }
00969
00970 enc.put_8(set_block_end);
00971
00972 unsigned encoded_size = enc.size();
00973 return encoded_size;
00974
00975 }
00976
00977
00978
00979
00980
00981 enum serialization_flags {
00982 BM_NO_BYTE_ORDER = 1,
00983 BM_NO_GAP_LENGTH = (1 << 1)
00984 };
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027 template<class BV>
01028 unsigned serialize(const BV& bv,
01029 unsigned char* buf,
01030 bm::word_t* temp_block,
01031 unsigned serialization_flags = 0)
01032 {
01033 bm::serializer<BV> bv_serial;
01034 if (serialization_flags & BM_NO_BYTE_ORDER)
01035 bv_serial.byte_order_serialization(false);
01036
01037 if (serialization_flags & BM_NO_GAP_LENGTH)
01038 bv_serial.gap_length_serialization(false);
01039 else
01040 bv_serial.gap_length_serialization(true);
01041
01042 bv_serial.set_compression_level(4);
01043
01044 return bv_serial.serialize(bv, buf, 0);
01045 }
01046
01047
01048
01049
01050
01051
01052
01053 template<class BV>
01054 unsigned serialize(BV& bv,
01055 unsigned char* buf,
01056 unsigned serialization_flags=0)
01057 {
01058 bm::serializer<BV> bv_serial;
01059 if (serialization_flags & BM_NO_BYTE_ORDER)
01060 bv_serial.byte_order_serialization(false);
01061
01062 if (serialization_flags & BM_NO_GAP_LENGTH)
01063 bv_serial.gap_length_serialization(false);
01064 else
01065 bv_serial.gap_length_serialization(true);
01066
01067 bv_serial.set_compression_level(4);
01068
01069 return bv_serial.serialize(bv, buf, 0);
01070 }
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089 template<class BV>
01090 unsigned deserialize(BV& bv,
01091 const unsigned char* buf,
01092 bm::word_t* temp_block=0)
01093 {
01094 ByteOrder bo_current = globals<true>::byte_order();
01095
01096 bm::decoder dec(buf);
01097 unsigned char header_flag = dec.get_8();
01098 ByteOrder bo = bo_current;
01099 if (!(header_flag & BM_HM_NO_BO))
01100 {
01101 bo = (bm::ByteOrder) dec.get_8();
01102 }
01103
01104 if (bo_current == bo)
01105 {
01106 deserializer<BV, bm::decoder> deserial;
01107 return deserial.deserialize(bv, buf, temp_block);
01108 }
01109 switch (bo_current)
01110 {
01111 case BigEndian:
01112 {
01113 deserializer<BV, bm::decoder_big_endian> deserial;
01114 return deserial.deserialize(bv, buf, temp_block);
01115 }
01116 case LittleEndian:
01117 {
01118 deserializer<BV, bm::decoder_little_endian> deserial;
01119 return deserial.deserialize(bv, buf, temp_block);
01120 }
01121 default:
01122 BM_ASSERT(0);
01123 };
01124 return 0;
01125 }
01126
01127
01128 template<class DEC>
01129 void deseriaizer_base<DEC>::read_gap_block(decoder_type& decoder,
01130 unsigned block_type,
01131 bm::gap_word_t* dst_block,
01132 bm::gap_word_t& gap_head)
01133 {
01134 typedef bit_in<DEC> bit_in_type;
01135
01136 switch (block_type)
01137 {
01138 case set_block_gap:
01139 {
01140 unsigned len = gap_length(&gap_head);
01141 --len;
01142 *dst_block = gap_head;
01143 decoder.get_16(dst_block+1, len - 1);
01144 dst_block[len] = gap_max_bits - 1;
01145 }
01146 break;
01147 case set_block_bit_1bit:
01148 {
01149 gap_word_t bit_idx = decoder.get_16();
01150 unsigned is_set;
01151
01152 gap_set_value(true, dst_block, bit_idx, &is_set);
01153 }
01154 break;
01155 case set_block_arrgap:
01156 case set_block_arrgap_inv:
01157 {
01158 gap_set_all(dst_block, bm::gap_max_bits, 0);
01159 gap_word_t len = decoder.get_16();
01160
01161 for (gap_word_t k = 0; k < len; ++k)
01162 {
01163 gap_word_t bit_idx = decoder.get_16();
01164 unsigned is_set;
01165
01166 gap_set_value(true, dst_block, bit_idx, &is_set);
01167 }
01168 }
01169 break;
01170 case set_block_arrgap_egamma:
01171 case set_block_arrgap_egamma_inv:
01172 {
01173 bit_in_type bin(decoder);
01174
01175 gap_set_all(dst_block, bm::gap_max_bits, 0);
01176 gap_word_t len = bin.gamma();
01177
01178 gap_word_t prev = 0;
01179 for (gap_word_t k = 0; k < len; ++k)
01180 {
01181 gap_word_t bit_idx = bin.gamma();
01182 if (k == 0) --bit_idx;
01183 bit_idx += prev;
01184 prev = bit_idx;
01185
01186 unsigned is_set;
01187
01188 gap_set_value(true, dst_block, bit_idx, &is_set);
01189 }
01190 }
01191 break;
01192 case set_block_gap_egamma:
01193 {
01194 unsigned len = (gap_head >> 3);
01195 --len;
01196
01197 {
01198 *dst_block = gap_head;
01199 gap_word_t* gap_data_ptr = dst_block + 1;
01200
01201 bit_in_type bin(decoder);
01202 {
01203 gap_word_t gap_sum = 0;
01204 for (unsigned i = 0; i < len; ++i, ++gap_data_ptr)
01205 {
01206 gap_word_t v = bin.gamma();
01207 if (i == 0) --v;
01208 gap_sum += v;
01209 *gap_data_ptr = gap_sum;
01210 }
01211 dst_block[len+1] = bm::gap_max_bits - 1;
01212 }
01213 }
01214
01215 }
01216 break;
01217 default:
01218 BM_ASSERT(0);
01219 }
01220
01221 if (block_type == set_block_arrgap_egamma_inv ||
01222 block_type == set_block_arrgap_inv)
01223 {
01224 gap_invert(dst_block);
01225 }
01226
01227 }
01228
01229
01230 template<class BV, class DEC>
01231 void
01232 deserializer<BV, DEC>::deserialize_gap(unsigned char btype, decoder_type& dec,
01233 bvector_type& bv, blocks_manager_type& bman,
01234 unsigned i,
01235 bm::word_t* blk)
01236 {
01237 typedef bit_in<DEC> bit_in_type;
01238 gap_word_t gap_head;
01239
01240 switch (btype)
01241 {
01242 case set_block_gap:
01243 case set_block_gapbit:
01244 {
01245 gap_word_t gap_head =
01246 sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32();
01247
01248 unsigned len = gap_length(&gap_head);
01249 int level = gap_calc_level(len, bman.glen());
01250 --len;
01251 if (level == -1)
01252 {
01253 *gap_temp_block_ = gap_head;
01254 dec.get_16(gap_temp_block_+1, len - 1);
01255 gap_temp_block_[len] = gap_max_bits - 1;
01256
01257 if (blk == 0)
01258 {
01259 blk = bman.get_allocator().alloc_bit_block();
01260 bman.set_block(i, blk);
01261 gap_convert_to_bitset(blk, gap_temp_block_);
01262 }
01263 else
01264 {
01265 gap_convert_to_bitset(temp_block_,
01266 gap_temp_block_);
01267
01268 bv.combine_operation_with_block(i,
01269 temp_block_,
01270 0,
01271 BM_OR);
01272 }
01273
01274 return;
01275 }
01276
01277 set_gap_level(&gap_head, level);
01278
01279 if (blk == 0)
01280 {
01281 gap_word_t* gap_blk =
01282 bman.get_allocator().alloc_gap_block(level, bman.glen());
01283 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
01284 *gap_blk_ptr = gap_head;
01285 set_gap_level(gap_blk_ptr, level);
01286 bman.set_block(i, (bm::word_t*)gap_blk);
01287 bman.set_block_gap(i);
01288 for (unsigned k = 1; k < len; ++k)
01289 {
01290 gap_blk[k] = dec.get_16();
01291 }
01292 gap_blk[len] = bm::gap_max_bits - 1;
01293 }
01294 else
01295 {
01296
01297 *gap_temp_block_ = gap_head;
01298 for (unsigned k = 1; k < len; ++k)
01299 {
01300 gap_temp_block_[k] = dec.get_16();
01301 }
01302 gap_temp_block_[len] = bm::gap_max_bits - 1;
01303
01304 bv.combine_operation_with_block(i,
01305 (bm::word_t*)gap_temp_block_,
01306 1,
01307 BM_OR);
01308 }
01309 return;
01310 }
01311 case set_block_arrgap:
01312 {
01313 gap_word_t len = dec.get_16();
01314 int block_type;
01315 unsigned k = 0;
01316 for (; k < len; )
01317 {
01318 bm::word_t* blk = bman.check_allocate_block(i,
01319 true,
01320 BM_GAP,
01321 &block_type,
01322 true
01323 );
01324
01325 if (!blk)
01326 {
01327
01328 for (; k < len; ++k)
01329 dec.get_16();
01330 return;
01331 }
01332
01333 if (block_type == 1)
01334 {
01335 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01336 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01337
01338 for (; k < len; ++k)
01339 {
01340 gap_word_t bit_idx = dec.get_16();
01341 unsigned is_set;
01342 unsigned new_block_len =
01343 gap_set_value(true, gap_blk, bit_idx, &is_set);
01344 if (new_block_len > threshold)
01345 {
01346 bman.extend_gap_block(i, gap_blk);
01347 ++k;
01348 break;
01349 }
01350 }
01351 }
01352 else
01353 {
01354
01355 for(;k < len; ++k)
01356 {
01357 gap_word_t bit_idx = dec.get_16();
01358 or_bit_block(blk, bit_idx, 1);
01359 }
01360 }
01361 }
01362 return;
01363 }
01364 case set_block_arrgap_egamma:
01365 {
01366 bit_in_type bin(dec);
01367 int block_type;
01368
01369 gap_word_t len = bin.gamma();
01370 gap_word_t prev = 0;
01371
01372 unsigned k = 0;
01373 for (; k < len; )
01374 {
01375 bm::word_t* blk = bman.check_allocate_block(i,
01376 true,
01377 BM_GAP,
01378 &block_type,
01379 true
01380 );
01381
01382 if (!blk)
01383 {
01384
01385 for (; k < len; ++k)
01386 bin.gamma();
01387 return;
01388 }
01389
01390 if (block_type == 1)
01391 {
01392 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01393 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01394
01395 for (; k < len; ++k)
01396 {
01397 gap_word_t bit_idx = bin.gamma();
01398 if (k==0) --bit_idx;
01399 bit_idx += prev;
01400 prev = bit_idx;
01401
01402 unsigned is_set;
01403 unsigned new_block_len =
01404 gap_set_value(true, gap_blk, bit_idx, &is_set);
01405 if (new_block_len > threshold)
01406 {
01407 bman.extend_gap_block(i, gap_blk);
01408 ++k;
01409 break;
01410 }
01411 }
01412 }
01413 else
01414 {
01415
01416 for(;k < len; ++k)
01417 {
01418 gap_word_t bit_idx = bin.gamma();
01419 if (k==0) --bit_idx;
01420 bit_idx += prev;
01421 prev = bit_idx;
01422
01423 or_bit_block(blk, bit_idx, 1);
01424 }
01425 }
01426 }
01427 return;
01428
01429 }
01430 case set_block_gap_egamma:
01431 gap_head =
01432 sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32();
01433 case set_block_arrgap_egamma_inv:
01434 case set_block_arrgap_inv:
01435 read_gap_block(dec, btype, gap_temp_block_, gap_head);
01436
01437
01438
01439 bv.combine_operation_with_block(i,
01440 (bm::word_t*)gap_temp_block_,
01441 1,
01442 BM_OR);
01443
01444 return;
01445 default:
01446 BM_ASSERT(0);
01447 }
01448
01449 }
01450
01451
01452 template<class BV, class DEC>
01453 unsigned deserializer<BV, DEC>::deserialize(bvector_type& bv,
01454 const unsigned char* buf,
01455 bm::word_t* temp_block)
01456 {
01457 blocks_manager_type& bman = bv.get_blocks_manager();
01458
01459 bm::wordop_t* tmp_buf =
01460 temp_block ? (bm::wordop_t*) temp_block
01461 : (bm::wordop_t*)bman.check_allocate_tempblock();
01462
01463 temp_block_ = temp_block = (word_t*)tmp_buf;
01464
01465 decoder_type dec(buf);
01466
01467 bv.forget_count();
01468
01469 BM_SET_MMX_GUARD
01470
01471
01472
01473 unsigned char header_flag = dec.get_8();
01474 if (!(header_flag & BM_HM_NO_BO))
01475 {
01476 dec.get_8();
01477 }
01478
01479 if (header_flag & BM_HM_ID_LIST)
01480 {
01481
01482 if (header_flag & BM_HM_RESIZE)
01483 {
01484 unsigned bv_size = dec.get_32();
01485 if (bv_size > bv.size())
01486 {
01487 bv.resize(bv_size);
01488 }
01489 }
01490
01491
01492 for (unsigned cnt = dec.get_32(); cnt; --cnt) {
01493 bm::id_t id = dec.get_32();
01494 bv.set(id);
01495 }
01496
01497 return dec.size()-1;
01498 }
01499
01500 unsigned i;
01501
01502 if (!(header_flag & BM_HM_NO_GAPL))
01503 {
01504 gap_word_t glevels[bm::gap_levels];
01505
01506 for (i = 0; i < bm::gap_levels; ++i)
01507 {
01508 glevels[i] = dec.get_16();
01509 }
01510 }
01511
01512 if (header_flag & (1 << 1))
01513 {
01514 unsigned bv_size = dec.get_32();
01515 if (bv_size > bv.size())
01516 {
01517 bv.resize(bv_size);
01518 }
01519 }
01520
01521 unsigned char btype;
01522 unsigned nb;
01523
01524 for (i = 0; i < bm::set_total_blocks; ++i)
01525 {
01526 btype = dec.get_8();
01527 bm::word_t* blk = bman.get_block(i);
01528
01529
01530 if (btype & (1 << 7))
01531 {
01532 nb = btype & ~(1 << 7);
01533 i += nb-1;
01534 continue;
01535 }
01536
01537 switch (btype)
01538 {
01539 case set_block_azero:
01540 case set_block_end:
01541 i = bm::set_total_blocks;
01542 break;
01543 case set_block_1zero:
01544 continue;
01545 case set_block_8zero:
01546 nb = dec.get_8();
01547 i += nb-1;
01548 continue;
01549 case set_block_16zero:
01550 nb = dec.get_16();
01551 i += nb-1;
01552 continue;
01553 case set_block_32zero:
01554 nb = dec.get_32();
01555 i += nb-1;
01556 continue;
01557 case set_block_aone:
01558 for (;i < bm::set_total_blocks; ++i)
01559 {
01560 bman.set_block_all_set(i);
01561 }
01562 break;
01563 case set_block_1one:
01564 bman.set_block_all_set(i);
01565 continue;
01566 case set_block_8one:
01567 BM_SET_ONE_BLOCKS(dec.get_8());
01568 continue;
01569 case set_block_16one:
01570 BM_SET_ONE_BLOCKS(dec.get_16());
01571 continue;
01572 case set_block_32one:
01573 BM_SET_ONE_BLOCKS(dec.get_32());
01574 continue;
01575 case set_block_bit:
01576 {
01577 if (blk == 0)
01578 {
01579 blk = bman.get_allocator().alloc_bit_block();
01580 bman.set_block(i, blk);
01581 dec.get_32(blk, bm::set_block_size);
01582 continue;
01583 }
01584 dec.get_32(temp_block, bm::set_block_size);
01585 bv.combine_operation_with_block(i,
01586 temp_block,
01587 0, BM_OR);
01588 continue;
01589 }
01590 case set_block_bit_1bit:
01591 {
01592 unsigned bit_idx = dec.get_16();
01593 bit_idx += i * bm::bits_in_block;
01594 bv.set_bit(bit_idx);
01595 continue;
01596 }
01597 case set_block_bit_0runs:
01598 {
01599
01600 bit_block_set(temp_block, 0);
01601
01602 unsigned char run_type = dec.get_8();
01603 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01604 {
01605 unsigned run_length = dec.get_16();
01606 if (run_type)
01607 {
01608 unsigned run_end = j + run_length;
01609 for (;j < run_end; ++j)
01610 {
01611 BM_ASSERT(j < bm::set_block_size);
01612 temp_block[j] = dec.get_32();
01613 }
01614 }
01615 else
01616 {
01617 j += run_length;
01618 }
01619 }
01620
01621 bv.combine_operation_with_block(i,
01622 temp_block,
01623 0, BM_OR);
01624 continue;
01625 }
01626 case set_block_bit_interval:
01627 {
01628 unsigned head_idx, tail_idx;
01629 head_idx = dec.get_16();
01630 tail_idx = dec.get_16();
01631
01632 if (blk == 0)
01633 {
01634 blk = bman.get_allocator().alloc_bit_block();
01635 bman.set_block(i, blk);
01636 for (unsigned i = 0; i < head_idx; ++i)
01637 {
01638 blk[i] = 0;
01639 }
01640 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
01641 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01642 {
01643 blk[j] = 0;
01644 }
01645 continue;
01646 }
01647 bit_block_set(temp_block, 0);
01648 dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
01649
01650 bv.combine_operation_with_block(i,
01651 temp_block,
01652 0, BM_OR);
01653 continue;
01654 }
01655 case set_block_gap:
01656 case set_block_gapbit:
01657 case set_block_arrgap:
01658 case set_block_gap_egamma:
01659 case set_block_arrgap_egamma:
01660 case set_block_arrgap_egamma_inv:
01661 case set_block_arrgap_inv:
01662 deserialize_gap(btype, dec, bv, bman, i, blk);
01663 continue;
01664 case set_block_arrbit:
01665 {
01666 gap_word_t len =
01667 sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32();
01668
01669 if (bman.is_block_gap(i))
01670 {
01671
01672
01673
01674 blk = bman.convert_gap2bitset(i);
01675 }
01676 else
01677 {
01678 if (blk == 0)
01679 {
01680 blk = bman.get_allocator().alloc_bit_block();
01681 bit_block_set(blk, 0);
01682 bman.set_block(i, blk);
01683 }
01684 }
01685
01686
01687 for (unsigned k = 0; k < len; ++k)
01688 {
01689 gap_word_t bit_idx = dec.get_16();
01690 or_bit_block(blk, bit_idx, 1);
01691 }
01692 continue;
01693 }
01694 default:
01695 BM_ASSERT(0);
01696 }
01697 }
01698
01699 return dec.size();
01700 }
01701
01702
01703
01704 template<class DEC>
01705 serial_stream_iterator<DEC>::serial_stream_iterator(const unsigned char* buf)
01706 : decoder_(buf),
01707 end_of_stream_(false),
01708 bv_size_(0),
01709 state_(e_unknown),
01710 id_cnt_(0),
01711 block_idx_(0),
01712 mono_block_cnt_(0)
01713 {
01714 ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
01715
01716 bit_func_table_[bm::set_AND] =
01717 &serial_stream_iterator<DEC>::get_bit_block_AND;
01718 bit_func_table_[bm::set_ASSIGN] =
01719 &serial_stream_iterator<DEC>::get_bit_block_ASSIGN;
01720 bit_func_table_[bm::set_OR] =
01721 &serial_stream_iterator<DEC>::get_bit_block_OR;
01722 bit_func_table_[bm::set_SUB] =
01723 &serial_stream_iterator<DEC>::get_bit_block_SUB;
01724 bit_func_table_[bm::set_XOR] =
01725 &serial_stream_iterator<DEC>::get_bit_block_XOR;
01726 bit_func_table_[bm::set_COUNT] =
01727 &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01728 bit_func_table_[bm::set_COUNT_AND] =
01729 &serial_stream_iterator<DEC>::get_bit_block_COUNT_AND;
01730 bit_func_table_[bm::set_COUNT_XOR] =
01731 &serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR;
01732 bit_func_table_[bm::set_COUNT_OR] =
01733 &serial_stream_iterator<DEC>::get_bit_block_COUNT_OR;
01734 bit_func_table_[bm::set_COUNT_SUB_AB] =
01735 &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB;
01736 bit_func_table_[bm::set_COUNT_SUB_BA] =
01737 &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA;
01738 bit_func_table_[bm::set_COUNT_A] =
01739 &serial_stream_iterator<DEC>::get_bit_block_COUNT_A;
01740 bit_func_table_[bm::set_COUNT_B] =
01741 &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01742
01743
01744
01745 unsigned char header_flag = decoder_.get_8();
01746 if (!(header_flag & BM_HM_NO_BO))
01747 {
01748 decoder_.get_8();
01749 }
01750
01751
01752
01753 if (header_flag & BM_HM_ID_LIST)
01754 {
01755
01756 if (header_flag & BM_HM_RESIZE)
01757 {
01758 bv_size_ = decoder_.get_32();
01759 }
01760
01761 state_ = e_list_ids;
01762 id_cnt_ = decoder_.get_32();
01763 next();
01764 }
01765 else
01766 {
01767 if (!(header_flag & BM_HM_NO_GAPL))
01768 {
01769 unsigned i;
01770
01771 for (i = 0; i < bm::gap_levels; ++i)
01772 {
01773 glevels_[i] = decoder_.get_16();
01774 }
01775 }
01776
01777 if (header_flag & (1 << 1))
01778 {
01779 bv_size_ = decoder_.get_32();
01780 }
01781 state_ = e_blocks;
01782 }
01783 }
01784
01785 template<class DEC>
01786 void serial_stream_iterator<DEC>::next()
01787 {
01788 if (is_eof()) return;
01789
01790 switch (state_)
01791 {
01792 case e_list_ids:
01793
01794 if (id_cnt_ == 0)
01795 {
01796 end_of_stream_ = true;
01797 state_ = e_unknown;
01798 break;
01799 }
01800 last_id_ = decoder_.get_32();
01801 --id_cnt_;
01802 break;
01803
01804 case e_blocks:
01805 if (block_idx_ == bm::set_total_blocks)
01806 {
01807 end_of_stream_ = true;
01808 state_ = e_unknown;
01809 break;
01810 }
01811
01812 block_type_ = decoder_.get_8();
01813
01814
01815
01816 if (block_type_ & (1 << 7))
01817 {
01818 mono_block_cnt_ = (block_type_ & ~(1 << 7)) - 1;
01819 state_ = e_zero_blocks;
01820 break;
01821 }
01822
01823 switch (block_type_)
01824 {
01825 case set_block_azero:
01826 case set_block_end:
01827 end_of_stream_ = true; state_ = e_unknown;
01828 break;
01829 case set_block_1zero:
01830 state_ = e_zero_blocks;
01831 mono_block_cnt_ = 0;
01832 break;
01833 case set_block_8zero:
01834 state_ = e_zero_blocks;
01835 mono_block_cnt_ = decoder_.get_8()-1;
01836 break;
01837 case set_block_16zero:
01838 state_ = e_zero_blocks;
01839 mono_block_cnt_ = decoder_.get_16()-1;
01840 break;
01841 case set_block_32zero:
01842 state_ = e_zero_blocks;
01843 mono_block_cnt_ = decoder_.get_32()-1;
01844 break;
01845 case set_block_aone:
01846 state_ = e_one_blocks;
01847 mono_block_cnt_ = bm::set_total_blocks - block_idx_;
01848 break;
01849 case set_block_1one:
01850 state_ = e_one_blocks;
01851 mono_block_cnt_ = 0;
01852 break;
01853 case set_block_8one:
01854 state_ = e_one_blocks;
01855 mono_block_cnt_ = decoder_.get_8()-1;
01856 break;
01857 case set_block_16one:
01858 state_ = e_one_blocks;
01859 mono_block_cnt_ = decoder_.get_16()-1;
01860 break;
01861 case set_block_32one:
01862 state_ = e_one_blocks;
01863 mono_block_cnt_ = decoder_.get_32()-1;
01864 break;
01865
01866 case set_block_bit:
01867 case set_block_bit_interval:
01868 case set_block_bit_0runs:
01869 case set_block_arrbit:
01870 case set_block_bit_1bit:
01871 state_ = e_bit_block;
01872 break;
01873
01874 case set_block_gap:
01875 case set_block_gap_egamma:
01876 gap_head_ =
01877 sizeof(gap_word_t) == 2 ?
01878 decoder_.get_16() : decoder_.get_32();
01879 case set_block_arrgap:
01880 case set_block_arrgap_egamma:
01881 case set_block_arrgap_egamma_inv:
01882 case set_block_arrgap_inv:
01883 state_ = e_gap_block;
01884 break;
01885 case set_block_gapbit:
01886 state_ = e_bit_block;
01887 break;
01888
01889 default:
01890 BM_ASSERT(0);
01891 }
01892
01893 break;
01894
01895 case e_zero_blocks:
01896 case e_one_blocks:
01897 ++block_idx_;
01898 if (!mono_block_cnt_)
01899 {
01900 state_ = e_blocks;
01901 break;
01902 }
01903 --mono_block_cnt_;
01904 break;
01905
01906 case e_unknown:
01907 default:
01908 BM_ASSERT(0);
01909 }
01910 }
01911
01912
01913
01914 template<class DEC>
01915 unsigned
01916 serial_stream_iterator<DEC>::get_bit_block_ASSIGN(
01917 bm::word_t* dst_block,
01918 bm::word_t* )
01919 {
01920 BM_ASSERT(this->state_ == e_bit_block);
01921 unsigned count = 0;
01922
01923 switch (this->block_type_)
01924 {
01925 case set_block_bit:
01926 decoder_.get_32(dst_block, bm::set_block_size);
01927 break;
01928 case set_block_bit_0runs:
01929 {
01930 if (dst_block)
01931 bit_block_set(dst_block, 0);
01932 unsigned char run_type = decoder_.get_8();
01933 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01934 {
01935 unsigned run_length = decoder_.get_16();
01936 if (run_type)
01937 {
01938 unsigned run_end = j + run_length;
01939 if (dst_block)
01940 {
01941 for (;j < run_end; ++j)
01942 {
01943 BM_ASSERT(j < bm::set_block_size);
01944 dst_block[j] = decoder_.get_32();
01945 }
01946 }
01947 else
01948 {
01949
01950 for (;j < run_end; ++j)
01951 {
01952 decoder_.get_32();
01953 }
01954 }
01955 }
01956 else
01957 {
01958 j += run_length;
01959 }
01960 }
01961 }
01962 break;
01963 case set_block_bit_interval:
01964 {
01965 unsigned head_idx = decoder_.get_16();
01966 unsigned tail_idx = decoder_.get_16();
01967 if (dst_block)
01968 {
01969 for (unsigned i = 0; i < head_idx; ++i)
01970 dst_block[i] = 0;
01971 decoder_.get_32(dst_block + head_idx,
01972 tail_idx - head_idx + 1);
01973 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01974 dst_block[j] = 0;
01975 }
01976 else
01977 {
01978 decoder_.seek((tail_idx - head_idx + 1) * 4);
01979 }
01980 }
01981 break;
01982 case set_block_arrbit:
01983 case set_block_bit_1bit:
01984 get_arr_bit(dst_block, true );
01985 break;
01986 case set_block_gapbit:
01987 BM_ASSERT(0);
01988 break;
01989 default:
01990 BM_ASSERT(0);
01991 }
01992 return count;
01993 }
01994
01995 template<class DEC>
01996 unsigned
01997 serial_stream_iterator<DEC>::get_bit_block_OR(bm::word_t* dst_block,
01998 bm::word_t* )
01999 {
02000 BM_ASSERT(this->state_ == e_bit_block);
02001 unsigned count = 0;
02002 switch (block_type_)
02003 {
02004 case set_block_bit:
02005 {
02006 bitblock_get_adapter ga(dst_block);
02007 bit_OR<bm::word_t> func;
02008 bitblock_store_adapter sa(dst_block);
02009
02010 bit_recomb(ga,
02011 decoder_,
02012 func,
02013 sa
02014 );
02015 }
02016 break;
02017 case set_block_bit_interval:
02018 {
02019 unsigned head_idx = decoder_.get_16();
02020 unsigned tail_idx = decoder_.get_16();
02021 for (unsigned i = head_idx; i <= tail_idx; ++i)
02022 dst_block[i] |= decoder_.get_32();
02023 }
02024 break;
02025 case set_block_bit_0runs:
02026 {
02027 unsigned char run_type = decoder_.get_8();
02028 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02029 {
02030 unsigned run_length = decoder_.get_16();
02031 if (run_type)
02032 {
02033 unsigned run_end = j + run_length;
02034 for (;j < run_end; ++j)
02035 {
02036 BM_ASSERT(j < bm::set_block_size);
02037 dst_block[j] |= decoder_.get_32();
02038 }
02039 }
02040 else
02041 {
02042 j += run_length;
02043 }
02044 }
02045 }
02046 break;
02047 case set_block_bit_1bit:
02048 case set_block_arrbit:
02049 get_arr_bit(dst_block, false );
02050 break;
02051 default:
02052 BM_ASSERT(0);
02053 }
02054 return count;
02055 }
02056
02057 template<class DEC>
02058 unsigned
02059 serial_stream_iterator<DEC>::get_bit_block_AND(bm::word_t* dst_block,
02060 bm::word_t* tmp_block)
02061 {
02062 BM_ASSERT(this->state_ == e_bit_block);
02063 BM_ASSERT(dst_block != tmp_block);
02064
02065 unsigned count = 0;
02066 switch (block_type_)
02067 {
02068 case set_block_bit:
02069 for (unsigned i = 0; i < bm::set_block_size; ++i)
02070 dst_block[i] &= decoder_.get_32();
02071 break;
02072 case set_block_bit_0runs:
02073 {
02074 unsigned char run_type = decoder_.get_8();
02075 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02076 {
02077 unsigned run_length = decoder_.get_16();
02078 unsigned run_end = j + run_length;
02079 if (run_type)
02080 {
02081 for (;j < run_end; ++j)
02082 {
02083 BM_ASSERT(j < bm::set_block_size);
02084 dst_block[j] &= decoder_.get_32();
02085 }
02086 }
02087 else
02088 {
02089 for (;j < run_end; ++j)
02090 {
02091 BM_ASSERT(j < bm::set_block_size);
02092 dst_block[j] = 0;
02093 }
02094 }
02095 }
02096 }
02097 break;
02098 case set_block_bit_interval:
02099 {
02100 unsigned head_idx = decoder_.get_16();
02101 unsigned tail_idx = decoder_.get_16();
02102 unsigned i;
02103 for ( i = 0; i < head_idx; ++i)
02104 dst_block[i] = 0;
02105 for ( i = head_idx; i <= tail_idx; ++i)
02106 dst_block[i] &= decoder_.get_32();
02107 for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
02108 dst_block[i] = 0;
02109 }
02110 break;
02111 case set_block_bit_1bit:
02112
02113 case set_block_arrbit:
02114 get_arr_bit(tmp_block, true );
02115 if (dst_block)
02116 bit_block_and(dst_block, tmp_block);
02117 break;
02118 default:
02119 BM_ASSERT(0);
02120 }
02121 return count;
02122 }
02123
02124 template<class DEC>
02125 unsigned
02126 serial_stream_iterator<DEC>::get_bit_block_XOR(bm::word_t* dst_block,
02127 bm::word_t* tmp_block)
02128 {
02129 BM_ASSERT(this->state_ == e_bit_block);
02130 BM_ASSERT(dst_block != tmp_block);
02131
02132 unsigned count = 0;
02133 switch (block_type_)
02134 {
02135 case set_block_bit:
02136 for (unsigned i = 0; i < bm::set_block_size; ++i)
02137 dst_block[i] ^= decoder_.get_32();
02138 break;
02139 case set_block_bit_0runs:
02140 {
02141 unsigned char run_type = decoder_.get_8();
02142 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02143 {
02144 unsigned run_length = decoder_.get_16();
02145 if (run_type)
02146 {
02147 unsigned run_end = j + run_length;
02148 for (;j < run_end; ++j)
02149 {
02150 BM_ASSERT(j < bm::set_block_size);
02151 dst_block[j] ^= decoder_.get_32();
02152 }
02153 }
02154 else
02155 {
02156 j += run_length;
02157 }
02158 }
02159 }
02160 break;
02161 case set_block_bit_interval:
02162 {
02163 unsigned head_idx = decoder_.get_16();
02164 unsigned tail_idx = decoder_.get_16();
02165 for (unsigned i = head_idx; i <= tail_idx; ++i)
02166 dst_block[i] ^= decoder_.get_32();
02167 }
02168 break;
02169 case set_block_bit_1bit:
02170
02171 case set_block_arrbit:
02172 get_arr_bit(tmp_block, true );
02173 if (dst_block)
02174 {
02175 bit_block_xor(dst_block, tmp_block);
02176 }
02177 break;
02178 default:
02179 BM_ASSERT(0);
02180 }
02181 return count;
02182 }
02183
02184 template<class DEC>
02185 unsigned
02186 serial_stream_iterator<DEC>::get_bit_block_SUB(bm::word_t* dst_block,
02187 bm::word_t* tmp_block)
02188 {
02189 BM_ASSERT(this->state_ == e_bit_block);
02190 BM_ASSERT(dst_block != tmp_block);
02191
02192 unsigned count = 0;
02193 switch (block_type_)
02194 {
02195 case set_block_bit:
02196 for (unsigned i = 0; i < bm::set_block_size; ++i)
02197 dst_block[i] &= ~decoder_.get_32();
02198 break;
02199 case set_block_bit_0runs:
02200 {
02201 unsigned char run_type = decoder_.get_8();
02202 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02203 {
02204 unsigned run_length = decoder_.get_16();
02205 if (run_type)
02206 {
02207 unsigned run_end = j + run_length;
02208 for (;j < run_end; ++j)
02209 {
02210 BM_ASSERT(j < bm::set_block_size);
02211 dst_block[j] &= ~decoder_.get_32();
02212 }
02213 }
02214 else
02215 {
02216 j += run_length;
02217 }
02218 }
02219 }
02220 break;
02221 case set_block_bit_interval:
02222 {
02223 unsigned head_idx = decoder_.get_16();
02224 unsigned tail_idx = decoder_.get_16();
02225 for (unsigned i = head_idx; i <= tail_idx; ++i)
02226 dst_block[i] &= ~decoder_.get_32();
02227 }
02228 break;
02229 case set_block_bit_1bit:
02230
02231 case set_block_arrbit:
02232 get_arr_bit(tmp_block, true );
02233 if (dst_block)
02234 bit_block_sub(dst_block, tmp_block);
02235 break;
02236 default:
02237 BM_ASSERT(0);
02238 }
02239 return count;
02240 }
02241
02242
02243 template<class DEC>
02244 unsigned
02245 serial_stream_iterator<DEC>::get_bit_block_COUNT(bm::word_t* ,
02246 bm::word_t* )
02247 {
02248 BM_ASSERT(this->state_ == e_bit_block);
02249
02250 unsigned count = 0;
02251 switch (block_type_)
02252 {
02253 case set_block_bit:
02254 for (unsigned i = 0; i < bm::set_block_size; ++i)
02255 count += word_bitcount(decoder_.get_32());
02256 break;
02257 case set_block_bit_0runs:
02258 {
02259 unsigned count = 0;
02260 unsigned char run_type = decoder_.get_8();
02261 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02262 {
02263 unsigned run_length = decoder_.get_16();
02264 if (run_type)
02265 {
02266 unsigned run_end = j + run_length;
02267 for (;j < run_end; ++j)
02268 {
02269 count += word_bitcount(decoder_.get_32());
02270 }
02271 }
02272 else
02273 {
02274 j += run_length;
02275 }
02276 }
02277 return count;
02278 }
02279 case set_block_bit_interval:
02280 {
02281 unsigned head_idx = decoder_.get_16();
02282 unsigned tail_idx = decoder_.get_16();
02283 for (unsigned i = head_idx; i <= tail_idx; ++i)
02284 count += word_bitcount(decoder_.get_32());
02285 }
02286 break;
02287 case set_block_arrbit:
02288 count += get_arr_bit(0);
02289 break;
02290 case set_block_bit_1bit:
02291 ++count;
02292 decoder_.get_16();
02293 break;
02294 default:
02295 BM_ASSERT(0);
02296 }
02297 return count;
02298 }
02299
02300 template<class DEC>
02301 unsigned
02302 serial_stream_iterator<DEC>::get_bit_block_COUNT_A(bm::word_t* dst_block,
02303 bm::word_t* )
02304 {
02305 BM_ASSERT(this->state_ == e_bit_block);
02306 unsigned count = 0;
02307 if (dst_block)
02308 {
02309
02310 count =
02311 bit_block_calc_count(dst_block,
02312 dst_block + bm::set_block_size);
02313 }
02314
02315 switch (block_type_)
02316 {
02317 case set_block_bit:
02318 decoder_.get_32(0, bm::set_block_size);
02319 break;
02320 case set_block_bit_0runs:
02321 {
02322 unsigned char run_type = decoder_.get_8();
02323 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02324 {
02325 unsigned run_length = decoder_.get_16();
02326 if (run_type)
02327 {
02328 unsigned run_end = j + run_length;
02329 for (;j < run_end; ++j)
02330 {
02331 decoder_.get_32();
02332 }
02333 }
02334 else
02335 {
02336 j += run_length;
02337 }
02338 }
02339 }
02340 break;
02341
02342 case set_block_bit_interval:
02343 {
02344 unsigned head_idx = decoder_.get_16();
02345 unsigned tail_idx = decoder_.get_16();
02346 for (unsigned i = head_idx; i <= tail_idx; ++i)
02347 decoder_.get_32();
02348 }
02349 break;
02350 case set_block_arrbit:
02351 get_arr_bit(0);
02352 break;
02353 case set_block_bit_1bit:
02354 decoder_.get_16();
02355 break;
02356 default:
02357 BM_ASSERT(0);
02358 }
02359 return count;
02360 }
02361
02362
02363 template<class DEC>
02364 unsigned
02365 serial_stream_iterator<DEC>::get_bit_block_COUNT_AND(bm::word_t* dst_block,
02366 bm::word_t* tmp_block)
02367 {
02368 BM_ASSERT(this->state_ == e_bit_block);
02369 BM_ASSERT(dst_block);
02370
02371 unsigned count = 0;
02372 switch (block_type_)
02373 {
02374 case set_block_bit:
02375 for (unsigned i = 0; i < bm::set_block_size; ++i)
02376 count += word_bitcount(dst_block[i] & decoder_.get_32());
02377 break;
02378 case set_block_bit_0runs:
02379 {
02380 unsigned count = 0;
02381 unsigned char run_type = decoder_.get_8();
02382 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02383 {
02384 unsigned run_length = decoder_.get_16();
02385 if (run_type)
02386 {
02387 unsigned run_end = j + run_length;
02388 for (;j < run_end; ++j)
02389 {
02390 count += word_bitcount(dst_block[j] & decoder_.get_32());
02391 }
02392 }
02393 else
02394 {
02395 j += run_length;
02396 }
02397 }
02398 return count;
02399 }
02400 case set_block_bit_interval:
02401 {
02402 unsigned head_idx = decoder_.get_16();
02403 unsigned tail_idx = decoder_.get_16();
02404 for (unsigned i = head_idx; i <= tail_idx; ++i)
02405 count += word_bitcount(dst_block[i] & decoder_.get_32());
02406 }
02407 break;
02408 case set_block_bit_1bit:
02409
02410 case set_block_arrbit:
02411 get_arr_bit(tmp_block, true );
02412 count +=
02413 bit_operation_and_count(dst_block, dst_block + bm::set_block_size,
02414 tmp_block);
02415 break;
02416 default:
02417 BM_ASSERT(0);
02418 }
02419 return count;
02420 }
02421
02422 template<class DEC>
02423 unsigned
02424 serial_stream_iterator<DEC>::get_bit_block_COUNT_OR(bm::word_t* dst_block,
02425 bm::word_t* tmp_block)
02426 {
02427 BM_ASSERT(this->state_ == e_bit_block);
02428 BM_ASSERT(dst_block);
02429
02430 bitblock_sum_adapter count_adapter;
02431 switch (block_type_)
02432 {
02433 case set_block_bit:
02434 {
02435 bitblock_get_adapter ga(dst_block);
02436 bit_COUNT_OR<bm::word_t> func;
02437
02438 bit_recomb(ga,
02439 decoder_,
02440 func,
02441 count_adapter
02442 );
02443 }
02444 break;
02445 case set_block_bit_0runs:
02446 {
02447 unsigned count = 0;
02448 unsigned char run_type = decoder_.get_8();
02449 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02450 {
02451 unsigned run_length = decoder_.get_16();
02452 unsigned run_end = j + run_length;
02453 if (run_type)
02454 {
02455 for (;j < run_end; ++j)
02456 {
02457 BM_ASSERT(j < bm::set_block_size);
02458 count += word_bitcount(dst_block[j] | decoder_.get_32());
02459 }
02460 }
02461 else
02462 {
02463 for (;j < run_end; ++j)
02464 {
02465 BM_ASSERT(j < bm::set_block_size);
02466 count += word_bitcount(dst_block[j]);
02467 }
02468 }
02469 }
02470 return count;
02471 }
02472 case set_block_bit_interval:
02473 {
02474 unsigned head_idx = decoder_.get_16();
02475 unsigned tail_idx = decoder_.get_16();
02476 unsigned count = 0;
02477 unsigned i;
02478 for (i = 0; i < head_idx; ++i)
02479 count += word_bitcount(dst_block[i]);
02480 for (i = head_idx; i <= tail_idx; ++i)
02481 count += word_bitcount(dst_block[i] | decoder_.get_32());
02482 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02483 count += word_bitcount(dst_block[i]);
02484 return count;
02485 }
02486 case set_block_bit_1bit:
02487
02488 case set_block_arrbit:
02489 get_arr_bit(tmp_block, true );
02490 return
02491 bit_operation_or_count(dst_block,
02492 dst_block + bm::set_block_size,
02493 tmp_block);
02494 default:
02495 BM_ASSERT(0);
02496 }
02497 return count_adapter.sum();
02498 }
02499
02500 template<class DEC>
02501 unsigned
02502 serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR(bm::word_t* dst_block,
02503 bm::word_t* tmp_block)
02504 {
02505 BM_ASSERT(this->state_ == e_bit_block);
02506 BM_ASSERT(dst_block);
02507
02508 bitblock_sum_adapter count_adapter;
02509 switch (block_type_)
02510 {
02511 case set_block_bit:
02512 {
02513 bitblock_get_adapter ga(dst_block);
02514 bit_COUNT_XOR<bm::word_t> func;
02515
02516 bit_recomb(ga,
02517 decoder_,
02518 func,
02519 count_adapter
02520 );
02521 }
02522 break;
02523 case set_block_bit_0runs:
02524 {
02525 unsigned count = 0;
02526 unsigned char run_type = decoder_.get_8();
02527 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02528 {
02529 unsigned run_length = decoder_.get_16();
02530 unsigned run_end = j + run_length;
02531 if (run_type)
02532 {
02533 for (;j < run_end; ++j)
02534 {
02535 BM_ASSERT(j < bm::set_block_size);
02536 count += word_bitcount(dst_block[j] ^ decoder_.get_32());
02537 }
02538 }
02539 else
02540 {
02541 for (;j < run_end; ++j)
02542 {
02543 BM_ASSERT(j < bm::set_block_size);
02544 count += word_bitcount(dst_block[j]);
02545 }
02546 }
02547 }
02548 return count;
02549 }
02550 case set_block_bit_interval:
02551 {
02552 unsigned head_idx = decoder_.get_16();
02553 unsigned tail_idx = decoder_.get_16();
02554 unsigned count = 0;
02555 unsigned i;
02556 for (i = 0; i < head_idx; ++i)
02557 count += word_bitcount(dst_block[i]);
02558 for (i = head_idx; i <= tail_idx; ++i)
02559 count += word_bitcount(dst_block[i] ^ decoder_.get_32());
02560 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02561 count += word_bitcount(dst_block[i]);
02562 return count;
02563 }
02564 case set_block_bit_1bit:
02565
02566 case set_block_arrbit:
02567 get_arr_bit(tmp_block, true );
02568 return
02569 bit_operation_xor_count(dst_block,
02570 dst_block + bm::set_block_size,
02571 tmp_block);
02572 default:
02573 BM_ASSERT(0);
02574 }
02575 return count_adapter.sum();
02576 }
02577
02578 template<class DEC>
02579 unsigned
02580 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block,
02581 bm::word_t* tmp_block)
02582 {
02583 BM_ASSERT(this->state_ == e_bit_block);
02584 BM_ASSERT(dst_block);
02585
02586 bitblock_sum_adapter count_adapter;
02587 switch (block_type_)
02588 {
02589 case set_block_bit:
02590 {
02591 bitblock_get_adapter ga(dst_block);
02592 bit_COUNT_SUB_AB<bm::word_t> func;
02593
02594 bit_recomb(ga,
02595 decoder_,
02596 func,
02597 count_adapter
02598 );
02599 }
02600 break;
02601 case set_block_bit_0runs:
02602 {
02603 unsigned count = 0;
02604 unsigned char run_type = decoder_.get_8();
02605 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02606 {
02607 unsigned run_length = decoder_.get_16();
02608 unsigned run_end = j + run_length;
02609 if (run_type)
02610 {
02611 for (;j < run_end; ++j)
02612 {
02613 BM_ASSERT(j < bm::set_block_size);
02614 count += word_bitcount(dst_block[j] & ~decoder_.get_32());
02615 }
02616 }
02617 else
02618 {
02619 for (;j < run_end; ++j)
02620 {
02621 BM_ASSERT(j < bm::set_block_size);
02622 count += word_bitcount(dst_block[j]);
02623 }
02624 }
02625 }
02626 return count;
02627 }
02628 case set_block_bit_interval:
02629 {
02630 unsigned head_idx = decoder_.get_16();
02631 unsigned tail_idx = decoder_.get_16();
02632 unsigned count = 0;
02633 unsigned i;
02634 for (i = 0; i < head_idx; ++i)
02635 count += word_bitcount(dst_block[i]);
02636 for (i = head_idx; i <= tail_idx; ++i)
02637 count += word_bitcount(dst_block[i] & (~decoder_.get_32()));
02638 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02639 count += word_bitcount(dst_block[i]);
02640 return count;
02641 }
02642 break;
02643 case set_block_bit_1bit:
02644
02645 case set_block_arrbit:
02646 get_arr_bit(tmp_block, true );
02647 return
02648 bit_operation_sub_count(dst_block,
02649 dst_block + bm::set_block_size,
02650 tmp_block);
02651 default:
02652 BM_ASSERT(0);
02653 }
02654 return count_adapter.sum();
02655 }
02656
02657 template<class DEC>
02658 unsigned
02659 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block,
02660 bm::word_t* tmp_block)
02661 {
02662 BM_ASSERT(this->state_ == e_bit_block);
02663 BM_ASSERT(dst_block);
02664
02665 bitblock_sum_adapter count_adapter;
02666 switch (block_type_)
02667 {
02668 case set_block_bit:
02669 {
02670 bitblock_get_adapter ga(dst_block);
02671 bit_COUNT_SUB_BA<bm::word_t> func;
02672
02673 bit_recomb(ga,
02674 decoder_,
02675 func,
02676 count_adapter
02677 );
02678 }
02679 break;
02680 case set_block_bit_0runs:
02681 {
02682 unsigned count = 0;
02683 unsigned char run_type = decoder_.get_8();
02684 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02685 {
02686 unsigned run_length = decoder_.get_16();
02687 unsigned run_end = j + run_length;
02688 if (run_type)
02689 {
02690 for (;j < run_end; ++j)
02691 {
02692 BM_ASSERT(j < bm::set_block_size);
02693 count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
02694 }
02695 }
02696 else
02697 {
02698 j += run_length;
02699 }
02700 }
02701 return count;
02702 }
02703
02704 case set_block_bit_interval:
02705 {
02706 unsigned head_idx = decoder_.get_16();
02707 unsigned tail_idx = decoder_.get_16();
02708 unsigned count = 0;
02709 unsigned i;
02710 for (i = head_idx; i <= tail_idx; ++i)
02711 count += word_bitcount(decoder_.get_32() & (~dst_block[i]));
02712 return count;
02713 }
02714 break;
02715 case set_block_bit_1bit:
02716
02717 case set_block_arrbit:
02718 get_arr_bit(tmp_block, true );
02719 return
02720 bit_operation_sub_count(tmp_block,
02721 tmp_block + bm::set_block_size,
02722 dst_block);
02723 default:
02724 BM_ASSERT(0);
02725 }
02726 return count_adapter.sum();
02727 }
02728
02729
02730
02731 template<class DEC>
02732 unsigned serial_stream_iterator<DEC>::get_arr_bit(bm::word_t* dst_block,
02733 bool clear_target)
02734 {
02735 BM_ASSERT(this->block_type_ == set_block_arrbit ||
02736 this->block_type_ == set_block_bit_1bit);
02737
02738 gap_word_t len = decoder_.get_16();
02739 if (dst_block)
02740 {
02741 if (clear_target)
02742 bit_block_set(dst_block, 0);
02743
02744 if (this->block_type_ == set_block_bit_1bit)
02745 {
02746
02747 set_bit(dst_block, len);
02748 return 1;
02749 }
02750
02751 for (unsigned k = 0; k < len; ++k)
02752 {
02753 gap_word_t bit_idx = decoder_.get_16();
02754 set_bit(dst_block, bit_idx);
02755 }
02756 }
02757 else
02758 {
02759 if (this->block_type_ == set_block_bit_1bit)
02760 {
02761 return 1;
02762 }
02763
02764 decoder_.seek(len * 2);
02765 }
02766 return len;
02767 }
02768
02769 template<class DEC>
02770 void
02771 serial_stream_iterator<DEC>::get_gap_block(bm::gap_word_t* dst_block)
02772 {
02773 BM_ASSERT(this->state_ == e_gap_block ||
02774 this->block_type_ == set_block_bit_1bit);
02775 BM_ASSERT(dst_block);
02776
02777 read_gap_block(this->decoder_,
02778 this->block_type_,
02779 dst_block,
02780 this->gap_head_);
02781
02782 ++(this->block_idx_);
02783 this->state_ = e_blocks;
02784 }
02785
02786
02787 template<class DEC>
02788 unsigned
02789 serial_stream_iterator<DEC>::get_bit_block(bm::word_t* dst_block,
02790 bm::word_t* tmp_block,
02791 set_operation op)
02792 {
02793 BM_ASSERT(this->state_ == e_bit_block);
02794 get_bit_func_type bit_func = bit_func_table_[op];
02795 BM_ASSERT(bit_func);
02796 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
02797 this->state_ = e_blocks;
02798 ++(this->block_idx_);
02799 return cnt;
02800 }
02801
02802
02803
02804 template<class BV>
02805 unsigned operation_deserializer<BV>::deserialize(
02806 bvector_type& bv,
02807 const unsigned char* buf,
02808 bm::word_t* temp_block,
02809 set_operation op)
02810 {
02811 ByteOrder bo_current = globals<true>::byte_order();
02812
02813 bm::decoder dec(buf);
02814 unsigned char header_flag = dec.get_8();
02815 ByteOrder bo = bo_current;
02816 if (!(header_flag & BM_HM_NO_BO))
02817 {
02818 bo = (bm::ByteOrder) dec.get_8();
02819 }
02820
02821 blocks_manager_type& bman = bv.get_blocks_manager();
02822 bit_block_guard<blocks_manager_type> bg(bman);
02823 if (temp_block == 0)
02824 {
02825 temp_block = bg.allocate();
02826 }
02827
02828 if (bo_current == bo)
02829 {
02830 serial_stream_current ss(buf);
02831 return
02832 iterator_deserializer<BV, serial_stream_current>::
02833 deserialize(bv, ss, temp_block, op);
02834 }
02835 switch (bo_current)
02836 {
02837 case BigEndian:
02838 {
02839 serial_stream_be ss(buf);
02840 return
02841 iterator_deserializer<BV, serial_stream_be>::
02842 deserialize(bv, ss, temp_block, op);
02843 }
02844 case LittleEndian:
02845 {
02846 serial_stream_le ss(buf);
02847 return
02848 iterator_deserializer<BV, serial_stream_le>::
02849 deserialize(bv, ss, temp_block, op);
02850 }
02851 default:
02852 BM_ASSERT(0);
02853 };
02854 return 0;
02855 }
02856
02857
02858 template<class BV, class SerialIterator>
02859 void iterator_deserializer<BV, SerialIterator>::load_id_list(
02860 bvector_type& bv,
02861 serial_iterator_type& sit,
02862 unsigned id_count,
02863 bool set_clear)
02864 {
02865 const unsigned win_size = 64;
02866 bm::id_t id_buffer[win_size+1];
02867
02868 if (set_clear)
02869 {
02870 for (unsigned i = 0; i <= id_count;)
02871 {
02872 unsigned j;
02873 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
02874 {
02875 id_buffer[j] = sit.get_id();
02876 sit.next();
02877 }
02878 bm::combine_or(bv, id_buffer, id_buffer + j);
02879 }
02880 }
02881 else
02882 {
02883 for (unsigned i = 0; i <= id_count;)
02884 {
02885 unsigned j;
02886 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
02887 {
02888 id_buffer[j] = sit.get_id();
02889 sit.next();
02890 }
02891 bm::combine_sub(bv, id_buffer, id_buffer + j);
02892 }
02893 }
02894 }
02895
02896
02897 template<class BV, class SerialIterator>
02898 unsigned
02899 iterator_deserializer<BV, SerialIterator>::deserialize(
02900 bvector_type& bv,
02901 serial_iterator_type& sit,
02902 bm::word_t* temp_block,
02903 set_operation op)
02904 {
02905 BM_ASSERT(temp_block);
02906
02907 unsigned count = 0;
02908 gap_word_t gap_temp_block[bm::gap_equiv_len*3];
02909 gap_temp_block[0] = 0;
02910
02911 blocks_manager_type& bman = bv.get_blocks_manager();
02912
02913 bv.forget_count();
02914
02915 if (sit.bv_size() && (sit.bv_size() > bv.size()))
02916 {
02917 bv.resize(sit.bv_size());
02918 }
02919
02920 BM_SET_MMX_GUARD
02921
02922 typename serial_iterator_type::iterator_state state;
02923 state = sit.get_state();
02924 if (state == serial_iterator_type::e_list_ids)
02925 {
02926 unsigned id_count = sit.get_id_count();
02927 bool set_clear = true;
02928 switch (op)
02929 {
02930 case set_AND:
02931 {
02932
02933 BV bv_tmp(BM_GAP);
02934 load_id_list(bv_tmp, sit, id_count, true);
02935 bv &= bv_tmp;
02936 }
02937 break;
02938 case set_ASSIGN:
02939 bv.clear(true);
02940
02941 case set_OR:
02942 set_clear = true;
02943
02944 case set_SUB:
02945 load_id_list(bv, sit, id_count, set_clear);
02946 break;
02947 case set_XOR:
02948 for (unsigned i = 0; i < id_count; ++i)
02949 {
02950 bm::id_t id = sit.get_id();
02951 bv[id] ^= true;
02952 sit.next();
02953 }
02954 break;
02955 case set_COUNT: case set_COUNT_B:
02956 for (unsigned i = 0; i < id_count; ++i)
02957 {
02958 sit.get_id();
02959 ++count;
02960 sit.next();
02961 }
02962 break;
02963 case set_COUNT_A:
02964 return bv.count();
02965 case set_COUNT_AND:
02966 for (unsigned i = 0; i < id_count; ++i)
02967 {
02968 bm::id_t id = sit.get_id();
02969 count += bv.get_bit(id);
02970 sit.next();
02971 }
02972 break;
02973 case set_COUNT_XOR:
02974 {
02975
02976 BV bv_tmp(BM_GAP);
02977 load_id_list(bv_tmp, sit, id_count, true);
02978 count += count_xor(bv, bv_tmp);
02979 }
02980 break;
02981 case set_COUNT_OR:
02982 {
02983
02984 BV bv_tmp(BM_GAP);
02985 load_id_list(bv_tmp, sit, id_count, true);
02986 count += count_or(bv, bv_tmp);
02987 }
02988 break;
02989 case set_COUNT_SUB_AB:
02990 {
02991
02992 BV bv_tmp(bv);
02993 load_id_list(bv_tmp, sit, id_count, false);
02994 count += bv_tmp.count();
02995 }
02996 break;
02997 default:
02998 BM_ASSERT(0);
02999 }
03000 return count;
03001 }
03002
03003 unsigned bv_block_idx = 0;
03004
03005 for (;1;)
03006 {
03007 bm::set_operation sop = op;
03008 if (sit.is_eof())
03009 {
03010
03011 switch (op)
03012 {
03013 case set_OR: case set_SUB: case set_XOR:
03014 case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
03015 case set_COUNT_SUB_BA:
03016
03017 break;
03018 case set_AND: case set_ASSIGN:
03019
03020 {
03021 unsigned i, j;
03022 bman.get_block_coord(bv_block_idx, &i, &j);
03023 bm::word_t*** blk_root = bman.get_rootblock();
03024 unsigned effective_top_size =
03025 bman.effective_top_block_size();
03026 for (;i < effective_top_size; ++i)
03027 {
03028 bm::word_t** blk_blk = blk_root[i];
03029 if (blk_blk == 0)
03030 {
03031 bv_block_idx+=bm::set_array_size-j;
03032 j = 0;
03033 continue;
03034 }
03035 for (;j < bm::set_array_size; ++j, ++bv_block_idx)
03036 {
03037 if (blk_blk[j])
03038 bman.zero_block(bv_block_idx);
03039 }
03040 j = 0;
03041 }
03042
03043 }
03044 break;
03045 case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
03046 case set_COUNT_SUB_AB:
03047
03048 {
03049 unsigned i, j;
03050 bman.get_block_coord(bv_block_idx, &i, &j);
03051 bm::word_t*** blk_root = bman.get_rootblock();
03052 unsigned effective_top_size =
03053 bman.effective_top_block_size();
03054 for (;i < effective_top_size; ++i)
03055 {
03056 bm::word_t** blk_blk = blk_root[i];
03057 if (blk_blk == 0)
03058 {
03059 bv_block_idx+=bm::set_array_size-j;
03060 j = 0;
03061 continue;
03062 }
03063 for (;j < bm::set_array_size; ++j, ++bv_block_idx)
03064 {
03065 if (blk_blk[j])
03066 count += bman.block_bitcount(blk_blk[j], bv_block_idx);
03067 }
03068 j = 0;
03069 }
03070
03071 }
03072 break;
03073 default:
03074 BM_ASSERT(0);
03075 }
03076 return count;
03077 }
03078
03079 state = sit.state();
03080 switch (state)
03081 {
03082 case serial_iterator_type::e_blocks:
03083 sit.next();
03084 continue;
03085 case serial_iterator_type::e_bit_block:
03086 {
03087 BM_ASSERT(sit.block_idx() == bv_block_idx);
03088 bm::word_t* blk = bman.get_block(bv_block_idx);
03089
03090 if (!blk)
03091 {
03092 switch (op)
03093 {
03094 case set_AND: case set_SUB: case set_COUNT_AND:
03095 case set_COUNT_SUB_AB: case set_COUNT_A:
03096
03097
03098 sop = set_ASSIGN;
03099 break;
03100
03101 case set_OR: case set_XOR: case set_ASSIGN:
03102 blk = bman.make_bit_block(bv_block_idx);
03103 break;
03104
03105 case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
03106 case set_COUNT_SUB_BA: case set_COUNT_B:
03107
03108 sop = set_COUNT;
03109 break;
03110
03111 case set_END:
03112 default:
03113 BM_ASSERT(0);
03114 }
03115 }
03116 else
03117 {
03118 int is_gap = BM_IS_GAP(bman, blk, bv_block_idx);
03119 if (is_gap || IS_FULL_BLOCK(blk))
03120 {
03121 if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
03122 {
03123 }
03124 else
03125 {
03126
03127
03128 blk = bman.deoptimize_block(bv_block_idx);
03129 }
03130 }
03131 }
03132
03133
03134 unsigned c = sit.get_bit_block(blk, temp_block, sop);
03135 count += c;
03136 }
03137 break;
03138
03139 case serial_iterator_type::e_zero_blocks:
03140 {
03141 BM_ASSERT(bv_block_idx == sit.block_idx());
03142 bm::word_t* blk = bman.get_block(bv_block_idx);
03143 sit.next();
03144
03145 if (blk)
03146 {
03147 switch (op)
03148 {
03149 case set_AND: case set_ASSIGN:
03150
03151 blk = bman.zero_block(bv_block_idx);
03152 break;
03153
03154 case set_SUB: case set_COUNT_AND: case set_OR:
03155 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
03156
03157 break;
03158
03159 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
03160 case set_COUNT: case set_COUNT_XOR:
03161
03162
03163
03164 {
03165 unsigned c = bman.block_bitcount(blk, bv_block_idx);
03166 count += c;
03167 }
03168 break;
03169 case set_END:
03170 default:
03171 BM_ASSERT(0);
03172 }
03173 }
03174 }
03175 break;
03176
03177 case serial_iterator_type::e_one_blocks:
03178 {
03179 BM_ASSERT(bv_block_idx == sit.block_idx());
03180 bm::word_t* blk = bman.get_block(bv_block_idx);
03181 sit.next();
03182
03183 switch (op)
03184 {
03185 case set_OR: case set_ASSIGN:
03186 bman.set_block_all_set(bv_block_idx);
03187 break;
03188 case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
03189
03190 count += bm::bits_in_block;
03191 break;
03192 case set_SUB:
03193 blk = bman.zero_block(bv_block_idx);
03194 break;
03195 case set_COUNT_SUB_AB: case set_AND:
03196
03197 break;
03198 case set_COUNT_AND: case set_COUNT_A:
03199 count += bman.block_bitcount(blk, bv_block_idx);
03200 break;
03201 default:
03202 if (blk)
03203 {
03204 switch (op)
03205 {
03206 case set_XOR:
03207 blk = bman.deoptimize_block(bv_block_idx);
03208 bit_block_xor(blk, FULL_BLOCK_ADDR);
03209 break;
03210 case set_COUNT_XOR:
03211 {
03212 int is_gap = BM_IS_GAP(bman, blk, bv_block_idx);
03213 count +=
03214 combine_count_operation_with_block(
03215 blk,
03216 is_gap,
03217 FULL_BLOCK_ADDR,
03218 0,
03219 temp_block,
03220 bm::COUNT_XOR);
03221 }
03222 break;
03223 case set_COUNT_SUB_BA:
03224 {
03225 int is_gap = BM_IS_GAP(bman, blk, bv_block_idx);
03226 count +=
03227 combine_count_operation_with_block(
03228 blk,
03229 is_gap,
03230 FULL_BLOCK_ADDR,
03231 0,
03232 temp_block,
03233 bm::COUNT_SUB_BA);
03234 }
03235 break;
03236 default:
03237 BM_ASSERT(0);
03238 }
03239 }
03240 else
03241 {
03242 switch (op)
03243 {
03244 case set_XOR:
03245
03246 bman.set_block_all_set(bv_block_idx);
03247 break;
03248 case set_COUNT_XOR:
03249 count += bm::bits_in_block;
03250 break;
03251 case set_COUNT_SUB_BA:
03252
03253 count += bm::bits_in_block;
03254 break;
03255 default:
03256 break;
03257 }
03258 }
03259 }
03260
03261 }
03262 break;
03263
03264 case serial_iterator_type::e_gap_block:
03265 {
03266 BM_ASSERT(bv_block_idx == sit.block_idx());
03267 bm::word_t* blk = bman.get_block(bv_block_idx);
03268
03269 sit.get_gap_block(gap_temp_block);
03270
03271
03272 unsigned len = gap_length(gap_temp_block);
03273 int level = gap_calc_level(len, bman.glen());
03274 --len;
03275
03276 bool const_op = is_const_set_operation(op);
03277 if (const_op)
03278 {
03279 distance_metric metric = operation2metric(op);
03280 int is_gap = blk ? BM_IS_GAP(bman, blk, bv_block_idx) : 0;
03281 unsigned c =
03282 combine_count_operation_with_block(
03283 blk,
03284 is_gap,
03285 (bm::word_t*)gap_temp_block,
03286 1,
03287 temp_block,
03288 metric);
03289 count += c;
03290 }
03291 else
03292 {
03293 if ((sop == set_ASSIGN) && blk)
03294 {
03295 blk = bman.zero_block(bv_block_idx);
03296 sop = set_OR;
03297 }
03298 if (blk == 0)
03299 {
03300 switch (sop)
03301 {
03302 case set_AND: case set_SUB:
03303 break;
03304 case set_OR: case set_XOR: case set_ASSIGN:
03305 if (level == -1)
03306 {
03307 blk = bman.get_allocator().alloc_bit_block();
03308 bman.set_block(bv_block_idx, blk);
03309 gap_convert_to_bitset(blk, gap_temp_block);
03310 }
03311 else
03312 {
03313 gap_word_t* gap_blk =
03314 bman.get_allocator().alloc_gap_block(
03315 level, bman.glen());
03316 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
03317 ::memcpy(gap_blk_ptr,
03318 gap_temp_block,
03319 gap_length(gap_temp_block)
03320 * sizeof(gap_word_t));
03321 set_gap_level(gap_blk_ptr, level);
03322 bman.set_block(bv_block_idx,
03323 (bm::word_t*)gap_blk, true );
03324 }
03325 break;
03326 default:
03327 BM_ASSERT(0);
03328 }
03329 }
03330 else
03331 {
03332 bm::operation bop = bm::setop2op(op);
03333 if (level == -1)
03334 {
03335 gap_convert_to_bitset(temp_block, gap_temp_block);
03336 bv.combine_operation_with_block(bv_block_idx,
03337 temp_block,
03338 0,
03339 bop);
03340 }
03341 else
03342 {
03343 set_gap_level(gap_temp_block, level);
03344 bv.combine_operation_with_block(
03345 bv_block_idx,
03346 (bm::word_t*)gap_temp_block,
03347 1,
03348 bop);
03349 }
03350 }
03351 }
03352 }
03353 break;
03354
03355 default:
03356 BM_ASSERT(0);
03357 }
03358
03359 ++bv_block_idx;
03360
03361 }
03362
03363 return count;
03364 }
03365
03366
03367
03368
03369 }
03370
03371 #include "bmundef.h"
03372
03373 #ifdef _MSC_VER
03374 #pragma warning( default : 4311 4312)
03375 #endif
03376
03377
03378 #endif