LLVM API Documentation
00001 00002 /*-------------------------------------------------------------*/ 00003 /*--- Block sorting machinery ---*/ 00004 /*--- blocksort.c ---*/ 00005 /*-------------------------------------------------------------*/ 00006 00007 /*-- 00008 This file is a part of bzip2 and/or libbzip2, a program and 00009 library for lossless, block-sorting data compression. 00010 00011 Copyright (C) 1996-2002 Julian R Seward. All rights reserved. 00012 00013 Redistribution and use in source and binary forms, with or without 00014 modification, are permitted provided that the following conditions 00015 are met: 00016 00017 1. Redistributions of source code must retain the above copyright 00018 notice, this list of conditions and the following disclaimer. 00019 00020 2. The origin of this software must not be misrepresented; you must 00021 not claim that you wrote the original software. If you use this 00022 software in a product, an acknowledgment in the product 00023 documentation would be appreciated but is not required. 00024 00025 3. Altered source versions must be plainly marked as such, and must 00026 not be misrepresented as being the original software. 00027 00028 4. The name of the author may not be used to endorse or promote 00029 products derived from this software without specific prior written 00030 permission. 00031 00032 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 00033 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00034 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00035 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 00036 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00037 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 00038 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00039 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 00040 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00041 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00042 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00043 00044 Julian Seward, Cambridge, UK. 00045 jseward@acm.org 00046 bzip2/libbzip2 version 1.0 of 21 March 2000 00047 00048 This program is based on (at least) the work of: 00049 Mike Burrows 00050 David Wheeler 00051 Peter Fenwick 00052 Alistair Moffat 00053 Radford Neal 00054 Ian H. Witten 00055 Robert Sedgewick 00056 Jon L. Bentley 00057 00058 For more information on these sources, see the manual. 00059 00060 To get some idea how the block sorting algorithms in this file 00061 work, read my paper 00062 On the Performance of BWT Sorting Algorithms 00063 in Proceedings of the IEEE Data Compression Conference 2000, 00064 Snowbird, Utah, USA, 27-30 March 2000. The main sort in this 00065 file implements the algorithm called cache in the paper. 00066 --*/ 00067 00068 00069 #include "bzlib_private.h" 00070 00071 /*---------------------------------------------*/ 00072 /*--- Fallback O(N log(N)^2) sorting ---*/ 00073 /*--- algorithm, for repetitive blocks ---*/ 00074 /*---------------------------------------------*/ 00075 00076 /*---------------------------------------------*/ 00077 static 00078 __inline__ 00079 void fallbackSimpleSort ( UInt32* fmap, 00080 UInt32* eclass, 00081 Int32 lo, 00082 Int32 hi ) 00083 { 00084 Int32 i, j, tmp; 00085 UInt32 ec_tmp; 00086 00087 if (lo == hi) return; 00088 00089 if (hi - lo > 3) { 00090 for ( i = hi-4; i >= lo; i-- ) { 00091 tmp = fmap[i]; 00092 ec_tmp = eclass[tmp]; 00093 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 00094 fmap[j-4] = fmap[j]; 00095 fmap[j-4] = tmp; 00096 } 00097 } 00098 00099 for ( i = hi-1; i >= lo; i-- ) { 00100 tmp = fmap[i]; 00101 ec_tmp = eclass[tmp]; 00102 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 00103 fmap[j-1] = fmap[j]; 00104 fmap[j-1] = tmp; 00105 } 00106 } 00107 00108 00109 /*---------------------------------------------*/ 00110 #define fswap(zz1, zz2) \ 00111 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 00112 00113 #define fvswap(zzp1, zzp2, zzn) \ 00114 { \ 00115 Int32 yyp1 = (zzp1); \ 00116 Int32 yyp2 = (zzp2); \ 00117 Int32 yyn = (zzn); \ 00118 while (yyn > 0) { \ 00119 fswap(fmap[yyp1], fmap[yyp2]); \ 00120 yyp1++; yyp2++; yyn--; \ 00121 } \ 00122 } 00123 00124 00125 #define fmin(a,b) ((a) < (b)) ? (a) : (b) 00126 00127 #define fpush(lz,hz) { stackLo[sp] = lz; \ 00128 stackHi[sp] = hz; \ 00129 sp++; } 00130 00131 #define fpop(lz,hz) { sp--; \ 00132 lz = stackLo[sp]; \ 00133 hz = stackHi[sp]; } 00134 00135 #define FALLBACK_QSORT_SMALL_THRESH 10 00136 #define FALLBACK_QSORT_STACK_SIZE 100 00137 00138 00139 static 00140 void fallbackQSort3 ( UInt32* fmap, 00141 UInt32* eclass, 00142 Int32 loSt, 00143 Int32 hiSt ) 00144 { 00145 Int32 unLo, unHi, ltLo, gtHi, n, m; 00146 Int32 sp, lo, hi; 00147 UInt32 med, r, r3; 00148 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 00149 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 00150 00151 r = 0; 00152 00153 sp = 0; 00154 fpush ( loSt, hiSt ); 00155 00156 while (sp > 0) { 00157 00158 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); 00159 00160 fpop ( lo, hi ); 00161 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 00162 fallbackSimpleSort ( fmap, eclass, lo, hi ); 00163 continue; 00164 } 00165 00166 /* Random partitioning. Median of 3 sometimes fails to 00167 avoid bad cases. Median of 9 seems to help but 00168 looks rather expensive. This too seems to work but 00169 is cheaper. Guidance for the magic constants 00170 7621 and 32768 is taken from Sedgewick's algorithms 00171 book, chapter 35. 00172 */ 00173 r = ((r * 7621) + 1) % 32768; 00174 r3 = r % 3; 00175 if (r3 == 0) med = eclass[fmap[lo]]; else 00176 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 00177 med = eclass[fmap[hi]]; 00178 00179 unLo = ltLo = lo; 00180 unHi = gtHi = hi; 00181 00182 while (1) { 00183 while (1) { 00184 if (unLo > unHi) break; 00185 n = (Int32)eclass[fmap[unLo]] - (Int32)med; 00186 if (n == 0) { 00187 fswap(fmap[unLo], fmap[ltLo]); 00188 ltLo++; unLo++; 00189 continue; 00190 }; 00191 if (n > 0) break; 00192 unLo++; 00193 } 00194 while (1) { 00195 if (unLo > unHi) break; 00196 n = (Int32)eclass[fmap[unHi]] - (Int32)med; 00197 if (n == 0) { 00198 fswap(fmap[unHi], fmap[gtHi]); 00199 gtHi--; unHi--; 00200 continue; 00201 }; 00202 if (n < 0) break; 00203 unHi--; 00204 } 00205 if (unLo > unHi) break; 00206 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 00207 } 00208 00209 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 00210 00211 if (gtHi < ltLo) continue; 00212 00213 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 00214 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 00215 00216 n = lo + unLo - ltLo - 1; 00217 m = hi - (gtHi - unHi) + 1; 00218 00219 if (n - lo > hi - m) { 00220 fpush ( lo, n ); 00221 fpush ( m, hi ); 00222 } else { 00223 fpush ( m, hi ); 00224 fpush ( lo, n ); 00225 } 00226 } 00227 } 00228 00229 #undef fmin 00230 #undef fpush 00231 #undef fpop 00232 #undef fswap 00233 #undef fvswap 00234 #undef FALLBACK_QSORT_SMALL_THRESH 00235 #undef FALLBACK_QSORT_STACK_SIZE 00236 00237 00238 /*---------------------------------------------*/ 00239 /* Pre: 00240 nblock > 0 00241 eclass exists for [0 .. nblock-1] 00242 ((UChar*)eclass) [0 .. nblock-1] holds block 00243 ptr exists for [0 .. nblock-1] 00244 00245 Post: 00246 ((UChar*)eclass) [0 .. nblock-1] holds block 00247 All other areas of eclass destroyed 00248 fmap [0 .. nblock-1] holds sorted order 00249 bhtab [ 0 .. 2+(nblock/32) ] destroyed 00250 */ 00251 00252 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 00253 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 00254 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 00255 #define WORD_BH(zz) bhtab[(zz) >> 5] 00256 #define UNALIGNED_BH(zz) ((zz) & 0x01f) 00257 00258 static 00259 void fallbackSort ( UInt32* fmap, 00260 UInt32* eclass, 00261 UInt32* bhtab, 00262 Int32 nblock, 00263 Int32 verb ) 00264 { 00265 Int32 ftab[257]; 00266 Int32 ftabCopy[256]; 00267 Int32 H, i, j, k, l, r, cc, cc1; 00268 Int32 nNotDone; 00269 Int32 nBhtab; 00270 UChar* eclass8 = (UChar*)eclass; 00271 00272 /*-- 00273 Initial 1-char radix sort to generate 00274 initial fmap and initial BH bits. 00275 --*/ 00276 if (verb >= 4) 00277 VPrintf0 ( " bucket sorting ...\n" ); 00278 for (i = 0; i < 257; i++) ftab[i] = 0; 00279 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 00280 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 00281 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 00282 00283 for (i = 0; i < nblock; i++) { 00284 j = eclass8[i]; 00285 k = ftab[j] - 1; 00286 ftab[j] = k; 00287 fmap[k] = i; 00288 } 00289 00290 nBhtab = 2 + (nblock / 32); 00291 for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 00292 for (i = 0; i < 256; i++) SET_BH(ftab[i]); 00293 00294 /*-- 00295 Inductively refine the buckets. Kind-of an 00296 "exponential radix sort" (!), inspired by the 00297 Manber-Myers suffix array construction algorithm. 00298 --*/ 00299 00300 /*-- set sentinel bits for block-end detection --*/ 00301 for (i = 0; i < 32; i++) { 00302 SET_BH(nblock + 2*i); 00303 CLEAR_BH(nblock + 2*i + 1); 00304 } 00305 00306 /*-- the log(N) loop --*/ 00307 H = 1; 00308 while (1) { 00309 00310 if (verb >= 4) 00311 VPrintf1 ( " depth %6d has ", H ); 00312 00313 j = 0; 00314 for (i = 0; i < nblock; i++) { 00315 if (ISSET_BH(i)) j = i; 00316 k = fmap[i] - H; if (k < 0) k += nblock; 00317 eclass[k] = j; 00318 } 00319 00320 nNotDone = 0; 00321 r = -1; 00322 while (1) { 00323 00324 /*-- find the next non-singleton bucket --*/ 00325 k = r + 1; 00326 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 00327 if (ISSET_BH(k)) { 00328 while (WORD_BH(k) == 0xffffffff) k += 32; 00329 while (ISSET_BH(k)) k++; 00330 } 00331 l = k - 1; 00332 if (l >= nblock) break; 00333 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 00334 if (!ISSET_BH(k)) { 00335 while (WORD_BH(k) == 0x00000000) k += 32; 00336 while (!ISSET_BH(k)) k++; 00337 } 00338 r = k - 1; 00339 if (r >= nblock) break; 00340 00341 /*-- now [l, r] bracket current bucket --*/ 00342 if (r > l) { 00343 nNotDone += (r - l + 1); 00344 fallbackQSort3 ( fmap, eclass, l, r ); 00345 00346 /*-- scan bucket and generate header bits-- */ 00347 cc = -1; 00348 for (i = l; i <= r; i++) { 00349 cc1 = eclass[fmap[i]]; 00350 if (cc != cc1) { SET_BH(i); cc = cc1; }; 00351 } 00352 } 00353 } 00354 00355 if (verb >= 4) 00356 VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 00357 00358 H *= 2; 00359 if (H > nblock || nNotDone == 0) break; 00360 } 00361 00362 /*-- 00363 Reconstruct the original block in 00364 eclass8 [0 .. nblock-1], since the 00365 previous phase destroyed it. 00366 --*/ 00367 if (verb >= 4) 00368 VPrintf0 ( " reconstructing block ...\n" ); 00369 j = 0; 00370 for (i = 0; i < nblock; i++) { 00371 while (ftabCopy[j] == 0) j++; 00372 ftabCopy[j]--; 00373 eclass8[fmap[i]] = (UChar)j; 00374 } 00375 AssertH ( j < 256, 1005 ); 00376 } 00377 00378 #undef SET_BH 00379 #undef CLEAR_BH 00380 #undef ISSET_BH 00381 #undef WORD_BH 00382 #undef UNALIGNED_BH 00383 00384 00385 /*---------------------------------------------*/ 00386 /*--- The main, O(N^2 log(N)) sorting ---*/ 00387 /*--- algorithm. Faster for "normal" ---*/ 00388 /*--- non-repetitive blocks. ---*/ 00389 /*---------------------------------------------*/ 00390 00391 /*---------------------------------------------*/ 00392 static 00393 __inline__ 00394 Bool mainGtU ( UInt32 i1, 00395 UInt32 i2, 00396 UChar* block, 00397 UInt16* quadrant, 00398 UInt32 nblock, 00399 Int32* budget ) 00400 { 00401 Int32 k; 00402 UChar c1, c2; 00403 UInt16 s1, s2; 00404 00405 AssertD ( i1 != i2, "mainGtU" ); 00406 /* 1 */ 00407 c1 = block[i1]; c2 = block[i2]; 00408 if (c1 != c2) return (c1 > c2); 00409 i1++; i2++; 00410 /* 2 */ 00411 c1 = block[i1]; c2 = block[i2]; 00412 if (c1 != c2) return (c1 > c2); 00413 i1++; i2++; 00414 /* 3 */ 00415 c1 = block[i1]; c2 = block[i2]; 00416 if (c1 != c2) return (c1 > c2); 00417 i1++; i2++; 00418 /* 4 */ 00419 c1 = block[i1]; c2 = block[i2]; 00420 if (c1 != c2) return (c1 > c2); 00421 i1++; i2++; 00422 /* 5 */ 00423 c1 = block[i1]; c2 = block[i2]; 00424 if (c1 != c2) return (c1 > c2); 00425 i1++; i2++; 00426 /* 6 */ 00427 c1 = block[i1]; c2 = block[i2]; 00428 if (c1 != c2) return (c1 > c2); 00429 i1++; i2++; 00430 /* 7 */ 00431 c1 = block[i1]; c2 = block[i2]; 00432 if (c1 != c2) return (c1 > c2); 00433 i1++; i2++; 00434 /* 8 */ 00435 c1 = block[i1]; c2 = block[i2]; 00436 if (c1 != c2) return (c1 > c2); 00437 i1++; i2++; 00438 /* 9 */ 00439 c1 = block[i1]; c2 = block[i2]; 00440 if (c1 != c2) return (c1 > c2); 00441 i1++; i2++; 00442 /* 10 */ 00443 c1 = block[i1]; c2 = block[i2]; 00444 if (c1 != c2) return (c1 > c2); 00445 i1++; i2++; 00446 /* 11 */ 00447 c1 = block[i1]; c2 = block[i2]; 00448 if (c1 != c2) return (c1 > c2); 00449 i1++; i2++; 00450 /* 12 */ 00451 c1 = block[i1]; c2 = block[i2]; 00452 if (c1 != c2) return (c1 > c2); 00453 i1++; i2++; 00454 00455 k = nblock + 8; 00456 00457 do { 00458 /* 1 */ 00459 c1 = block[i1]; c2 = block[i2]; 00460 if (c1 != c2) return (c1 > c2); 00461 s1 = quadrant[i1]; s2 = quadrant[i2]; 00462 if (s1 != s2) return (s1 > s2); 00463 i1++; i2++; 00464 /* 2 */ 00465 c1 = block[i1]; c2 = block[i2]; 00466 if (c1 != c2) return (c1 > c2); 00467 s1 = quadrant[i1]; s2 = quadrant[i2]; 00468 if (s1 != s2) return (s1 > s2); 00469 i1++; i2++; 00470 /* 3 */ 00471 c1 = block[i1]; c2 = block[i2]; 00472 if (c1 != c2) return (c1 > c2); 00473 s1 = quadrant[i1]; s2 = quadrant[i2]; 00474 if (s1 != s2) return (s1 > s2); 00475 i1++; i2++; 00476 /* 4 */ 00477 c1 = block[i1]; c2 = block[i2]; 00478 if (c1 != c2) return (c1 > c2); 00479 s1 = quadrant[i1]; s2 = quadrant[i2]; 00480 if (s1 != s2) return (s1 > s2); 00481 i1++; i2++; 00482 /* 5 */ 00483 c1 = block[i1]; c2 = block[i2]; 00484 if (c1 != c2) return (c1 > c2); 00485 s1 = quadrant[i1]; s2 = quadrant[i2]; 00486 if (s1 != s2) return (s1 > s2); 00487 i1++; i2++; 00488 /* 6 */ 00489 c1 = block[i1]; c2 = block[i2]; 00490 if (c1 != c2) return (c1 > c2); 00491 s1 = quadrant[i1]; s2 = quadrant[i2]; 00492 if (s1 != s2) return (s1 > s2); 00493 i1++; i2++; 00494 /* 7 */ 00495 c1 = block[i1]; c2 = block[i2]; 00496 if (c1 != c2) return (c1 > c2); 00497 s1 = quadrant[i1]; s2 = quadrant[i2]; 00498 if (s1 != s2) return (s1 > s2); 00499 i1++; i2++; 00500 /* 8 */ 00501 c1 = block[i1]; c2 = block[i2]; 00502 if (c1 != c2) return (c1 > c2); 00503 s1 = quadrant[i1]; s2 = quadrant[i2]; 00504 if (s1 != s2) return (s1 > s2); 00505 i1++; i2++; 00506 00507 if (i1 >= nblock) i1 -= nblock; 00508 if (i2 >= nblock) i2 -= nblock; 00509 00510 k -= 8; 00511 (*budget)--; 00512 } 00513 while (k >= 0); 00514 00515 return False; 00516 } 00517 00518 00519 /*---------------------------------------------*/ 00520 /*-- 00521 Knuth's increments seem to work better 00522 than Incerpi-Sedgewick here. Possibly 00523 because the number of elems to sort is 00524 usually small, typically <= 20. 00525 --*/ 00526 static 00527 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 00528 9841, 29524, 88573, 265720, 00529 797161, 2391484 }; 00530 00531 static 00532 void mainSimpleSort ( UInt32* ptr, 00533 UChar* block, 00534 UInt16* quadrant, 00535 Int32 nblock, 00536 Int32 lo, 00537 Int32 hi, 00538 Int32 d, 00539 Int32* budget ) 00540 { 00541 Int32 i, j, h, bigN, hp; 00542 UInt32 v; 00543 00544 bigN = hi - lo + 1; 00545 if (bigN < 2) return; 00546 00547 hp = 0; 00548 while (incs[hp] < bigN) hp++; 00549 hp--; 00550 00551 for (; hp >= 0; hp--) { 00552 h = incs[hp]; 00553 00554 i = lo + h; 00555 while (True) { 00556 00557 /*-- copy 1 --*/ 00558 if (i > hi) break; 00559 v = ptr[i]; 00560 j = i; 00561 while ( mainGtU ( 00562 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 00563 ) ) { 00564 ptr[j] = ptr[j-h]; 00565 j = j - h; 00566 if (j <= (lo + h - 1)) break; 00567 } 00568 ptr[j] = v; 00569 i++; 00570 00571 /*-- copy 2 --*/ 00572 if (i > hi) break; 00573 v = ptr[i]; 00574 j = i; 00575 while ( mainGtU ( 00576 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 00577 ) ) { 00578 ptr[j] = ptr[j-h]; 00579 j = j - h; 00580 if (j <= (lo + h - 1)) break; 00581 } 00582 ptr[j] = v; 00583 i++; 00584 00585 /*-- copy 3 --*/ 00586 if (i > hi) break; 00587 v = ptr[i]; 00588 j = i; 00589 while ( mainGtU ( 00590 ptr[j-h]+d, v+d, block, quadrant, nblock, budget 00591 ) ) { 00592 ptr[j] = ptr[j-h]; 00593 j = j - h; 00594 if (j <= (lo + h - 1)) break; 00595 } 00596 ptr[j] = v; 00597 i++; 00598 00599 if (*budget < 0) return; 00600 } 00601 } 00602 } 00603 00604 00605 /*---------------------------------------------*/ 00606 /*-- 00607 The following is an implementation of 00608 an elegant 3-way quicksort for strings, 00609 described in a paper "Fast Algorithms for 00610 Sorting and Searching Strings", by Robert 00611 Sedgewick and Jon L. Bentley. 00612 --*/ 00613 00614 #define mswap(zz1, zz2) \ 00615 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 00616 00617 #define mvswap(zzp1, zzp2, zzn) \ 00618 { \ 00619 Int32 yyp1 = (zzp1); \ 00620 Int32 yyp2 = (zzp2); \ 00621 Int32 yyn = (zzn); \ 00622 while (yyn > 0) { \ 00623 mswap(ptr[yyp1], ptr[yyp2]); \ 00624 yyp1++; yyp2++; yyn--; \ 00625 } \ 00626 } 00627 00628 static 00629 __inline__ 00630 UChar mmed3 ( UChar a, UChar b, UChar c ) 00631 { 00632 UChar t; 00633 if (a > b) { t = a; a = b; b = t; }; 00634 if (b > c) { 00635 b = c; 00636 if (a > b) b = a; 00637 } 00638 return b; 00639 } 00640 00641 #define mmin(a,b) ((a) < (b)) ? (a) : (b) 00642 00643 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 00644 stackHi[sp] = hz; \ 00645 stackD [sp] = dz; \ 00646 sp++; } 00647 00648 #define mpop(lz,hz,dz) { sp--; \ 00649 lz = stackLo[sp]; \ 00650 hz = stackHi[sp]; \ 00651 dz = stackD [sp]; } 00652 00653 00654 #define mnextsize(az) (nextHi[az]-nextLo[az]) 00655 00656 #define mnextswap(az,bz) \ 00657 { Int32 tz; \ 00658 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 00659 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 00660 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 00661 00662 00663 #define MAIN_QSORT_SMALL_THRESH 20 00664 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 00665 #define MAIN_QSORT_STACK_SIZE 100 00666 00667 static 00668 void mainQSort3 ( UInt32* ptr, 00669 UChar* block, 00670 UInt16* quadrant, 00671 Int32 nblock, 00672 Int32 loSt, 00673 Int32 hiSt, 00674 Int32 dSt, 00675 Int32* budget ) 00676 { 00677 Int32 unLo, unHi, ltLo, gtHi, n, m, med; 00678 Int32 sp, lo, hi, d; 00679 00680 Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 00681 Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 00682 Int32 stackD [MAIN_QSORT_STACK_SIZE]; 00683 00684 Int32 nextLo[3]; 00685 Int32 nextHi[3]; 00686 Int32 nextD [3]; 00687 00688 sp = 0; 00689 mpush ( loSt, hiSt, dSt ); 00690 00691 while (sp > 0) { 00692 00693 AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); 00694 00695 mpop ( lo, hi, d ); 00696 if (hi - lo < MAIN_QSORT_SMALL_THRESH || 00697 d > MAIN_QSORT_DEPTH_THRESH) { 00698 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 00699 if (*budget < 0) return; 00700 continue; 00701 } 00702 00703 med = (Int32) 00704 mmed3 ( block[ptr[ lo ]+d], 00705 block[ptr[ hi ]+d], 00706 block[ptr[ (lo+hi)>>1 ]+d] ); 00707 00708 unLo = ltLo = lo; 00709 unHi = gtHi = hi; 00710 00711 while (True) { 00712 while (True) { 00713 if (unLo > unHi) break; 00714 n = ((Int32)block[ptr[unLo]+d]) - med; 00715 if (n == 0) { 00716 mswap(ptr[unLo], ptr[ltLo]); 00717 ltLo++; unLo++; continue; 00718 }; 00719 if (n > 0) break; 00720 unLo++; 00721 } 00722 while (True) { 00723 if (unLo > unHi) break; 00724 n = ((Int32)block[ptr[unHi]+d]) - med; 00725 if (n == 0) { 00726 mswap(ptr[unHi], ptr[gtHi]); 00727 gtHi--; unHi--; continue; 00728 }; 00729 if (n < 0) break; 00730 unHi--; 00731 } 00732 if (unLo > unHi) break; 00733 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 00734 } 00735 00736 AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 00737 00738 if (gtHi < ltLo) { 00739 mpush(lo, hi, d+1 ); 00740 continue; 00741 } 00742 00743 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 00744 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 00745 00746 n = lo + unLo - ltLo - 1; 00747 m = hi - (gtHi - unHi) + 1; 00748 00749 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 00750 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 00751 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 00752 00753 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 00754 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 00755 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 00756 00757 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 00758 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 00759 00760 mpush (nextLo[0], nextHi[0], nextD[0]); 00761 mpush (nextLo[1], nextHi[1], nextD[1]); 00762 mpush (nextLo[2], nextHi[2], nextD[2]); 00763 } 00764 } 00765 00766 #undef mswap 00767 #undef mvswap 00768 #undef mpush 00769 #undef mpop 00770 #undef mmin 00771 #undef mnextsize 00772 #undef mnextswap 00773 #undef MAIN_QSORT_SMALL_THRESH 00774 #undef MAIN_QSORT_DEPTH_THRESH 00775 #undef MAIN_QSORT_STACK_SIZE 00776 00777 00778 /*---------------------------------------------*/ 00779 /* Pre: 00780 nblock > N_OVERSHOOT 00781 block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 00782 ((UChar*)block32) [0 .. nblock-1] holds block 00783 ptr exists for [0 .. nblock-1] 00784 00785 Post: 00786 ((UChar*)block32) [0 .. nblock-1] holds block 00787 All other areas of block32 destroyed 00788 ftab [0 .. 65536 ] destroyed 00789 ptr [0 .. nblock-1] holds sorted order 00790 if (*budget < 0), sorting was abandoned 00791 */ 00792 00793 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 00794 #define SETMASK (1 << 21) 00795 #define CLEARMASK (~(SETMASK)) 00796 00797 static 00798 void mainSort ( UInt32* ptr, 00799 UChar* block, 00800 UInt16* quadrant, 00801 UInt32* ftab, 00802 Int32 nblock, 00803 Int32 verb, 00804 Int32* budget ) 00805 { 00806 Int32 i, j, k, ss, sb; 00807 Int32 runningOrder[256]; 00808 Bool bigDone[256]; 00809 Int32 copyStart[256]; 00810 Int32 copyEnd [256]; 00811 UChar c1; 00812 Int32 numQSorted; 00813 UInt16 s; 00814 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 00815 00816 /*-- set up the 2-byte frequency table --*/ 00817 for (i = 65536; i >= 0; i--) ftab[i] = 0; 00818 00819 j = block[0] << 8; 00820 i = nblock-1; 00821 for (; i >= 3; i -= 4) { 00822 quadrant[i] = 0; 00823 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 00824 ftab[j]++; 00825 quadrant[i-1] = 0; 00826 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 00827 ftab[j]++; 00828 quadrant[i-2] = 0; 00829 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 00830 ftab[j]++; 00831 quadrant[i-3] = 0; 00832 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 00833 ftab[j]++; 00834 } 00835 for (; i >= 0; i--) { 00836 quadrant[i] = 0; 00837 j = (j >> 8) | ( ((UInt16)block[i]) << 8); 00838 ftab[j]++; 00839 } 00840 00841 /*-- (emphasises close relationship of block & quadrant) --*/ 00842 for (i = 0; i < BZ_N_OVERSHOOT; i++) { 00843 block [nblock+i] = block[i]; 00844 quadrant[nblock+i] = 0; 00845 } 00846 00847 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 00848 00849 /*-- Complete the initial radix sort --*/ 00850 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 00851 00852 s = block[0] << 8; 00853 i = nblock-1; 00854 for (; i >= 3; i -= 4) { 00855 s = (s >> 8) | (block[i] << 8); 00856 j = ftab[s] -1; 00857 ftab[s] = j; 00858 ptr[j] = i; 00859 s = (s >> 8) | (block[i-1] << 8); 00860 j = ftab[s] -1; 00861 ftab[s] = j; 00862 ptr[j] = i-1; 00863 s = (s >> 8) | (block[i-2] << 8); 00864 j = ftab[s] -1; 00865 ftab[s] = j; 00866 ptr[j] = i-2; 00867 s = (s >> 8) | (block[i-3] << 8); 00868 j = ftab[s] -1; 00869 ftab[s] = j; 00870 ptr[j] = i-3; 00871 } 00872 for (; i >= 0; i--) { 00873 s = (s >> 8) | (block[i] << 8); 00874 j = ftab[s] -1; 00875 ftab[s] = j; 00876 ptr[j] = i; 00877 } 00878 00879 /*-- 00880 Now ftab contains the first loc of every small bucket. 00881 Calculate the running order, from smallest to largest 00882 big bucket. 00883 --*/ 00884 for (i = 0; i <= 255; i++) { 00885 bigDone [i] = False; 00886 runningOrder[i] = i; 00887 } 00888 00889 { 00890 Int32 vv; 00891 Int32 h = 1; 00892 do h = 3 * h + 1; while (h <= 256); 00893 do { 00894 h = h / 3; 00895 for (i = h; i <= 255; i++) { 00896 vv = runningOrder[i]; 00897 j = i; 00898 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 00899 runningOrder[j] = runningOrder[j-h]; 00900 j = j - h; 00901 if (j <= (h - 1)) goto zero; 00902 } 00903 zero: 00904 runningOrder[j] = vv; 00905 } 00906 } while (h != 1); 00907 } 00908 00909 /*-- 00910 The main sorting loop. 00911 --*/ 00912 00913 numQSorted = 0; 00914 00915 for (i = 0; i <= 255; i++) { 00916 00917 /*-- 00918 Process big buckets, starting with the least full. 00919 Basically this is a 3-step process in which we call 00920 mainQSort3 to sort the small buckets [ss, j], but 00921 also make a big effort to avoid the calls if we can. 00922 --*/ 00923 ss = runningOrder[i]; 00924 00925 /*-- 00926 Step 1: 00927 Complete the big bucket [ss] by quicksorting 00928 any unsorted small buckets [ss, j], for j != ss. 00929 Hopefully previous pointer-scanning phases have already 00930 completed many of the small buckets [ss, j], so 00931 we don't have to sort them at all. 00932 --*/ 00933 for (j = 0; j <= 255; j++) { 00934 if (j != ss) { 00935 sb = (ss << 8) + j; 00936 if ( ! (ftab[sb] & SETMASK) ) { 00937 Int32 lo = ftab[sb] & CLEARMASK; 00938 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 00939 if (hi > lo) { 00940 if (verb >= 4) 00941 VPrintf4 ( " qsort [0x%x, 0x%x] " 00942 "done %d this %d\n", 00943 ss, j, numQSorted, hi - lo + 1 ); 00944 mainQSort3 ( 00945 ptr, block, quadrant, nblock, 00946 lo, hi, BZ_N_RADIX, budget 00947 ); 00948 numQSorted += (hi - lo + 1); 00949 if (*budget < 0) return; 00950 } 00951 } 00952 ftab[sb] |= SETMASK; 00953 } 00954 } 00955 00956 AssertH ( !bigDone[ss], 1006 ); 00957 00958 /*-- 00959 Step 2: 00960 Now scan this big bucket [ss] so as to synthesise the 00961 sorted order for small buckets [t, ss] for all t, 00962 including, magically, the bucket [ss,ss] too. 00963 This will avoid doing Real Work in subsequent Step 1's. 00964 --*/ 00965 { 00966 for (j = 0; j <= 255; j++) { 00967 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 00968 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 00969 } 00970 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 00971 k = ptr[j]-1; if (k < 0) k += nblock; 00972 c1 = block[k]; 00973 if (!bigDone[c1]) 00974 ptr[ copyStart[c1]++ ] = k; 00975 } 00976 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 00977 k = ptr[j]-1; if (k < 0) k += nblock; 00978 c1 = block[k]; 00979 if (!bigDone[c1]) 00980 ptr[ copyEnd[c1]-- ] = k; 00981 } 00982 } 00983 00984 AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 00985 || 00986 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 00987 Necessity for this case is demonstrated by compressing 00988 a sequence of approximately 48.5 million of character 00989 251; 1.0.0/1.0.1 will then die here. */ 00990 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 00991 1007 ) 00992 00993 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 00994 00995 /*-- 00996 Step 3: 00997 The [ss] big bucket is now done. Record this fact, 00998 and update the quadrant descriptors. Remember to 00999 update quadrants in the overshoot area too, if 01000 necessary. The "if (i < 255)" test merely skips 01001 this updating for the last bucket processed, since 01002 updating for the last bucket is pointless. 01003 01004 The quadrant array provides a way to incrementally 01005 cache sort orderings, as they appear, so as to 01006 make subsequent comparisons in fullGtU() complete 01007 faster. For repetitive blocks this makes a big 01008 difference (but not big enough to be able to avoid 01009 the fallback sorting mechanism, exponential radix sort). 01010 01011 The precise meaning is: at all times: 01012 01013 for 0 <= i < nblock and 0 <= j <= nblock 01014 01015 if block[i] != block[j], 01016 01017 then the relative values of quadrant[i] and 01018 quadrant[j] are meaningless. 01019 01020 else { 01021 if quadrant[i] < quadrant[j] 01022 then the string starting at i lexicographically 01023 precedes the string starting at j 01024 01025 else if quadrant[i] > quadrant[j] 01026 then the string starting at j lexicographically 01027 precedes the string starting at i 01028 01029 else 01030 the relative ordering of the strings starting 01031 at i and j has not yet been determined. 01032 } 01033 --*/ 01034 bigDone[ss] = True; 01035 01036 if (i < 255) { 01037 Int32 bbStart = ftab[ss << 8] & CLEARMASK; 01038 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 01039 Int32 shifts = 0; 01040 01041 while ((bbSize >> shifts) > 65534) shifts++; 01042 01043 for (j = bbSize-1; j >= 0; j--) { 01044 Int32 a2update = ptr[bbStart + j]; 01045 UInt16 qVal = (UInt16)(j >> shifts); 01046 quadrant[a2update] = qVal; 01047 if (a2update < BZ_N_OVERSHOOT) 01048 quadrant[a2update + nblock] = qVal; 01049 } 01050 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 01051 } 01052 01053 } 01054 01055 if (verb >= 4) 01056 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 01057 nblock, numQSorted, nblock - numQSorted ); 01058 } 01059 01060 #undef BIGFREQ 01061 #undef SETMASK 01062 #undef CLEARMASK 01063 01064 01065 /*---------------------------------------------*/ 01066 /* Pre: 01067 nblock > 0 01068 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 01069 ((UChar*)arr2) [0 .. nblock-1] holds block 01070 arr1 exists for [0 .. nblock-1] 01071 01072 Post: 01073 ((UChar*)arr2) [0 .. nblock-1] holds block 01074 All other areas of block destroyed 01075 ftab [ 0 .. 65536 ] destroyed 01076 arr1 [0 .. nblock-1] holds sorted order 01077 */ 01078 void BZ2_blockSort ( EState* s ) 01079 { 01080 UInt32* ptr = s->ptr; 01081 UChar* block = s->block; 01082 UInt32* ftab = s->ftab; 01083 Int32 nblock = s->nblock; 01084 Int32 verb = s->verbosity; 01085 Int32 wfact = s->workFactor; 01086 UInt16* quadrant; 01087 Int32 budget; 01088 Int32 budgetInit; 01089 Int32 i; 01090 01091 if (nblock < 10000) { 01092 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 01093 } else { 01094 /* Calculate the location for quadrant, remembering to get 01095 the alignment right. Assumes that &(block[0]) is at least 01096 2-byte aligned -- this should be ok since block is really 01097 the first section of arr2. 01098 */ 01099 i = nblock+BZ_N_OVERSHOOT; 01100 if (i & 1) i++; 01101 quadrant = (UInt16*)(&(block[i])); 01102 01103 /* (wfact-1) / 3 puts the default-factor-30 01104 transition point at very roughly the same place as 01105 with v0.1 and v0.9.0. 01106 Not that it particularly matters any more, since the 01107 resulting compressed stream is now the same regardless 01108 of whether or not we use the main sort or fallback sort. 01109 */ 01110 if (wfact < 1 ) wfact = 1; 01111 if (wfact > 100) wfact = 100; 01112 budgetInit = nblock * ((wfact-1) / 3); 01113 budget = budgetInit; 01114 01115 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 01116 if (verb >= 3) 01117 VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 01118 budgetInit - budget, 01119 nblock, 01120 (float)(budgetInit - budget) / 01121 (float)(nblock==0 ? 1 : nblock) ); 01122 if (budget < 0) { 01123 if (verb >= 2) 01124 VPrintf0 ( " too repetitive; using fallback" 01125 " sorting algorithm\n" ); 01126 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 01127 } 01128 } 01129 01130 s->origPtr = -1; 01131 for (i = 0; i < s->nblock; i++) 01132 if (ptr[i] == 0) 01133 { s->origPtr = i; break; }; 01134 01135 AssertH( s->origPtr != -1, 1003 ); 01136 } 01137 01138 01139 /*-------------------------------------------------------------*/ 01140 /*--- end blocksort.c ---*/ 01141 /*-------------------------------------------------------------*/