Back to index

lightning-sunbird  0.9+nobinonly
blocksort.c
Go to the documentation of this file.
00001 
00002 /*-------------------------------------------------------------*/
00003 /*--- Block sorting machinery                               ---*/
00004 /*---                                           blocksort.c ---*/
00005 /*-------------------------------------------------------------*/
00006 
00007 /*--
00008   This file is a part of bzip2 and/or libbzip2, a program and
00009   library for lossless, block-sorting data compression.
00010 
00011   Copyright (C) 1996-2005 Julian R Seward.  All rights reserved.
00012 
00013   Redistribution and use in source and binary forms, with or without
00014   modification, are permitted provided that the following conditions
00015   are met:
00016 
00017   1. Redistributions of source code must retain the above copyright
00018      notice, this list of conditions and the following disclaimer.
00019 
00020   2. The origin of this software must not be misrepresented; you must 
00021      not claim that you wrote the original software.  If you use this 
00022      software in a product, an acknowledgment in the product 
00023      documentation would be appreciated but is not required.
00024 
00025   3. Altered source versions must be plainly marked as such, and must
00026      not be misrepresented as being the original software.
00027 
00028   4. The name of the author may not be used to endorse or promote 
00029      products derived from this software without specific prior written 
00030      permission.
00031 
00032   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
00033   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00034   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00035   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
00036   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00037   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00038   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00039   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00040   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00041   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00042   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00043 
00044   Julian Seward, Cambridge, UK.
00045   jseward@bzip.org
00046   bzip2/libbzip2 version 1.0 of 21 March 2000
00047 
00048   This program is based on (at least) the work of:
00049      Mike Burrows
00050      David Wheeler
00051      Peter Fenwick
00052      Alistair Moffat
00053      Radford Neal
00054      Ian H. Witten
00055      Robert Sedgewick
00056      Jon L. Bentley
00057 
00058   For more information on these sources, see the manual.
00059 
00060   To get some idea how the block sorting algorithms in this file 
00061   work, read my paper 
00062      On the Performance of BWT Sorting Algorithms
00063   in Proceedings of the IEEE Data Compression Conference 2000,
00064   Snowbird, Utah, USA, 27-30 March 2000.  The main sort in this
00065   file implements the algorithm called  cache  in the paper.
00066 --*/
00067 
00068 
00069 #include "bzlib_private.h"
00070 
00071 /*---------------------------------------------*/
00072 /*--- Fallback O(N log(N)^2) sorting        ---*/
00073 /*--- algorithm, for repetitive blocks      ---*/
00074 /*---------------------------------------------*/
00075 
00076 /*---------------------------------------------*/
00077 static 
00078 __inline__
00079 void fallbackSimpleSort ( UInt32* fmap, 
00080                           UInt32* eclass, 
00081                           Int32   lo, 
00082                           Int32   hi )
00083 {
00084    Int32 i, j, tmp;
00085    UInt32 ec_tmp;
00086 
00087    if (lo == hi) return;
00088 
00089    if (hi - lo > 3) {
00090       for ( i = hi-4; i >= lo; i-- ) {
00091          tmp = fmap[i];
00092          ec_tmp = eclass[tmp];
00093          for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
00094             fmap[j-4] = fmap[j];
00095          fmap[j-4] = tmp;
00096       }
00097    }
00098 
00099    for ( i = hi-1; i >= lo; i-- ) {
00100       tmp = fmap[i];
00101       ec_tmp = eclass[tmp];
00102       for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
00103          fmap[j-1] = fmap[j];
00104       fmap[j-1] = tmp;
00105    }
00106 }
00107 
00108 
00109 /*---------------------------------------------*/
00110 #define fswap(zz1, zz2) \
00111    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
00112 
00113 #define fvswap(zzp1, zzp2, zzn)       \
00114 {                                     \
00115    Int32 yyp1 = (zzp1);               \
00116    Int32 yyp2 = (zzp2);               \
00117    Int32 yyn  = (zzn);                \
00118    while (yyn > 0) {                  \
00119       fswap(fmap[yyp1], fmap[yyp2]);  \
00120       yyp1++; yyp2++; yyn--;          \
00121    }                                  \
00122 }
00123 
00124 
00125 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
00126 
00127 #define fpush(lz,hz) { stackLo[sp] = lz; \
00128                        stackHi[sp] = hz; \
00129                        sp++; }
00130 
00131 #define fpop(lz,hz) { sp--;              \
00132                       lz = stackLo[sp];  \
00133                       hz = stackHi[sp]; }
00134 
00135 #define FALLBACK_QSORT_SMALL_THRESH 10
00136 #define FALLBACK_QSORT_STACK_SIZE   100
00137 
00138 
00139 static
00140 void fallbackQSort3 ( UInt32* fmap, 
00141                       UInt32* eclass,
00142                       Int32   loSt, 
00143                       Int32   hiSt )
00144 {
00145    Int32 unLo, unHi, ltLo, gtHi, n, m;
00146    Int32 sp, lo, hi;
00147    UInt32 med, r, r3;
00148    Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
00149    Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
00150 
00151    r = 0;
00152 
00153    sp = 0;
00154    fpush ( loSt, hiSt );
00155 
00156    while (sp > 0) {
00157 
00158       AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
00159 
00160       fpop ( lo, hi );
00161       if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
00162          fallbackSimpleSort ( fmap, eclass, lo, hi );
00163          continue;
00164       }
00165 
00166       /* Random partitioning.  Median of 3 sometimes fails to
00167          avoid bad cases.  Median of 9 seems to help but 
00168          looks rather expensive.  This too seems to work but
00169          is cheaper.  Guidance for the magic constants 
00170          7621 and 32768 is taken from Sedgewick's algorithms
00171          book, chapter 35.
00172       */
00173       r = ((r * 7621) + 1) % 32768;
00174       r3 = r % 3;
00175       if (r3 == 0) med = eclass[fmap[lo]]; else
00176       if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
00177                    med = eclass[fmap[hi]];
00178 
00179       unLo = ltLo = lo;
00180       unHi = gtHi = hi;
00181 
00182       while (1) {
00183          while (1) {
00184             if (unLo > unHi) break;
00185             n = (Int32)eclass[fmap[unLo]] - (Int32)med;
00186             if (n == 0) { 
00187                fswap(fmap[unLo], fmap[ltLo]); 
00188                ltLo++; unLo++; 
00189                continue; 
00190             };
00191             if (n > 0) break;
00192             unLo++;
00193          }
00194          while (1) {
00195             if (unLo > unHi) break;
00196             n = (Int32)eclass[fmap[unHi]] - (Int32)med;
00197             if (n == 0) { 
00198                fswap(fmap[unHi], fmap[gtHi]); 
00199                gtHi--; unHi--; 
00200                continue; 
00201             };
00202             if (n < 0) break;
00203             unHi--;
00204          }
00205          if (unLo > unHi) break;
00206          fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
00207       }
00208 
00209       AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
00210 
00211       if (gtHi < ltLo) continue;
00212 
00213       n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
00214       m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
00215 
00216       n = lo + unLo - ltLo - 1;
00217       m = hi - (gtHi - unHi) + 1;
00218 
00219       if (n - lo > hi - m) {
00220          fpush ( lo, n );
00221          fpush ( m, hi );
00222       } else {
00223          fpush ( m, hi );
00224          fpush ( lo, n );
00225       }
00226    }
00227 }
00228 
00229 #undef fmin
00230 #undef fpush
00231 #undef fpop
00232 #undef fswap
00233 #undef fvswap
00234 #undef FALLBACK_QSORT_SMALL_THRESH
00235 #undef FALLBACK_QSORT_STACK_SIZE
00236 
00237 
00238 /*---------------------------------------------*/
00239 /* Pre:
00240       nblock > 0
00241       eclass exists for [0 .. nblock-1]
00242       ((UChar*)eclass) [0 .. nblock-1] holds block
00243       ptr exists for [0 .. nblock-1]
00244 
00245    Post:
00246       ((UChar*)eclass) [0 .. nblock-1] holds block
00247       All other areas of eclass destroyed
00248       fmap [0 .. nblock-1] holds sorted order
00249       bhtab [ 0 .. 2+(nblock/32) ] destroyed
00250 */
00251 
00252 #define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
00253 #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
00254 #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
00255 #define      WORD_BH(zz)  bhtab[(zz) >> 5]
00256 #define UNALIGNED_BH(zz)  ((zz) & 0x01f)
00257 
00258 static
00259 void fallbackSort ( UInt32* fmap, 
00260                     UInt32* eclass, 
00261                     UInt32* bhtab,
00262                     Int32   nblock,
00263                     Int32   verb )
00264 {
00265    Int32 ftab[257];
00266    Int32 ftabCopy[256];
00267    Int32 H, i, j, k, l, r, cc, cc1;
00268    Int32 nNotDone;
00269    Int32 nBhtab;
00270    UChar* eclass8 = (UChar*)eclass;
00271 
00272    /*--
00273       Initial 1-char radix sort to generate
00274       initial fmap and initial BH bits.
00275    --*/
00276    if (verb >= 4)
00277       VPrintf0 ( "        bucket sorting ...\n" );
00278    for (i = 0; i < 257;    i++) ftab[i] = 0;
00279    for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
00280    for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
00281    for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
00282 
00283    for (i = 0; i < nblock; i++) {
00284       j = eclass8[i];
00285       k = ftab[j] - 1;
00286       ftab[j] = k;
00287       fmap[k] = i;
00288    }
00289 
00290    nBhtab = 2 + (nblock / 32);
00291    for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
00292    for (i = 0; i < 256; i++) SET_BH(ftab[i]);
00293 
00294    /*--
00295       Inductively refine the buckets.  Kind-of an
00296       "exponential radix sort" (!), inspired by the
00297       Manber-Myers suffix array construction algorithm.
00298    --*/
00299 
00300    /*-- set sentinel bits for block-end detection --*/
00301    for (i = 0; i < 32; i++) { 
00302       SET_BH(nblock + 2*i);
00303       CLEAR_BH(nblock + 2*i + 1);
00304    }
00305 
00306    /*-- the log(N) loop --*/
00307    H = 1;
00308    while (1) {
00309 
00310       if (verb >= 4) 
00311          VPrintf1 ( "        depth %6d has ", H );
00312 
00313       j = 0;
00314       for (i = 0; i < nblock; i++) {
00315          if (ISSET_BH(i)) j = i;
00316          k = fmap[i] - H; if (k < 0) k += nblock;
00317          eclass[k] = j;
00318       }
00319 
00320       nNotDone = 0;
00321       r = -1;
00322       while (1) {
00323 
00324         /*-- find the next non-singleton bucket --*/
00325          k = r + 1;
00326          while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
00327          if (ISSET_BH(k)) {
00328             while (WORD_BH(k) == 0xffffffff) k += 32;
00329             while (ISSET_BH(k)) k++;
00330          }
00331          l = k - 1;
00332          if (l >= nblock) break;
00333          while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
00334          if (!ISSET_BH(k)) {
00335             while (WORD_BH(k) == 0x00000000) k += 32;
00336             while (!ISSET_BH(k)) k++;
00337          }
00338          r = k - 1;
00339          if (r >= nblock) break;
00340 
00341          /*-- now [l, r] bracket current bucket --*/
00342          if (r > l) {
00343             nNotDone += (r - l + 1);
00344             fallbackQSort3 ( fmap, eclass, l, r );
00345 
00346             /*-- scan bucket and generate header bits-- */
00347             cc = -1;
00348             for (i = l; i <= r; i++) {
00349                cc1 = eclass[fmap[i]];
00350                if (cc != cc1) { SET_BH(i); cc = cc1; };
00351             }
00352          }
00353       }
00354 
00355       if (verb >= 4) 
00356          VPrintf1 ( "%6d unresolved strings\n", nNotDone );
00357 
00358       H *= 2;
00359       if (H > nblock || nNotDone == 0) break;
00360    }
00361 
00362    /*-- 
00363       Reconstruct the original block in
00364       eclass8 [0 .. nblock-1], since the
00365       previous phase destroyed it.
00366    --*/
00367    if (verb >= 4)
00368       VPrintf0 ( "        reconstructing block ...\n" );
00369    j = 0;
00370    for (i = 0; i < nblock; i++) {
00371       while (ftabCopy[j] == 0) j++;
00372       ftabCopy[j]--;
00373       eclass8[fmap[i]] = (UChar)j;
00374    }
00375    AssertH ( j < 256, 1005 );
00376 }
00377 
00378 #undef       SET_BH
00379 #undef     CLEAR_BH
00380 #undef     ISSET_BH
00381 #undef      WORD_BH
00382 #undef UNALIGNED_BH
00383 
00384 
00385 /*---------------------------------------------*/
00386 /*--- The main, O(N^2 log(N)) sorting       ---*/
00387 /*--- algorithm.  Faster for "normal"       ---*/
00388 /*--- non-repetitive blocks.                ---*/
00389 /*---------------------------------------------*/
00390 
00391 /*---------------------------------------------*/
00392 static
00393 __inline__
00394 Bool mainGtU ( UInt32  i1, 
00395                UInt32  i2,
00396                UChar*  block, 
00397                UInt16* quadrant,
00398                UInt32  nblock,
00399                Int32*  budget )
00400 {
00401    Int32  k;
00402    UChar  c1, c2;
00403    UInt16 s1, s2;
00404 
00405    AssertD ( i1 != i2, "mainGtU" );
00406    /* 1 */
00407    c1 = block[i1]; c2 = block[i2];
00408    if (c1 != c2) return (c1 > c2);
00409    i1++; i2++;
00410    /* 2 */
00411    c1 = block[i1]; c2 = block[i2];
00412    if (c1 != c2) return (c1 > c2);
00413    i1++; i2++;
00414    /* 3 */
00415    c1 = block[i1]; c2 = block[i2];
00416    if (c1 != c2) return (c1 > c2);
00417    i1++; i2++;
00418    /* 4 */
00419    c1 = block[i1]; c2 = block[i2];
00420    if (c1 != c2) return (c1 > c2);
00421    i1++; i2++;
00422    /* 5 */
00423    c1 = block[i1]; c2 = block[i2];
00424    if (c1 != c2) return (c1 > c2);
00425    i1++; i2++;
00426    /* 6 */
00427    c1 = block[i1]; c2 = block[i2];
00428    if (c1 != c2) return (c1 > c2);
00429    i1++; i2++;
00430    /* 7 */
00431    c1 = block[i1]; c2 = block[i2];
00432    if (c1 != c2) return (c1 > c2);
00433    i1++; i2++;
00434    /* 8 */
00435    c1 = block[i1]; c2 = block[i2];
00436    if (c1 != c2) return (c1 > c2);
00437    i1++; i2++;
00438    /* 9 */
00439    c1 = block[i1]; c2 = block[i2];
00440    if (c1 != c2) return (c1 > c2);
00441    i1++; i2++;
00442    /* 10 */
00443    c1 = block[i1]; c2 = block[i2];
00444    if (c1 != c2) return (c1 > c2);
00445    i1++; i2++;
00446    /* 11 */
00447    c1 = block[i1]; c2 = block[i2];
00448    if (c1 != c2) return (c1 > c2);
00449    i1++; i2++;
00450    /* 12 */
00451    c1 = block[i1]; c2 = block[i2];
00452    if (c1 != c2) return (c1 > c2);
00453    i1++; i2++;
00454 
00455    k = nblock + 8;
00456 
00457    do {
00458       /* 1 */
00459       c1 = block[i1]; c2 = block[i2];
00460       if (c1 != c2) return (c1 > c2);
00461       s1 = quadrant[i1]; s2 = quadrant[i2];
00462       if (s1 != s2) return (s1 > s2);
00463       i1++; i2++;
00464       /* 2 */
00465       c1 = block[i1]; c2 = block[i2];
00466       if (c1 != c2) return (c1 > c2);
00467       s1 = quadrant[i1]; s2 = quadrant[i2];
00468       if (s1 != s2) return (s1 > s2);
00469       i1++; i2++;
00470       /* 3 */
00471       c1 = block[i1]; c2 = block[i2];
00472       if (c1 != c2) return (c1 > c2);
00473       s1 = quadrant[i1]; s2 = quadrant[i2];
00474       if (s1 != s2) return (s1 > s2);
00475       i1++; i2++;
00476       /* 4 */
00477       c1 = block[i1]; c2 = block[i2];
00478       if (c1 != c2) return (c1 > c2);
00479       s1 = quadrant[i1]; s2 = quadrant[i2];
00480       if (s1 != s2) return (s1 > s2);
00481       i1++; i2++;
00482       /* 5 */
00483       c1 = block[i1]; c2 = block[i2];
00484       if (c1 != c2) return (c1 > c2);
00485       s1 = quadrant[i1]; s2 = quadrant[i2];
00486       if (s1 != s2) return (s1 > s2);
00487       i1++; i2++;
00488       /* 6 */
00489       c1 = block[i1]; c2 = block[i2];
00490       if (c1 != c2) return (c1 > c2);
00491       s1 = quadrant[i1]; s2 = quadrant[i2];
00492       if (s1 != s2) return (s1 > s2);
00493       i1++; i2++;
00494       /* 7 */
00495       c1 = block[i1]; c2 = block[i2];
00496       if (c1 != c2) return (c1 > c2);
00497       s1 = quadrant[i1]; s2 = quadrant[i2];
00498       if (s1 != s2) return (s1 > s2);
00499       i1++; i2++;
00500       /* 8 */
00501       c1 = block[i1]; c2 = block[i2];
00502       if (c1 != c2) return (c1 > c2);
00503       s1 = quadrant[i1]; s2 = quadrant[i2];
00504       if (s1 != s2) return (s1 > s2);
00505       i1++; i2++;
00506 
00507       if (i1 >= nblock) i1 -= nblock;
00508       if (i2 >= nblock) i2 -= nblock;
00509 
00510       k -= 8;
00511       (*budget)--;
00512    }
00513       while (k >= 0);
00514 
00515    return False;
00516 }
00517 
00518 
00519 /*---------------------------------------------*/
00520 /*--
00521    Knuth's increments seem to work better
00522    than Incerpi-Sedgewick here.  Possibly
00523    because the number of elems to sort is
00524    usually small, typically <= 20.
00525 --*/
00526 static
00527 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
00528                    9841, 29524, 88573, 265720,
00529                    797161, 2391484 };
00530 
00531 static
00532 void mainSimpleSort ( UInt32* ptr,
00533                       UChar*  block,
00534                       UInt16* quadrant,
00535                       Int32   nblock,
00536                       Int32   lo, 
00537                       Int32   hi, 
00538                       Int32   d,
00539                       Int32*  budget )
00540 {
00541    Int32 i, j, h, bigN, hp;
00542    UInt32 v;
00543 
00544    bigN = hi - lo + 1;
00545    if (bigN < 2) return;
00546 
00547    hp = 0;
00548    while (incs[hp] < bigN) hp++;
00549    hp--;
00550 
00551    for (; hp >= 0; hp--) {
00552       h = incs[hp];
00553 
00554       i = lo + h;
00555       while (True) {
00556 
00557          /*-- copy 1 --*/
00558          if (i > hi) break;
00559          v = ptr[i];
00560          j = i;
00561          while ( mainGtU ( 
00562                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
00563                  ) ) {
00564             ptr[j] = ptr[j-h];
00565             j = j - h;
00566             if (j <= (lo + h - 1)) break;
00567          }
00568          ptr[j] = v;
00569          i++;
00570 
00571          /*-- copy 2 --*/
00572          if (i > hi) break;
00573          v = ptr[i];
00574          j = i;
00575          while ( mainGtU ( 
00576                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
00577                  ) ) {
00578             ptr[j] = ptr[j-h];
00579             j = j - h;
00580             if (j <= (lo + h - 1)) break;
00581          }
00582          ptr[j] = v;
00583          i++;
00584 
00585          /*-- copy 3 --*/
00586          if (i > hi) break;
00587          v = ptr[i];
00588          j = i;
00589          while ( mainGtU ( 
00590                     ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
00591                  ) ) {
00592             ptr[j] = ptr[j-h];
00593             j = j - h;
00594             if (j <= (lo + h - 1)) break;
00595          }
00596          ptr[j] = v;
00597          i++;
00598 
00599          if (*budget < 0) return;
00600       }
00601    }
00602 }
00603 
00604 
00605 /*---------------------------------------------*/
00606 /*--
00607    The following is an implementation of
00608    an elegant 3-way quicksort for strings,
00609    described in a paper "Fast Algorithms for
00610    Sorting and Searching Strings", by Robert
00611    Sedgewick and Jon L. Bentley.
00612 --*/
00613 
00614 #define mswap(zz1, zz2) \
00615    { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
00616 
00617 #define mvswap(zzp1, zzp2, zzn)       \
00618 {                                     \
00619    Int32 yyp1 = (zzp1);               \
00620    Int32 yyp2 = (zzp2);               \
00621    Int32 yyn  = (zzn);                \
00622    while (yyn > 0) {                  \
00623       mswap(ptr[yyp1], ptr[yyp2]);    \
00624       yyp1++; yyp2++; yyn--;          \
00625    }                                  \
00626 }
00627 
00628 static 
00629 __inline__
00630 UChar mmed3 ( UChar a, UChar b, UChar c )
00631 {
00632    UChar t;
00633    if (a > b) { t = a; a = b; b = t; };
00634    if (b > c) { 
00635       b = c;
00636       if (a > b) b = a;
00637    }
00638    return b;
00639 }
00640 
00641 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
00642 
00643 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
00644                           stackHi[sp] = hz; \
00645                           stackD [sp] = dz; \
00646                           sp++; }
00647 
00648 #define mpop(lz,hz,dz) { sp--;             \
00649                          lz = stackLo[sp]; \
00650                          hz = stackHi[sp]; \
00651                          dz = stackD [sp]; }
00652 
00653 
00654 #define mnextsize(az) (nextHi[az]-nextLo[az])
00655 
00656 #define mnextswap(az,bz)                                        \
00657    { Int32 tz;                                                  \
00658      tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
00659      tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
00660      tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
00661 
00662 
00663 #define MAIN_QSORT_SMALL_THRESH 20
00664 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
00665 #define MAIN_QSORT_STACK_SIZE 100
00666 
00667 static
00668 void mainQSort3 ( UInt32* ptr,
00669                   UChar*  block,
00670                   UInt16* quadrant,
00671                   Int32   nblock,
00672                   Int32   loSt, 
00673                   Int32   hiSt, 
00674                   Int32   dSt,
00675                   Int32*  budget )
00676 {
00677    Int32 unLo, unHi, ltLo, gtHi, n, m, med;
00678    Int32 sp, lo, hi, d;
00679 
00680    Int32 stackLo[MAIN_QSORT_STACK_SIZE];
00681    Int32 stackHi[MAIN_QSORT_STACK_SIZE];
00682    Int32 stackD [MAIN_QSORT_STACK_SIZE];
00683 
00684    Int32 nextLo[3];
00685    Int32 nextHi[3];
00686    Int32 nextD [3];
00687 
00688    sp = 0;
00689    mpush ( loSt, hiSt, dSt );
00690 
00691    while (sp > 0) {
00692 
00693       AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
00694 
00695       mpop ( lo, hi, d );
00696       if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
00697           d > MAIN_QSORT_DEPTH_THRESH) {
00698          mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
00699          if (*budget < 0) return;
00700          continue;
00701       }
00702 
00703       med = (Int32) 
00704             mmed3 ( block[ptr[ lo         ]+d],
00705                     block[ptr[ hi         ]+d],
00706                     block[ptr[ (lo+hi)>>1 ]+d] );
00707 
00708       unLo = ltLo = lo;
00709       unHi = gtHi = hi;
00710 
00711       while (True) {
00712          while (True) {
00713             if (unLo > unHi) break;
00714             n = ((Int32)block[ptr[unLo]+d]) - med;
00715             if (n == 0) { 
00716                mswap(ptr[unLo], ptr[ltLo]); 
00717                ltLo++; unLo++; continue; 
00718             };
00719             if (n >  0) break;
00720             unLo++;
00721          }
00722          while (True) {
00723             if (unLo > unHi) break;
00724             n = ((Int32)block[ptr[unHi]+d]) - med;
00725             if (n == 0) { 
00726                mswap(ptr[unHi], ptr[gtHi]); 
00727                gtHi--; unHi--; continue; 
00728             };
00729             if (n <  0) break;
00730             unHi--;
00731          }
00732          if (unLo > unHi) break;
00733          mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
00734       }
00735 
00736       AssertD ( unHi == unLo-1, "mainQSort3(2)" );
00737 
00738       if (gtHi < ltLo) {
00739          mpush(lo, hi, d+1 );
00740          continue;
00741       }
00742 
00743       n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
00744       m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
00745 
00746       n = lo + unLo - ltLo - 1;
00747       m = hi - (gtHi - unHi) + 1;
00748 
00749       nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
00750       nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
00751       nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
00752 
00753       if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
00754       if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
00755       if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
00756 
00757       AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
00758       AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
00759 
00760       mpush (nextLo[0], nextHi[0], nextD[0]);
00761       mpush (nextLo[1], nextHi[1], nextD[1]);
00762       mpush (nextLo[2], nextHi[2], nextD[2]);
00763    }
00764 }
00765 
00766 #undef mswap
00767 #undef mvswap
00768 #undef mpush
00769 #undef mpop
00770 #undef mmin
00771 #undef mnextsize
00772 #undef mnextswap
00773 #undef MAIN_QSORT_SMALL_THRESH
00774 #undef MAIN_QSORT_DEPTH_THRESH
00775 #undef MAIN_QSORT_STACK_SIZE
00776 
00777 
00778 /*---------------------------------------------*/
00779 /* Pre:
00780       nblock > N_OVERSHOOT
00781       block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
00782       ((UChar*)block32) [0 .. nblock-1] holds block
00783       ptr exists for [0 .. nblock-1]
00784 
00785    Post:
00786       ((UChar*)block32) [0 .. nblock-1] holds block
00787       All other areas of block32 destroyed
00788       ftab [0 .. 65536 ] destroyed
00789       ptr [0 .. nblock-1] holds sorted order
00790       if (*budget < 0), sorting was abandoned
00791 */
00792 
00793 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
00794 #define SETMASK (1 << 21)
00795 #define CLEARMASK (~(SETMASK))
00796 
00797 static
00798 void mainSort ( UInt32* ptr, 
00799                 UChar*  block,
00800                 UInt16* quadrant, 
00801                 UInt32* ftab,
00802                 Int32   nblock,
00803                 Int32   verb,
00804                 Int32*  budget )
00805 {
00806    Int32  i, j, k, ss, sb;
00807    Int32  runningOrder[256];
00808    Bool   bigDone[256];
00809    Int32  copyStart[256];
00810    Int32  copyEnd  [256];
00811    UChar  c1;
00812    Int32  numQSorted;
00813    UInt16 s;
00814    if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
00815 
00816    /*-- set up the 2-byte frequency table --*/
00817    for (i = 65536; i >= 0; i--) ftab[i] = 0;
00818 
00819    j = block[0] << 8;
00820    i = nblock-1;
00821    for (; i >= 3; i -= 4) {
00822       quadrant[i] = 0;
00823       j = (j >> 8) | ( ((UInt16)block[i]) << 8);
00824       ftab[j]++;
00825       quadrant[i-1] = 0;
00826       j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
00827       ftab[j]++;
00828       quadrant[i-2] = 0;
00829       j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
00830       ftab[j]++;
00831       quadrant[i-3] = 0;
00832       j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
00833       ftab[j]++;
00834    }
00835    for (; i >= 0; i--) {
00836       quadrant[i] = 0;
00837       j = (j >> 8) | ( ((UInt16)block[i]) << 8);
00838       ftab[j]++;
00839    }
00840 
00841    /*-- (emphasises close relationship of block & quadrant) --*/
00842    for (i = 0; i < BZ_N_OVERSHOOT; i++) {
00843       block   [nblock+i] = block[i];
00844       quadrant[nblock+i] = 0;
00845    }
00846 
00847    if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
00848 
00849    /*-- Complete the initial radix sort --*/
00850    for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
00851 
00852    s = block[0] << 8;
00853    i = nblock-1;
00854    for (; i >= 3; i -= 4) {
00855       s = (s >> 8) | (block[i] << 8);
00856       j = ftab[s] -1;
00857       ftab[s] = j;
00858       ptr[j] = i;
00859       s = (s >> 8) | (block[i-1] << 8);
00860       j = ftab[s] -1;
00861       ftab[s] = j;
00862       ptr[j] = i-1;
00863       s = (s >> 8) | (block[i-2] << 8);
00864       j = ftab[s] -1;
00865       ftab[s] = j;
00866       ptr[j] = i-2;
00867       s = (s >> 8) | (block[i-3] << 8);
00868       j = ftab[s] -1;
00869       ftab[s] = j;
00870       ptr[j] = i-3;
00871    }
00872    for (; i >= 0; i--) {
00873       s = (s >> 8) | (block[i] << 8);
00874       j = ftab[s] -1;
00875       ftab[s] = j;
00876       ptr[j] = i;
00877    }
00878 
00879    /*--
00880       Now ftab contains the first loc of every small bucket.
00881       Calculate the running order, from smallest to largest
00882       big bucket.
00883    --*/
00884    for (i = 0; i <= 255; i++) {
00885       bigDone     [i] = False;
00886       runningOrder[i] = i;
00887    }
00888 
00889    {
00890       Int32 vv;
00891       Int32 h = 1;
00892       do h = 3 * h + 1; while (h <= 256);
00893       do {
00894          h = h / 3;
00895          for (i = h; i <= 255; i++) {
00896             vv = runningOrder[i];
00897             j = i;
00898             while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
00899                runningOrder[j] = runningOrder[j-h];
00900                j = j - h;
00901                if (j <= (h - 1)) goto zero;
00902             }
00903             zero:
00904             runningOrder[j] = vv;
00905          }
00906       } while (h != 1);
00907    }
00908 
00909    /*--
00910       The main sorting loop.
00911    --*/
00912 
00913    numQSorted = 0;
00914 
00915    for (i = 0; i <= 255; i++) {
00916 
00917       /*--
00918          Process big buckets, starting with the least full.
00919          Basically this is a 3-step process in which we call
00920          mainQSort3 to sort the small buckets [ss, j], but
00921          also make a big effort to avoid the calls if we can.
00922       --*/
00923       ss = runningOrder[i];
00924 
00925       /*--
00926          Step 1:
00927          Complete the big bucket [ss] by quicksorting
00928          any unsorted small buckets [ss, j], for j != ss.  
00929          Hopefully previous pointer-scanning phases have already
00930          completed many of the small buckets [ss, j], so
00931          we don't have to sort them at all.
00932       --*/
00933       for (j = 0; j <= 255; j++) {
00934          if (j != ss) {
00935             sb = (ss << 8) + j;
00936             if ( ! (ftab[sb] & SETMASK) ) {
00937                Int32 lo = ftab[sb]   & CLEARMASK;
00938                Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
00939                if (hi > lo) {
00940                   if (verb >= 4)
00941                      VPrintf4 ( "        qsort [0x%x, 0x%x]   "
00942                                 "done %d   this %d\n",
00943                                 ss, j, numQSorted, hi - lo + 1 );
00944                   mainQSort3 ( 
00945                      ptr, block, quadrant, nblock, 
00946                      lo, hi, BZ_N_RADIX, budget 
00947                   );   
00948                   numQSorted += (hi - lo + 1);
00949                   if (*budget < 0) return;
00950                }
00951             }
00952             ftab[sb] |= SETMASK;
00953          }
00954       }
00955 
00956       AssertH ( !bigDone[ss], 1006 );
00957 
00958       /*--
00959          Step 2:
00960          Now scan this big bucket [ss] so as to synthesise the
00961          sorted order for small buckets [t, ss] for all t,
00962          including, magically, the bucket [ss,ss] too.
00963          This will avoid doing Real Work in subsequent Step 1's.
00964       --*/
00965       {
00966          for (j = 0; j <= 255; j++) {
00967             copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
00968             copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
00969          }
00970          for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
00971             k = ptr[j]-1; if (k < 0) k += nblock;
00972             c1 = block[k];
00973             if (!bigDone[c1])
00974                ptr[ copyStart[c1]++ ] = k;
00975          }
00976          for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
00977             k = ptr[j]-1; if (k < 0) k += nblock;
00978             c1 = block[k];
00979             if (!bigDone[c1]) 
00980                ptr[ copyEnd[c1]-- ] = k;
00981          }
00982       }
00983 
00984       AssertH ( (copyStart[ss]-1 == copyEnd[ss])
00985                 || 
00986                 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
00987                    Necessity for this case is demonstrated by compressing 
00988                    a sequence of approximately 48.5 million of character 
00989                    251; 1.0.0/1.0.1 will then die here. */
00990                 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
00991                 1007 )
00992 
00993       for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
00994 
00995       /*--
00996          Step 3:
00997          The [ss] big bucket is now done.  Record this fact,
00998          and update the quadrant descriptors.  Remember to
00999          update quadrants in the overshoot area too, if
01000          necessary.  The "if (i < 255)" test merely skips
01001          this updating for the last bucket processed, since
01002          updating for the last bucket is pointless.
01003 
01004          The quadrant array provides a way to incrementally
01005          cache sort orderings, as they appear, so as to 
01006          make subsequent comparisons in fullGtU() complete
01007          faster.  For repetitive blocks this makes a big
01008          difference (but not big enough to be able to avoid
01009          the fallback sorting mechanism, exponential radix sort).
01010 
01011          The precise meaning is: at all times:
01012 
01013             for 0 <= i < nblock and 0 <= j <= nblock
01014 
01015             if block[i] != block[j], 
01016 
01017                then the relative values of quadrant[i] and 
01018                     quadrant[j] are meaningless.
01019 
01020                else {
01021                   if quadrant[i] < quadrant[j]
01022                      then the string starting at i lexicographically
01023                      precedes the string starting at j
01024 
01025                   else if quadrant[i] > quadrant[j]
01026                      then the string starting at j lexicographically
01027                      precedes the string starting at i
01028 
01029                   else
01030                      the relative ordering of the strings starting
01031                      at i and j has not yet been determined.
01032                }
01033       --*/
01034       bigDone[ss] = True;
01035 
01036       if (i < 255) {
01037          Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
01038          Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
01039          Int32 shifts   = 0;
01040 
01041          while ((bbSize >> shifts) > 65534) shifts++;
01042 
01043          for (j = bbSize-1; j >= 0; j--) {
01044             Int32 a2update     = ptr[bbStart + j];
01045             UInt16 qVal        = (UInt16)(j >> shifts);
01046             quadrant[a2update] = qVal;
01047             if (a2update < BZ_N_OVERSHOOT)
01048                quadrant[a2update + nblock] = qVal;
01049          }
01050          AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
01051       }
01052 
01053    }
01054 
01055    if (verb >= 4)
01056       VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
01057                  nblock, numQSorted, nblock - numQSorted );
01058 }
01059 
01060 #undef BIGFREQ
01061 #undef SETMASK
01062 #undef CLEARMASK
01063 
01064 
01065 /*---------------------------------------------*/
01066 /* Pre:
01067       nblock > 0
01068       arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
01069       ((UChar*)arr2)  [0 .. nblock-1] holds block
01070       arr1 exists for [0 .. nblock-1]
01071 
01072    Post:
01073       ((UChar*)arr2) [0 .. nblock-1] holds block
01074       All other areas of block destroyed
01075       ftab [ 0 .. 65536 ] destroyed
01076       arr1 [0 .. nblock-1] holds sorted order
01077 */
01078 void BZ2_blockSort ( EState* s )
01079 {
01080    UInt32* ptr    = s->ptr; 
01081    UChar*  block  = s->block;
01082    UInt32* ftab   = s->ftab;
01083    Int32   nblock = s->nblock;
01084    Int32   verb   = s->verbosity;
01085    Int32   wfact  = s->workFactor;
01086    UInt16* quadrant;
01087    Int32   budget;
01088    Int32   budgetInit;
01089    Int32   i;
01090 
01091    if (nblock < 10000) {
01092       fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
01093    } else {
01094       /* Calculate the location for quadrant, remembering to get
01095          the alignment right.  Assumes that &(block[0]) is at least
01096          2-byte aligned -- this should be ok since block is really
01097          the first section of arr2.
01098       */
01099       i = nblock+BZ_N_OVERSHOOT;
01100       if (i & 1) i++;
01101       quadrant = (UInt16*)(&(block[i]));
01102 
01103       /* (wfact-1) / 3 puts the default-factor-30
01104          transition point at very roughly the same place as 
01105          with v0.1 and v0.9.0.  
01106          Not that it particularly matters any more, since the
01107          resulting compressed stream is now the same regardless
01108          of whether or not we use the main sort or fallback sort.
01109       */
01110       if (wfact < 1  ) wfact = 1;
01111       if (wfact > 100) wfact = 100;
01112       budgetInit = nblock * ((wfact-1) / 3);
01113       budget = budgetInit;
01114 
01115       mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
01116       if (verb >= 3) 
01117          VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
01118                     budgetInit - budget,
01119                     nblock, 
01120                     (float)(budgetInit - budget) /
01121                     (float)(nblock==0 ? 1 : nblock) ); 
01122       if (budget < 0) {
01123          if (verb >= 2) 
01124             VPrintf0 ( "    too repetitive; using fallback"
01125                        " sorting algorithm\n" );
01126          fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
01127       }
01128    }
01129 
01130    s->origPtr = -1;
01131    for (i = 0; i < s->nblock; i++)
01132       if (ptr[i] == 0)
01133          { s->origPtr = i; break; };
01134 
01135    AssertH( s->origPtr != -1, 1003 );
01136 }
01137 
01138 
01139 /*-------------------------------------------------------------*/
01140 /*--- end                                       blocksort.c ---*/
01141 /*-------------------------------------------------------------*/