Back to index

lightning-sunbird  0.9+nobinonly
huffman.c
Go to the documentation of this file.
00001 
00002 /*-------------------------------------------------------------*/
00003 /*--- Huffman coding low-level stuff                        ---*/
00004 /*---                                             huffman.c ---*/
00005 /*-------------------------------------------------------------*/
00006 
00007 /*--
00008   This file is a part of bzip2 and/or libbzip2, a program and
00009   library for lossless, block-sorting data compression.
00010 
00011   Copyright (C) 1996-2005 Julian R Seward.  All rights reserved.
00012 
00013   Redistribution and use in source and binary forms, with or without
00014   modification, are permitted provided that the following conditions
00015   are met:
00016 
00017   1. Redistributions of source code must retain the above copyright
00018      notice, this list of conditions and the following disclaimer.
00019 
00020   2. The origin of this software must not be misrepresented; you must 
00021      not claim that you wrote the original software.  If you use this 
00022      software in a product, an acknowledgment in the product 
00023      documentation would be appreciated but is not required.
00024 
00025   3. Altered source versions must be plainly marked as such, and must
00026      not be misrepresented as being the original software.
00027 
00028   4. The name of the author may not be used to endorse or promote 
00029      products derived from this software without specific prior written 
00030      permission.
00031 
00032   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
00033   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00034   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00035   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
00036   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00037   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
00038   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00039   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00040   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00041   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00042   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00043 
00044   Julian Seward, Cambridge, UK.
00045   jseward@bzip.org
00046   bzip2/libbzip2 version 1.0 of 21 March 2000
00047 
00048   This program is based on (at least) the work of:
00049      Mike Burrows
00050      David Wheeler
00051      Peter Fenwick
00052      Alistair Moffat
00053      Radford Neal
00054      Ian H. Witten
00055      Robert Sedgewick
00056      Jon L. Bentley
00057 
00058   For more information on these sources, see the manual.
00059 --*/
00060 
00061 
00062 #include "bzlib_private.h"
00063 
00064 /*---------------------------------------------------*/
00065 #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
00066 #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
00067 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
00068 
00069 #define ADDWEIGHTS(zw1,zw2)                           \
00070    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
00071    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
00072 
00073 #define UPHEAP(z)                                     \
00074 {                                                     \
00075    Int32 zz, tmp;                                     \
00076    zz = z; tmp = heap[zz];                            \
00077    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
00078       heap[zz] = heap[zz >> 1];                       \
00079       zz >>= 1;                                       \
00080    }                                                  \
00081    heap[zz] = tmp;                                    \
00082 }
00083 
00084 #define DOWNHEAP(z)                                   \
00085 {                                                     \
00086    Int32 zz, yy, tmp;                                 \
00087    zz = z; tmp = heap[zz];                            \
00088    while (True) {                                     \
00089       yy = zz << 1;                                   \
00090       if (yy > nHeap) break;                          \
00091       if (yy < nHeap &&                               \
00092           weight[heap[yy+1]] < weight[heap[yy]])      \
00093          yy++;                                        \
00094       if (weight[tmp] < weight[heap[yy]]) break;      \
00095       heap[zz] = heap[yy];                            \
00096       zz = yy;                                        \
00097    }                                                  \
00098    heap[zz] = tmp;                                    \
00099 }
00100 
00101 
00102 /*---------------------------------------------------*/
00103 void BZ2_hbMakeCodeLengths ( UChar *len, 
00104                              Int32 *freq,
00105                              Int32 alphaSize,
00106                              Int32 maxLen )
00107 {
00108    /*--
00109       Nodes and heap entries run from 1.  Entry 0
00110       for both the heap and nodes is a sentinel.
00111    --*/
00112    Int32 nNodes, nHeap, n1, n2, i, j, k;
00113    Bool  tooLong;
00114 
00115    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
00116    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
00117    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 
00118 
00119    for (i = 0; i < alphaSize; i++)
00120       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
00121 
00122    while (True) {
00123 
00124       nNodes = alphaSize;
00125       nHeap = 0;
00126 
00127       heap[0] = 0;
00128       weight[0] = 0;
00129       parent[0] = -2;
00130 
00131       for (i = 1; i <= alphaSize; i++) {
00132          parent[i] = -1;
00133          nHeap++;
00134          heap[nHeap] = i;
00135          UPHEAP(nHeap);
00136       }
00137 
00138       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
00139    
00140       while (nHeap > 1) {
00141          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
00142          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
00143          nNodes++;
00144          parent[n1] = parent[n2] = nNodes;
00145          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
00146          parent[nNodes] = -1;
00147          nHeap++;
00148          heap[nHeap] = nNodes;
00149          UPHEAP(nHeap);
00150       }
00151 
00152       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
00153 
00154       tooLong = False;
00155       for (i = 1; i <= alphaSize; i++) {
00156          j = 0;
00157          k = i;
00158          while (parent[k] >= 0) { k = parent[k]; j++; }
00159          len[i-1] = j;
00160          if (j > maxLen) tooLong = True;
00161       }
00162       
00163       if (! tooLong) break;
00164 
00165       /* 17 Oct 04: keep-going condition for the following loop used
00166          to be 'i < alphaSize', which missed the last element,
00167          theoretically leading to the possibility of the compressor
00168          looping.  However, this count-scaling step is only needed if
00169          one of the generated Huffman code words is longer than
00170          maxLen, which up to and including version 1.0.2 was 20 bits,
00171          which is extremely unlikely.  In version 1.0.3 maxLen was
00172          changed to 17 bits, which has minimal effect on compression
00173          ratio, but does mean this scaling step is used from time to
00174          time, enough to verify that it works.
00175 
00176          This means that bzip2-1.0.3 and later will only produce
00177          Huffman codes with a maximum length of 17 bits.  However, in
00178          order to preserve backwards compatibility with bitstreams
00179          produced by versions pre-1.0.3, the decompressor must still
00180          handle lengths of up to 20. */
00181 
00182       for (i = 1; i <= alphaSize; i++) {
00183          j = weight[i] >> 8;
00184          j = 1 + (j / 2);
00185          weight[i] = j << 8;
00186       }
00187    }
00188 }
00189 
00190 
00191 /*---------------------------------------------------*/
00192 void BZ2_hbAssignCodes ( Int32 *code,
00193                          UChar *length,
00194                          Int32 minLen,
00195                          Int32 maxLen,
00196                          Int32 alphaSize )
00197 {
00198    Int32 n, vec, i;
00199 
00200    vec = 0;
00201    for (n = minLen; n <= maxLen; n++) {
00202       for (i = 0; i < alphaSize; i++)
00203          if (length[i] == n) { code[i] = vec; vec++; };
00204       vec <<= 1;
00205    }
00206 }
00207 
00208 
00209 /*---------------------------------------------------*/
00210 void BZ2_hbCreateDecodeTables ( Int32 *limit,
00211                                 Int32 *base,
00212                                 Int32 *perm,
00213                                 UChar *length,
00214                                 Int32 minLen,
00215                                 Int32 maxLen,
00216                                 Int32 alphaSize )
00217 {
00218    Int32 pp, i, j, vec;
00219 
00220    pp = 0;
00221    for (i = minLen; i <= maxLen; i++)
00222       for (j = 0; j < alphaSize; j++)
00223          if (length[j] == i) { perm[pp] = j; pp++; };
00224 
00225    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
00226    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
00227 
00228    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
00229 
00230    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
00231    vec = 0;
00232 
00233    for (i = minLen; i <= maxLen; i++) {
00234       vec += (base[i+1] - base[i]);
00235       limit[i] = vec-1;
00236       vec <<= 1;
00237    }
00238    for (i = minLen + 1; i <= maxLen; i++)
00239       base[i] = ((limit[i-1] + 1) << 1) - base[i];
00240 }
00241 
00242 
00243 /*-------------------------------------------------------------*/
00244 /*--- end                                         huffman.c ---*/
00245 /*-------------------------------------------------------------*/