/* inftrees.c -- generate Huffman trees for efficient decoding
 * Copyright (C) 1995-1998 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
| 5 | |
| 6 | #include <linux/zutil.h> |
| 7 | #include "inftrees.h" |
| 8 | #include "infutil.h" |
| 9 | |
| 10 | static const char inflate_copyright[] __attribute_used__ = |
| 11 | " inflate 1.1.3 Copyright 1995-1998 Mark Adler "; |
| 12 | /* |
| 13 | If you use the zlib library in a product, an acknowledgment is welcome |
| 14 | in the documentation of your product. If for some reason you cannot |
| 15 | include such an acknowledgment, I would appreciate that you keep this |
| 16 | copyright string in the executable of your product. |
| 17 | */ |
| 18 | struct internal_state; |
| 19 | |
| 20 | /* simplify the use of the inflate_huft type with some defines */ |
| 21 | #define exop word.what.Exop |
| 22 | #define bits word.what.Bits |
| 23 | |
| 24 | |
| 25 | static int huft_build ( |
| 26 | uInt *, /* code lengths in bits */ |
| 27 | uInt, /* number of codes */ |
| 28 | uInt, /* number of "simple" codes */ |
| 29 | const uInt *, /* list of base values for non-simple codes */ |
| 30 | const uInt *, /* list of extra bits for non-simple codes */ |
| 31 | inflate_huft **, /* result: starting table */ |
| 32 | uInt *, /* maximum lookup bits (returns actual) */ |
| 33 | inflate_huft *, /* space for trees */ |
| 34 | uInt *, /* hufts used in space */ |
| 35 | uInt * ); /* space for values */ |
| 36 | |
| 37 | /* Tables for deflate from PKZIP's appnote.txt. */ |
| 38 | static const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */ |
| 39 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, |
| 40 | 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; |
| 41 | /* see note #13 above about 258 */ |
| 42 | static const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */ |
| 43 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, |
| 44 | 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */ |
| 45 | static const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */ |
| 46 | 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, |
| 47 | 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, |
| 48 | 8193, 12289, 16385, 24577}; |
| 49 | static const uInt cpdext[30] = { /* Extra bits for distance codes */ |
| 50 | 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, |
| 51 | 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, |
| 52 | 12, 12, 13, 13}; |
| 53 | |
/*
   Huffman code decoding is performed using a multi-level table lookup.
   The fastest way to decode is to simply build a lookup table whose
   size is determined by the longest code.  However, the time it takes
   to build this table can also be a factor if the data being decoded
   is not very long.  The most common codes are necessarily the
   shortest codes, so those codes dominate the decoding time, and hence
   the speed.  The idea is you can have a shorter table that decodes the
   shorter, more probable codes, and then point to subsidiary tables for
   the longer codes.  The time it costs to decode the longer codes is
   then traded against the time it takes to make longer tables.

   The results of this trade are in the variables lbits and dbits
   below.  lbits is the number of bits the first level table for literal/
   length codes can decode in one step, and dbits is the same thing for
   the distance codes.  Subsequent tables are also less than or equal to
   those sizes.  These values may be adjusted either when all of the
   codes are shorter than that, in which case the longest code length in
   bits is used, or when the shortest code is *longer* than the requested
   table size, in which case the length of the shortest code in bits is
   used.

   There are two different values for the two tables, since they code a
   different number of possibilities each.  The literal/length table
   codes 286 possible values, or in a flat code, a little over eight
   bits.  The distance table codes 30 possible values, or a little less
   than five bits, flat.  The optimum values for speed end up being
   about one bit more than those, so lbits is 8+1 and dbits is 5+1.
   The optimum values may differ though from machine to machine, and
   possibly even between compilers.  Your mileage may vary.
 */
| 85 | |
| 86 | |
| 87 | /* If BMAX needs to be larger than 16, then h and x[] should be uLong. */ |
| 88 | #define BMAX 15 /* maximum bit length of any code */ |
| 89 | |
| 90 | static int huft_build( |
| 91 | uInt *b, /* code lengths in bits (all assumed <= BMAX) */ |
| 92 | uInt n, /* number of codes (assumed <= 288) */ |
| 93 | uInt s, /* number of simple-valued codes (0..s-1) */ |
| 94 | const uInt *d, /* list of base values for non-simple codes */ |
| 95 | const uInt *e, /* list of extra bits for non-simple codes */ |
| 96 | inflate_huft **t, /* result: starting table */ |
| 97 | uInt *m, /* maximum lookup bits, returns actual */ |
| 98 | inflate_huft *hp, /* space for trees */ |
| 99 | uInt *hn, /* hufts used in space */ |
| 100 | uInt *v /* working area: values in order of bit length */ |
| 101 | ) |
| 102 | /* Given a list of code lengths and a maximum table size, make a set of |
| 103 | tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR |
| 104 | if the given code set is incomplete (the tables are still built in this |
| 105 | case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of |
| 106 | lengths), or Z_MEM_ERROR if not enough memory. */ |
| 107 | { |
| 108 | |
| 109 | uInt a; /* counter for codes of length k */ |
| 110 | uInt c[BMAX+1]; /* bit length count table */ |
| 111 | uInt f; /* i repeats in table every f entries */ |
| 112 | int g; /* maximum code length */ |
| 113 | int h; /* table level */ |
| 114 | register uInt i; /* counter, current code */ |
| 115 | register uInt j; /* counter */ |
| 116 | register int k; /* number of bits in current code */ |
| 117 | int l; /* bits per table (returned in m) */ |
| 118 | uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */ |
| 119 | register uInt *p; /* pointer into c[], b[], or v[] */ |
| 120 | inflate_huft *q; /* points to current table */ |
| 121 | struct inflate_huft_s r; /* table entry for structure assignment */ |
| 122 | inflate_huft *u[BMAX]; /* table stack */ |
| 123 | register int w; /* bits before this table == (l * h) */ |
| 124 | uInt x[BMAX+1]; /* bit offsets, then code stack */ |
| 125 | uInt *xp; /* pointer into x */ |
| 126 | int y; /* number of dummy codes added */ |
| 127 | uInt z; /* number of entries in current table */ |
| 128 | |
| 129 | |
| 130 | /* Generate counts for each bit length */ |
| 131 | p = c; |
| 132 | #define C0 *p++ = 0; |
| 133 | #define C2 C0 C0 C0 C0 |
| 134 | #define C4 C2 C2 C2 C2 |
| 135 | C4 /* clear c[]--assume BMAX+1 is 16 */ |
| 136 | p = b; i = n; |
| 137 | do { |
| 138 | c[*p++]++; /* assume all entries <= BMAX */ |
| 139 | } while (--i); |
| 140 | if (c[0] == n) /* null input--all zero length codes */ |
| 141 | { |
| 142 | *t = NULL; |
| 143 | *m = 0; |
| 144 | return Z_OK; |
| 145 | } |
| 146 | |
| 147 | |
| 148 | /* Find minimum and maximum length, bound *m by those */ |
| 149 | l = *m; |
| 150 | for (j = 1; j <= BMAX; j++) |
| 151 | if (c[j]) |
| 152 | break; |
| 153 | k = j; /* minimum code length */ |
| 154 | if ((uInt)l < j) |
| 155 | l = j; |
| 156 | for (i = BMAX; i; i--) |
| 157 | if (c[i]) |
| 158 | break; |
| 159 | g = i; /* maximum code length */ |
| 160 | if ((uInt)l > i) |
| 161 | l = i; |
| 162 | *m = l; |
| 163 | |
| 164 | |
| 165 | /* Adjust last length count to fill out codes, if needed */ |
| 166 | for (y = 1 << j; j < i; j++, y <<= 1) |
| 167 | if ((y -= c[j]) < 0) |
| 168 | return Z_DATA_ERROR; |
| 169 | if ((y -= c[i]) < 0) |
| 170 | return Z_DATA_ERROR; |
| 171 | c[i] += y; |
| 172 | |
| 173 | |
| 174 | /* Generate starting offsets into the value table for each length */ |
| 175 | x[1] = j = 0; |
| 176 | p = c + 1; xp = x + 2; |
| 177 | while (--i) { /* note that i == g from above */ |
| 178 | *xp++ = (j += *p++); |
| 179 | } |
| 180 | |
| 181 | |
| 182 | /* Make a table of values in order of bit lengths */ |
| 183 | p = b; i = 0; |
| 184 | do { |
| 185 | if ((j = *p++) != 0) |
| 186 | v[x[j]++] = i; |
| 187 | } while (++i < n); |
| 188 | n = x[g]; /* set n to length of v */ |
| 189 | |
| 190 | |
| 191 | /* Generate the Huffman codes and for each, make the table entries */ |
| 192 | x[0] = i = 0; /* first Huffman code is zero */ |
| 193 | p = v; /* grab values in bit order */ |
| 194 | h = -1; /* no tables yet--level -1 */ |
| 195 | w = -l; /* bits decoded == (l * h) */ |
| 196 | u[0] = NULL; /* just to keep compilers happy */ |
| 197 | q = NULL; /* ditto */ |
| 198 | z = 0; /* ditto */ |
| 199 | |
| 200 | /* go through the bit lengths (k already is bits in shortest code) */ |
| 201 | for (; k <= g; k++) |
| 202 | { |
| 203 | a = c[k]; |
| 204 | while (a--) |
| 205 | { |
| 206 | /* here i is the Huffman code of length k bits for value *p */ |
| 207 | /* make tables up to required level */ |
| 208 | while (k > w + l) |
| 209 | { |
| 210 | h++; |
| 211 | w += l; /* previous table always l bits */ |
| 212 | |
| 213 | /* compute minimum size table less than or equal to l bits */ |
| 214 | z = g - w; |
| 215 | z = z > (uInt)l ? l : z; /* table size upper limit */ |
| 216 | if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ |
| 217 | { /* too few codes for k-w bit table */ |
| 218 | f -= a + 1; /* deduct codes from patterns left */ |
| 219 | xp = c + k; |
| 220 | if (j < z) |
| 221 | while (++j < z) /* try smaller tables up to z bits */ |
| 222 | { |
| 223 | if ((f <<= 1) <= *++xp) |
| 224 | break; /* enough codes to use up j bits */ |
| 225 | f -= *xp; /* else deduct codes from patterns */ |
| 226 | } |
| 227 | } |
| 228 | z = 1 << j; /* table entries for j-bit table */ |
| 229 | |
| 230 | /* allocate new table */ |
| 231 | if (*hn + z > MANY) /* (note: doesn't matter for fixed) */ |
| 232 | return Z_DATA_ERROR; /* overflow of MANY */ |
| 233 | u[h] = q = hp + *hn; |
| 234 | *hn += z; |
| 235 | |
| 236 | /* connect to last table, if there is one */ |
| 237 | if (h) |
| 238 | { |
| 239 | x[h] = i; /* save pattern for backing up */ |
| 240 | r.bits = (Byte)l; /* bits to dump before this table */ |
| 241 | r.exop = (Byte)j; /* bits in this table */ |
| 242 | j = i >> (w - l); |
| 243 | r.base = (uInt)(q - u[h-1] - j); /* offset to this table */ |
| 244 | u[h-1][j] = r; /* connect to last table */ |
| 245 | } |
| 246 | else |
| 247 | *t = q; /* first table is returned result */ |
| 248 | } |
| 249 | |
| 250 | /* set up table entry in r */ |
| 251 | r.bits = (Byte)(k - w); |
| 252 | if (p >= v + n) |
| 253 | r.exop = 128 + 64; /* out of values--invalid code */ |
| 254 | else if (*p < s) |
| 255 | { |
| 256 | r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */ |
| 257 | r.base = *p++; /* simple code is just the value */ |
| 258 | } |
| 259 | else |
| 260 | { |
| 261 | r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */ |
| 262 | r.base = d[*p++ - s]; |
| 263 | } |
| 264 | |
| 265 | /* fill code-like entries with r */ |
| 266 | f = 1 << (k - w); |
| 267 | for (j = i >> w; j < z; j += f) |
| 268 | q[j] = r; |
| 269 | |
| 270 | /* backwards increment the k-bit code i */ |
| 271 | for (j = 1 << (k - 1); i & j; j >>= 1) |
| 272 | i ^= j; |
| 273 | i ^= j; |
| 274 | |
| 275 | /* backup over finished tables */ |
| 276 | mask = (1 << w) - 1; /* needed on HP, cc -O bug */ |
| 277 | while ((i & mask) != x[h]) |
| 278 | { |
| 279 | h--; /* don't need to update q */ |
| 280 | w -= l; |
| 281 | mask = (1 << w) - 1; |
| 282 | } |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | |
| 287 | /* Return Z_BUF_ERROR if we were given an incomplete table */ |
| 288 | return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK; |
| 289 | } |
| 290 | |
| 291 | |
| 292 | int zlib_inflate_trees_bits( |
| 293 | uInt *c, /* 19 code lengths */ |
| 294 | uInt *bb, /* bits tree desired/actual depth */ |
| 295 | inflate_huft **tb, /* bits tree result */ |
| 296 | inflate_huft *hp, /* space for trees */ |
| 297 | z_streamp z /* for messages */ |
| 298 | ) |
| 299 | { |
| 300 | int r; |
| 301 | uInt hn = 0; /* hufts used in space */ |
| 302 | uInt *v; /* work area for huft_build */ |
| 303 | |
| 304 | v = WS(z)->tree_work_area_1; |
| 305 | r = huft_build(c, 19, 19, NULL, NULL, tb, bb, hp, &hn, v); |
| 306 | if (r == Z_DATA_ERROR) |
| 307 | z->msg = (char*)"oversubscribed dynamic bit lengths tree"; |
| 308 | else if (r == Z_BUF_ERROR || *bb == 0) |
| 309 | { |
| 310 | z->msg = (char*)"incomplete dynamic bit lengths tree"; |
| 311 | r = Z_DATA_ERROR; |
| 312 | } |
| 313 | return r; |
| 314 | } |
| 315 | |
| 316 | int zlib_inflate_trees_dynamic( |
| 317 | uInt nl, /* number of literal/length codes */ |
| 318 | uInt nd, /* number of distance codes */ |
| 319 | uInt *c, /* that many (total) code lengths */ |
| 320 | uInt *bl, /* literal desired/actual bit depth */ |
| 321 | uInt *bd, /* distance desired/actual bit depth */ |
| 322 | inflate_huft **tl, /* literal/length tree result */ |
| 323 | inflate_huft **td, /* distance tree result */ |
| 324 | inflate_huft *hp, /* space for trees */ |
| 325 | z_streamp z /* for messages */ |
| 326 | ) |
| 327 | { |
| 328 | int r; |
| 329 | uInt hn = 0; /* hufts used in space */ |
| 330 | uInt *v; /* work area for huft_build */ |
| 331 | |
| 332 | /* allocate work area */ |
| 333 | v = WS(z)->tree_work_area_2; |
| 334 | |
| 335 | /* build literal/length tree */ |
| 336 | r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v); |
| 337 | if (r != Z_OK || *bl == 0) |
| 338 | { |
| 339 | if (r == Z_DATA_ERROR) |
| 340 | z->msg = (char*)"oversubscribed literal/length tree"; |
| 341 | else if (r != Z_MEM_ERROR) |
| 342 | { |
| 343 | z->msg = (char*)"incomplete literal/length tree"; |
| 344 | r = Z_DATA_ERROR; |
| 345 | } |
| 346 | return r; |
| 347 | } |
| 348 | |
| 349 | /* build distance tree */ |
| 350 | r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v); |
| 351 | if (r != Z_OK || (*bd == 0 && nl > 257)) |
| 352 | { |
| 353 | if (r == Z_DATA_ERROR) |
| 354 | z->msg = (char*)"oversubscribed distance tree"; |
| 355 | else if (r == Z_BUF_ERROR) { |
| 356 | #ifdef PKZIP_BUG_WORKAROUND |
| 357 | r = Z_OK; |
| 358 | } |
| 359 | #else |
| 360 | z->msg = (char*)"incomplete distance tree"; |
| 361 | r = Z_DATA_ERROR; |
| 362 | } |
| 363 | else if (r != Z_MEM_ERROR) |
| 364 | { |
| 365 | z->msg = (char*)"empty distance tree with lengths"; |
| 366 | r = Z_DATA_ERROR; |
| 367 | } |
| 368 | return r; |
| 369 | #endif |
| 370 | } |
| 371 | |
| 372 | /* done */ |
| 373 | return Z_OK; |
| 374 | } |
| 375 | |
| 376 | |
| 377 | int zlib_inflate_trees_fixed( |
| 378 | uInt *bl, /* literal desired/actual bit depth */ |
| 379 | uInt *bd, /* distance desired/actual bit depth */ |
| 380 | inflate_huft **tl, /* literal/length tree result */ |
| 381 | inflate_huft **td, /* distance tree result */ |
| 382 | inflate_huft *hp, /* space for trees */ |
| 383 | z_streamp z /* for memory allocation */ |
| 384 | ) |
| 385 | { |
| 386 | int i; /* temporary variable */ |
| 387 | unsigned l[288]; /* length list for huft_build */ |
| 388 | uInt *v; /* work area for huft_build */ |
| 389 | |
| 390 | /* set up literal table */ |
| 391 | for (i = 0; i < 144; i++) |
| 392 | l[i] = 8; |
| 393 | for (; i < 256; i++) |
| 394 | l[i] = 9; |
| 395 | for (; i < 280; i++) |
| 396 | l[i] = 7; |
| 397 | for (; i < 288; i++) /* make a complete, but wrong code set */ |
| 398 | l[i] = 8; |
| 399 | *bl = 9; |
| 400 | v = WS(z)->tree_work_area_1; |
| 401 | if ((i = huft_build(l, 288, 257, cplens, cplext, tl, bl, hp, &i, v)) != 0) |
| 402 | return i; |
| 403 | |
| 404 | /* set up distance table */ |
| 405 | for (i = 0; i < 30; i++) /* make an incomplete code set */ |
| 406 | l[i] = 5; |
| 407 | *bd = 5; |
| 408 | if ((i = huft_build(l, 30, 0, cpdist, cpdext, td, bd, hp, &i, v)) > 1) |
| 409 | return i; |
| 410 | |
| 411 | return Z_OK; |
| 412 | } |