Nick Terrell | 73f3d1b | 2017-08-09 19:35:53 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Huffman encoder, part of New Generation Entropy library |
| 3 | * Copyright (C) 2013-2016, Yann Collet. |
| 4 | * |
| 5 | * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions are |
| 9 | * met: |
| 10 | * |
| 11 | * * Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * * Redistributions in binary form must reproduce the above |
| 14 | * copyright notice, this list of conditions and the following disclaimer |
| 15 | * in the documentation and/or other materials provided with the |
| 16 | * distribution. |
| 17 | * |
| 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | * |
| 30 | * This program is free software; you can redistribute it and/or modify it under |
| 31 | * the terms of the GNU General Public License version 2 as published by the |
| 32 | * Free Software Foundation. This program is dual-licensed; you may select |
| 33 | * either version 2 of the GNU General Public License ("GPL") or BSD license |
| 34 | * ("BSD"). |
| 35 | * |
| 36 | * You can contact the author at : |
| 37 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy |
| 38 | */ |
| 39 | |
| 40 | /* ************************************************************** |
| 41 | * Includes |
| 42 | ****************************************************************/ |
| 43 | #include "bitstream.h" |
| 44 | #include "fse.h" /* header compression */ |
| 45 | #include "huf.h" |
| 46 | #include <linux/kernel.h> |
| 47 | #include <linux/string.h> /* memcpy, memset */ |
| 48 | |
| 49 | /* ************************************************************** |
| 50 | * Error Management |
| 51 | ****************************************************************/ |
| 52 | #define HUF_STATIC_ASSERT(c) \ |
| 53 | { \ |
| 54 | enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ |
| 55 | } /* use only *after* variable declarations */ |
| 56 | #define CHECK_V_F(e, f) \ |
| 57 | size_t const e = f; \ |
| 58 | if (ERR_isError(e)) \ |
| 59 | return f |
| 60 | #define CHECK_F(f) \ |
| 61 | { \ |
| 62 | CHECK_V_F(_var_err__, f); \ |
| 63 | } |
| 64 | |
| 65 | /* ************************************************************** |
| 66 | * Utils |
| 67 | ****************************************************************/ |
| 68 | unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) |
| 69 | { |
| 70 | return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); |
| 71 | } |
| 72 | |
| 73 | /* ******************************************************* |
| 74 | * HUF : Huffman block compression |
| 75 | *********************************************************/ |
| 76 | /* HUF_compressWeights() : |
| 77 | * Same as FSE_compress(), but dedicated to huff0's weights compression. |
| 78 | * The use case needs much less stack memory. |
| 79 | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. |
| 80 | */ |
| 81 | #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 |
| 82 | size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize) |
| 83 | { |
| 84 | BYTE *const ostart = (BYTE *)dst; |
| 85 | BYTE *op = ostart; |
| 86 | BYTE *const oend = ostart + dstSize; |
| 87 | |
| 88 | U32 maxSymbolValue = HUF_TABLELOG_MAX; |
| 89 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; |
| 90 | |
| 91 | FSE_CTable *CTable; |
| 92 | U32 *count; |
| 93 | S16 *norm; |
| 94 | size_t spaceUsed32 = 0; |
| 95 | |
| 96 | HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32)); |
| 97 | |
| 98 | CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32); |
| 99 | spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX); |
| 100 | count = (U32 *)workspace + spaceUsed32; |
| 101 | spaceUsed32 += HUF_TABLELOG_MAX + 1; |
| 102 | norm = (S16 *)((U32 *)workspace + spaceUsed32); |
| 103 | spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2; |
| 104 | |
| 105 | if ((spaceUsed32 << 2) > workspaceSize) |
| 106 | return ERROR(tableLog_tooLarge); |
| 107 | workspace = (U32 *)workspace + spaceUsed32; |
| 108 | workspaceSize -= (spaceUsed32 << 2); |
| 109 | |
| 110 | /* init conditions */ |
| 111 | if (wtSize <= 1) |
| 112 | return 0; /* Not compressible */ |
| 113 | |
| 114 | /* Scan input and build symbol stats */ |
| 115 | { |
| 116 | CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize)); |
| 117 | if (maxCount == wtSize) |
| 118 | return 1; /* only a single symbol in src : rle */ |
| 119 | if (maxCount == 1) |
| 120 | return 0; /* each symbol present maximum once => not compressible */ |
| 121 | } |
| 122 | |
| 123 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); |
| 124 | CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue)); |
| 125 | |
| 126 | /* Write table description header */ |
| 127 | { |
| 128 | CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog)); |
| 129 | op += hSize; |
| 130 | } |
| 131 | |
| 132 | /* Compress */ |
| 133 | CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize)); |
| 134 | { |
| 135 | CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable)); |
| 136 | if (cSize == 0) |
| 137 | return 0; /* not enough space for compressed data */ |
| 138 | op += cSize; |
| 139 | } |
| 140 | |
| 141 | return op - ostart; |
| 142 | } |
| 143 | |
| 144 | struct HUF_CElt_s { |
| 145 | U16 val; |
| 146 | BYTE nbBits; |
| 147 | }; /* typedef'd to HUF_CElt within "huf.h" */ |
| 148 | |
| 149 | /*! HUF_writeCTable_wksp() : |
| 150 | `CTable` : Huffman tree to save, using huf representation. |
| 151 | @return : size of saved CTable */ |
| 152 | size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize) |
| 153 | { |
| 154 | BYTE *op = (BYTE *)dst; |
| 155 | U32 n; |
| 156 | |
| 157 | BYTE *bitsToWeight; |
| 158 | BYTE *huffWeight; |
| 159 | size_t spaceUsed32 = 0; |
| 160 | |
| 161 | bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32); |
| 162 | spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2; |
| 163 | huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); |
| 164 | spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2; |
| 165 | |
| 166 | if ((spaceUsed32 << 2) > workspaceSize) |
| 167 | return ERROR(tableLog_tooLarge); |
| 168 | workspace = (U32 *)workspace + spaceUsed32; |
| 169 | workspaceSize -= (spaceUsed32 << 2); |
| 170 | |
| 171 | /* check conditions */ |
| 172 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) |
| 173 | return ERROR(maxSymbolValue_tooLarge); |
| 174 | |
| 175 | /* convert to weight */ |
| 176 | bitsToWeight[0] = 0; |
| 177 | for (n = 1; n < huffLog + 1; n++) |
| 178 | bitsToWeight[n] = (BYTE)(huffLog + 1 - n); |
| 179 | for (n = 0; n < maxSymbolValue; n++) |
| 180 | huffWeight[n] = bitsToWeight[CTable[n].nbBits]; |
| 181 | |
| 182 | /* attempt weights compression by FSE */ |
| 183 | { |
| 184 | CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize)); |
| 185 | if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */ |
| 186 | op[0] = (BYTE)hSize; |
| 187 | return hSize + 1; |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | /* write raw values as 4-bits (max : 15) */ |
| 192 | if (maxSymbolValue > (256 - 128)) |
| 193 | return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ |
| 194 | if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize) |
| 195 | return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ |
| 196 | op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1)); |
| 197 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ |
| 198 | for (n = 0; n < maxSymbolValue; n += 2) |
| 199 | op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]); |
| 200 | return ((maxSymbolValue + 1) / 2) + 1; |
| 201 | } |
| 202 | |
| 203 | size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) |
| 204 | { |
| 205 | U32 *rankVal; |
| 206 | BYTE *huffWeight; |
| 207 | U32 tableLog = 0; |
| 208 | U32 nbSymbols = 0; |
| 209 | size_t readSize; |
| 210 | size_t spaceUsed32 = 0; |
| 211 | |
| 212 | rankVal = (U32 *)workspace + spaceUsed32; |
| 213 | spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; |
| 214 | huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); |
| 215 | spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; |
| 216 | |
| 217 | if ((spaceUsed32 << 2) > workspaceSize) |
| 218 | return ERROR(tableLog_tooLarge); |
| 219 | workspace = (U32 *)workspace + spaceUsed32; |
| 220 | workspaceSize -= (spaceUsed32 << 2); |
| 221 | |
| 222 | /* get symbol weights */ |
| 223 | readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); |
| 224 | if (ERR_isError(readSize)) |
| 225 | return readSize; |
| 226 | |
| 227 | /* check result */ |
| 228 | if (tableLog > HUF_TABLELOG_MAX) |
| 229 | return ERROR(tableLog_tooLarge); |
| 230 | if (nbSymbols > maxSymbolValue + 1) |
| 231 | return ERROR(maxSymbolValue_tooSmall); |
| 232 | |
| 233 | /* Prepare base value per rank */ |
| 234 | { |
| 235 | U32 n, nextRankStart = 0; |
| 236 | for (n = 1; n <= tableLog; n++) { |
| 237 | U32 curr = nextRankStart; |
| 238 | nextRankStart += (rankVal[n] << (n - 1)); |
| 239 | rankVal[n] = curr; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | /* fill nbBits */ |
| 244 | { |
| 245 | U32 n; |
| 246 | for (n = 0; n < nbSymbols; n++) { |
| 247 | const U32 w = huffWeight[n]; |
| 248 | CTable[n].nbBits = (BYTE)(tableLog + 1 - w); |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | /* fill val */ |
| 253 | { |
| 254 | U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */ |
| 255 | U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0}; |
| 256 | { |
| 257 | U32 n; |
| 258 | for (n = 0; n < nbSymbols; n++) |
| 259 | nbPerRank[CTable[n].nbBits]++; |
| 260 | } |
| 261 | /* determine stating value per rank */ |
| 262 | valPerRank[tableLog + 1] = 0; /* for w==0 */ |
| 263 | { |
| 264 | U16 min = 0; |
| 265 | U32 n; |
| 266 | for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */ |
| 267 | valPerRank[n] = min; /* get starting value within each rank */ |
| 268 | min += nbPerRank[n]; |
| 269 | min >>= 1; |
| 270 | } |
| 271 | } |
| 272 | /* assign value within rank, symbol order */ |
| 273 | { |
| 274 | U32 n; |
| 275 | for (n = 0; n <= maxSymbolValue; n++) |
| 276 | CTable[n].val = valPerRank[CTable[n].nbBits]++; |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | return readSize; |
| 281 | } |
| 282 | |
| 283 | typedef struct nodeElt_s { |
| 284 | U32 count; |
| 285 | U16 parent; |
| 286 | BYTE byte; |
| 287 | BYTE nbBits; |
| 288 | } nodeElt; |
| 289 | |
| 290 | static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits) |
| 291 | { |
| 292 | const U32 largestBits = huffNode[lastNonNull].nbBits; |
| 293 | if (largestBits <= maxNbBits) |
| 294 | return largestBits; /* early exit : no elt > maxNbBits */ |
| 295 | |
| 296 | /* there are several too large elements (at least >= 2) */ |
| 297 | { |
| 298 | int totalCost = 0; |
| 299 | const U32 baseCost = 1 << (largestBits - maxNbBits); |
| 300 | U32 n = lastNonNull; |
| 301 | |
| 302 | while (huffNode[n].nbBits > maxNbBits) { |
| 303 | totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); |
| 304 | huffNode[n].nbBits = (BYTE)maxNbBits; |
| 305 | n--; |
| 306 | } /* n stops at huffNode[n].nbBits <= maxNbBits */ |
| 307 | while (huffNode[n].nbBits == maxNbBits) |
| 308 | n--; /* n end at index of smallest symbol using < maxNbBits */ |
| 309 | |
| 310 | /* renorm totalCost */ |
| 311 | totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ |
| 312 | |
| 313 | /* repay normalized cost */ |
| 314 | { |
| 315 | U32 const noSymbol = 0xF0F0F0F0; |
| 316 | U32 rankLast[HUF_TABLELOG_MAX + 2]; |
| 317 | int pos; |
| 318 | |
| 319 | /* Get pos of last (smallest) symbol per rank */ |
| 320 | memset(rankLast, 0xF0, sizeof(rankLast)); |
| 321 | { |
| 322 | U32 currNbBits = maxNbBits; |
| 323 | for (pos = n; pos >= 0; pos--) { |
| 324 | if (huffNode[pos].nbBits >= currNbBits) |
| 325 | continue; |
| 326 | currNbBits = huffNode[pos].nbBits; /* < maxNbBits */ |
| 327 | rankLast[maxNbBits - currNbBits] = pos; |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | while (totalCost > 0) { |
| 332 | U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; |
| 333 | for (; nBitsToDecrease > 1; nBitsToDecrease--) { |
| 334 | U32 highPos = rankLast[nBitsToDecrease]; |
| 335 | U32 lowPos = rankLast[nBitsToDecrease - 1]; |
| 336 | if (highPos == noSymbol) |
| 337 | continue; |
| 338 | if (lowPos == noSymbol) |
| 339 | break; |
| 340 | { |
| 341 | U32 const highTotal = huffNode[highPos].count; |
| 342 | U32 const lowTotal = 2 * huffNode[lowPos].count; |
| 343 | if (highTotal <= lowTotal) |
| 344 | break; |
| 345 | } |
| 346 | } |
| 347 | /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ |
| 348 | /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ |
| 349 | while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) |
| 350 | nBitsToDecrease++; |
| 351 | totalCost -= 1 << (nBitsToDecrease - 1); |
| 352 | if (rankLast[nBitsToDecrease - 1] == noSymbol) |
| 353 | rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ |
| 354 | huffNode[rankLast[nBitsToDecrease]].nbBits++; |
| 355 | if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ |
| 356 | rankLast[nBitsToDecrease] = noSymbol; |
| 357 | else { |
| 358 | rankLast[nBitsToDecrease]--; |
| 359 | if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease) |
| 360 | rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ |
| 361 | } |
| 362 | } /* while (totalCost > 0) */ |
| 363 | |
| 364 | while (totalCost < 0) { /* Sometimes, cost correction overshoot */ |
| 365 | if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 |
| 366 | (using maxNbBits) */ |
| 367 | while (huffNode[n].nbBits == maxNbBits) |
| 368 | n--; |
| 369 | huffNode[n + 1].nbBits--; |
| 370 | rankLast[1] = n + 1; |
| 371 | totalCost++; |
| 372 | continue; |
| 373 | } |
| 374 | huffNode[rankLast[1] + 1].nbBits--; |
| 375 | rankLast[1]++; |
| 376 | totalCost++; |
| 377 | } |
| 378 | } |
| 379 | } /* there are several too large elements (at least >= 2) */ |
| 380 | |
| 381 | return maxNbBits; |
| 382 | } |
| 383 | |
| 384 | typedef struct { |
| 385 | U32 base; |
| 386 | U32 curr; |
| 387 | } rankPos; |
| 388 | |
| 389 | static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue) |
| 390 | { |
| 391 | rankPos rank[32]; |
| 392 | U32 n; |
| 393 | |
| 394 | memset(rank, 0, sizeof(rank)); |
| 395 | for (n = 0; n <= maxSymbolValue; n++) { |
| 396 | U32 r = BIT_highbit32(count[n] + 1); |
| 397 | rank[r].base++; |
| 398 | } |
| 399 | for (n = 30; n > 0; n--) |
| 400 | rank[n - 1].base += rank[n].base; |
| 401 | for (n = 0; n < 32; n++) |
| 402 | rank[n].curr = rank[n].base; |
| 403 | for (n = 0; n <= maxSymbolValue; n++) { |
| 404 | U32 const c = count[n]; |
| 405 | U32 const r = BIT_highbit32(c + 1) + 1; |
| 406 | U32 pos = rank[r].curr++; |
| 407 | while ((pos > rank[r].base) && (c > huffNode[pos - 1].count)) |
| 408 | huffNode[pos] = huffNode[pos - 1], pos--; |
| 409 | huffNode[pos].count = c; |
| 410 | huffNode[pos].byte = (BYTE)n; |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | /** HUF_buildCTable_wksp() : |
| 415 | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. |
| 416 | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. |
| 417 | */ |
| 418 | #define STARTNODE (HUF_SYMBOLVALUE_MAX + 1) |
| 419 | typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1]; |
| 420 | size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize) |
| 421 | { |
| 422 | nodeElt *const huffNode0 = (nodeElt *)workSpace; |
| 423 | nodeElt *const huffNode = huffNode0 + 1; |
| 424 | U32 n, nonNullRank; |
| 425 | int lowS, lowN; |
| 426 | U16 nodeNb = STARTNODE; |
| 427 | U32 nodeRoot; |
| 428 | |
| 429 | /* safety checks */ |
| 430 | if (wkspSize < sizeof(huffNodeTable)) |
| 431 | return ERROR(GENERIC); /* workSpace is not large enough */ |
| 432 | if (maxNbBits == 0) |
| 433 | maxNbBits = HUF_TABLELOG_DEFAULT; |
| 434 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) |
| 435 | return ERROR(GENERIC); |
| 436 | memset(huffNode0, 0, sizeof(huffNodeTable)); |
| 437 | |
| 438 | /* sort, decreasing order */ |
| 439 | HUF_sort(huffNode, count, maxSymbolValue); |
| 440 | |
| 441 | /* init for parents */ |
| 442 | nonNullRank = maxSymbolValue; |
| 443 | while (huffNode[nonNullRank].count == 0) |
| 444 | nonNullRank--; |
| 445 | lowS = nonNullRank; |
| 446 | nodeRoot = nodeNb + lowS - 1; |
| 447 | lowN = nodeNb; |
| 448 | huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count; |
| 449 | huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb; |
| 450 | nodeNb++; |
| 451 | lowS -= 2; |
| 452 | for (n = nodeNb; n <= nodeRoot; n++) |
| 453 | huffNode[n].count = (U32)(1U << 30); |
| 454 | huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */ |
| 455 | |
| 456 | /* create parents */ |
| 457 | while (nodeNb <= nodeRoot) { |
| 458 | U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
| 459 | U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
| 460 | huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; |
| 461 | huffNode[n1].parent = huffNode[n2].parent = nodeNb; |
| 462 | nodeNb++; |
| 463 | } |
| 464 | |
| 465 | /* distribute weights (unlimited tree height) */ |
| 466 | huffNode[nodeRoot].nbBits = 0; |
| 467 | for (n = nodeRoot - 1; n >= STARTNODE; n--) |
| 468 | huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1; |
| 469 | for (n = 0; n <= nonNullRank; n++) |
| 470 | huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1; |
| 471 | |
| 472 | /* enforce maxTableLog */ |
| 473 | maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); |
| 474 | |
| 475 | /* fill result into tree (val, nbBits) */ |
| 476 | { |
| 477 | U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0}; |
| 478 | U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0}; |
| 479 | if (maxNbBits > HUF_TABLELOG_MAX) |
| 480 | return ERROR(GENERIC); /* check fit into table */ |
| 481 | for (n = 0; n <= nonNullRank; n++) |
| 482 | nbPerRank[huffNode[n].nbBits]++; |
| 483 | /* determine stating value per rank */ |
| 484 | { |
| 485 | U16 min = 0; |
| 486 | for (n = maxNbBits; n > 0; n--) { |
| 487 | valPerRank[n] = min; /* get starting value within each rank */ |
| 488 | min += nbPerRank[n]; |
| 489 | min >>= 1; |
| 490 | } |
| 491 | } |
| 492 | for (n = 0; n <= maxSymbolValue; n++) |
| 493 | tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ |
| 494 | for (n = 0; n <= maxSymbolValue; n++) |
| 495 | tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ |
| 496 | } |
| 497 | |
| 498 | return maxNbBits; |
| 499 | } |
| 500 | |
| 501 | static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue) |
| 502 | { |
| 503 | size_t nbBits = 0; |
| 504 | int s; |
| 505 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
| 506 | nbBits += CTable[s].nbBits * count[s]; |
| 507 | } |
| 508 | return nbBits >> 3; |
| 509 | } |
| 510 | |
| 511 | static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue) |
| 512 | { |
| 513 | int bad = 0; |
| 514 | int s; |
| 515 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
| 516 | bad |= (count[s] != 0) & (CTable[s].nbBits == 0); |
| 517 | } |
| 518 | return !bad; |
| 519 | } |
| 520 | |
| 521 | static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable) |
| 522 | { |
| 523 | BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); |
| 524 | } |
| 525 | |
| 526 | size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } |
| 527 | |
| 528 | #define HUF_FLUSHBITS(s) BIT_flushBits(s) |
| 529 | |
| 530 | #define HUF_FLUSHBITS_1(stream) \ |
| 531 | if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \ |
| 532 | HUF_FLUSHBITS(stream) |
| 533 | |
| 534 | #define HUF_FLUSHBITS_2(stream) \ |
| 535 | if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \ |
| 536 | HUF_FLUSHBITS(stream) |
| 537 | |
| 538 | size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable) |
| 539 | { |
| 540 | const BYTE *ip = (const BYTE *)src; |
| 541 | BYTE *const ostart = (BYTE *)dst; |
| 542 | BYTE *const oend = ostart + dstSize; |
| 543 | BYTE *op = ostart; |
| 544 | size_t n; |
| 545 | BIT_CStream_t bitC; |
| 546 | |
| 547 | /* init */ |
| 548 | if (dstSize < 8) |
| 549 | return 0; /* not enough space to compress */ |
| 550 | { |
| 551 | size_t const initErr = BIT_initCStream(&bitC, op, oend - op); |
| 552 | if (HUF_isError(initErr)) |
| 553 | return 0; |
| 554 | } |
| 555 | |
| 556 | n = srcSize & ~3; /* join to mod 4 */ |
| 557 | switch (srcSize & 3) { |
| 558 | case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); |
Gustavo A. R. Silva | 224b44d | 2019-01-24 21:37:43 -0600 | [diff] [blame] | 559 | /* fall through */ |
Nick Terrell | 73f3d1b | 2017-08-09 19:35:53 -0700 | [diff] [blame] | 560 | case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); |
Gustavo A. R. Silva | 224b44d | 2019-01-24 21:37:43 -0600 | [diff] [blame] | 561 | /* fall through */ |
Nick Terrell | 73f3d1b | 2017-08-09 19:35:53 -0700 | [diff] [blame] | 562 | case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); |
| 563 | case 0: |
| 564 | default:; |
| 565 | } |
| 566 | |
| 567 | for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */ |
| 568 | HUF_encodeSymbol(&bitC, ip[n - 1], CTable); |
| 569 | HUF_FLUSHBITS_1(&bitC); |
| 570 | HUF_encodeSymbol(&bitC, ip[n - 2], CTable); |
| 571 | HUF_FLUSHBITS_2(&bitC); |
| 572 | HUF_encodeSymbol(&bitC, ip[n - 3], CTable); |
| 573 | HUF_FLUSHBITS_1(&bitC); |
| 574 | HUF_encodeSymbol(&bitC, ip[n - 4], CTable); |
| 575 | HUF_FLUSHBITS(&bitC); |
| 576 | } |
| 577 | |
| 578 | return BIT_closeCStream(&bitC); |
| 579 | } |
| 580 | |
| 581 | size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable) |
| 582 | { |
| 583 | size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */ |
| 584 | const BYTE *ip = (const BYTE *)src; |
| 585 | const BYTE *const iend = ip + srcSize; |
| 586 | BYTE *const ostart = (BYTE *)dst; |
| 587 | BYTE *const oend = ostart + dstSize; |
| 588 | BYTE *op = ostart; |
| 589 | |
| 590 | if (dstSize < 6 + 1 + 1 + 1 + 8) |
| 591 | return 0; /* minimum space to compress successfully */ |
| 592 | if (srcSize < 12) |
| 593 | return 0; /* no saving possible : too small input */ |
| 594 | op += 6; /* jumpTable */ |
| 595 | |
| 596 | { |
| 597 | CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); |
| 598 | if (cSize == 0) |
| 599 | return 0; |
| 600 | ZSTD_writeLE16(ostart, (U16)cSize); |
| 601 | op += cSize; |
| 602 | } |
| 603 | |
| 604 | ip += segmentSize; |
| 605 | { |
| 606 | CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); |
| 607 | if (cSize == 0) |
| 608 | return 0; |
| 609 | ZSTD_writeLE16(ostart + 2, (U16)cSize); |
| 610 | op += cSize; |
| 611 | } |
| 612 | |
| 613 | ip += segmentSize; |
| 614 | { |
| 615 | CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); |
| 616 | if (cSize == 0) |
| 617 | return 0; |
| 618 | ZSTD_writeLE16(ostart + 4, (U16)cSize); |
| 619 | op += cSize; |
| 620 | } |
| 621 | |
| 622 | ip += segmentSize; |
| 623 | { |
| 624 | CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable)); |
| 625 | if (cSize == 0) |
| 626 | return 0; |
| 627 | op += cSize; |
| 628 | } |
| 629 | |
| 630 | return op - ostart; |
| 631 | } |
| 632 | |
| 633 | static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream, |
| 634 | const HUF_CElt *CTable) |
| 635 | { |
| 636 | size_t const cSize = |
| 637 | singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); |
| 638 | if (HUF_isError(cSize)) { |
| 639 | return cSize; |
| 640 | } |
| 641 | if (cSize == 0) { |
| 642 | return 0; |
| 643 | } /* uncompressible */ |
| 644 | op += cSize; |
| 645 | /* check compressibility */ |
| 646 | if ((size_t)(op - ostart) >= srcSize - 1) { |
| 647 | return 0; |
| 648 | } |
| 649 | return op - ostart; |
| 650 | } |
| 651 | |
| 652 | /* `workSpace` must a table of at least 1024 unsigned */ |
| 653 | static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, |
| 654 | unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat) |
| 655 | { |
| 656 | BYTE *const ostart = (BYTE *)dst; |
| 657 | BYTE *const oend = ostart + dstSize; |
| 658 | BYTE *op = ostart; |
| 659 | |
| 660 | U32 *count; |
| 661 | size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1); |
| 662 | HUF_CElt *CTable; |
| 663 | size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1); |
| 664 | |
| 665 | /* checks & inits */ |
| 666 | if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) |
| 667 | return ERROR(GENERIC); |
| 668 | if (!srcSize) |
| 669 | return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ |
| 670 | if (!dstSize) |
| 671 | return 0; /* cannot fit within dst budget */ |
| 672 | if (srcSize > HUF_BLOCKSIZE_MAX) |
| 673 | return ERROR(srcSize_wrong); /* curr block size limit */ |
| 674 | if (huffLog > HUF_TABLELOG_MAX) |
| 675 | return ERROR(tableLog_tooLarge); |
| 676 | if (!maxSymbolValue) |
| 677 | maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
| 678 | if (!huffLog) |
| 679 | huffLog = HUF_TABLELOG_DEFAULT; |
| 680 | |
| 681 | count = (U32 *)workSpace; |
| 682 | workSpace = (BYTE *)workSpace + countSize; |
| 683 | wkspSize -= countSize; |
| 684 | CTable = (HUF_CElt *)workSpace; |
| 685 | workSpace = (BYTE *)workSpace + CTableSize; |
| 686 | wkspSize -= CTableSize; |
| 687 | |
| 688 | /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */ |
| 689 | if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { |
| 690 | return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); |
| 691 | } |
| 692 | |
| 693 | /* Scan input and build symbol stats */ |
| 694 | { |
| 695 | CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace)); |
| 696 | if (largest == srcSize) { |
| 697 | *ostart = ((const BYTE *)src)[0]; |
| 698 | return 1; |
| 699 | } /* single symbol, rle */ |
| 700 | if (largest <= (srcSize >> 7) + 1) |
| 701 | return 0; /* Fast heuristic : not compressible enough */ |
| 702 | } |
| 703 | |
| 704 | /* Check validity of previous table */ |
| 705 | if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) { |
| 706 | *repeat = HUF_repeat_none; |
| 707 | } |
| 708 | /* Heuristic : use existing table for small inputs */ |
| 709 | if (preferRepeat && repeat && *repeat != HUF_repeat_none) { |
| 710 | return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); |
| 711 | } |
| 712 | |
| 713 | /* Build Huffman Tree */ |
| 714 | huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); |
| 715 | { |
| 716 | CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize)); |
| 717 | huffLog = (U32)maxBits; |
| 718 | /* Zero the unused symbols so we can check it for validity */ |
| 719 | memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt)); |
| 720 | } |
| 721 | |
| 722 | /* Write table description header */ |
| 723 | { |
| 724 | CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize)); |
| 725 | /* Check if using the previous table will be beneficial */ |
| 726 | if (repeat && *repeat != HUF_repeat_none) { |
| 727 | size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue); |
| 728 | size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue); |
| 729 | if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { |
| 730 | return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); |
| 731 | } |
| 732 | } |
| 733 | /* Use the new table */ |
| 734 | if (hSize + 12ul >= srcSize) { |
| 735 | return 0; |
| 736 | } |
| 737 | op += hSize; |
| 738 | if (repeat) { |
| 739 | *repeat = HUF_repeat_none; |
| 740 | } |
| 741 | if (oldHufTable) { |
| 742 | memcpy(oldHufTable, CTable, CTableSize); |
| 743 | } /* Save the new table */ |
| 744 | } |
| 745 | return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable); |
| 746 | } |
| 747 | |
| 748 | size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, |
| 749 | size_t wkspSize) |
| 750 | { |
| 751 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0); |
| 752 | } |
| 753 | |
| 754 | size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, |
| 755 | size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat) |
| 756 | { |
| 757 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, |
| 758 | preferRepeat); |
| 759 | } |
| 760 | |
| 761 | size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, |
| 762 | size_t wkspSize) |
| 763 | { |
| 764 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0); |
| 765 | } |
| 766 | |
| 767 | size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, |
| 768 | size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat) |
| 769 | { |
| 770 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, |
| 771 | preferRepeat); |
| 772 | } |