blob: 2b0a643c32c4b2bf24e56a0325d955a59b4f5cdd [file] [log] [blame]
Nick Terrell73f3d1b2017-08-09 19:35:53 -07001/*
2 * Common functions of New Generation Entropy library
3 * Copyright (C) 2016, Yann Collet.
4 *
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * This program is free software; you can redistribute it and/or modify it under
31 * the terms of the GNU General Public License version 2 as published by the
32 * Free Software Foundation. This program is dual-licensed; you may select
33 * either version 2 of the GNU General Public License ("GPL") or BSD license
34 * ("BSD").
35 *
36 * You can contact the author at :
37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
38 */
39
40/* *************************************
41* Dependencies
42***************************************/
43#include "error_private.h" /* ERR_*, ERROR */
44#include "fse.h"
45#include "huf.h"
46#include "mem.h"
47
48/*=== Version ===*/
49unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
50
51/*=== Error Management ===*/
/*
 * FSE_isError() :
 * Classify a value returned by an FSE function.
 * @return : the result of ERR_isError() applied to `code`.
 */
unsigned FSE_isError(size_t code)
{
	return ERR_isError(code);
}
53
/*
 * HUF_isError() :
 * Classify a value returned by a HUF function.
 * @return : the result of ERR_isError() applied to `code`.
 */
unsigned HUF_isError(size_t code)
{
	return ERR_isError(code);
}
55
56/*-**************************************************************
57* FSE NCount encoding-decoding
58****************************************************************/
59size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize)
60{
61 const BYTE *const istart = (const BYTE *)headerBuffer;
62 const BYTE *const iend = istart + hbSize;
63 const BYTE *ip = istart;
64 int nbBits;
65 int remaining;
66 int threshold;
67 U32 bitStream;
68 int bitCount;
69 unsigned charnum = 0;
70 int previous0 = 0;
71
72 if (hbSize < 4)
73 return ERROR(srcSize_wrong);
74 bitStream = ZSTD_readLE32(ip);
75 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
76 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
77 return ERROR(tableLog_tooLarge);
78 bitStream >>= 4;
79 bitCount = 4;
80 *tableLogPtr = nbBits;
81 remaining = (1 << nbBits) + 1;
82 threshold = 1 << nbBits;
83 nbBits++;
84
85 while ((remaining > 1) & (charnum <= *maxSVPtr)) {
86 if (previous0) {
87 unsigned n0 = charnum;
88 while ((bitStream & 0xFFFF) == 0xFFFF) {
89 n0 += 24;
90 if (ip < iend - 5) {
91 ip += 2;
92 bitStream = ZSTD_readLE32(ip) >> bitCount;
93 } else {
94 bitStream >>= 16;
95 bitCount += 16;
96 }
97 }
98 while ((bitStream & 3) == 3) {
99 n0 += 3;
100 bitStream >>= 2;
101 bitCount += 2;
102 }
103 n0 += bitStream & 3;
104 bitCount += 2;
105 if (n0 > *maxSVPtr)
106 return ERROR(maxSymbolValue_tooSmall);
107 while (charnum < n0)
108 normalizedCounter[charnum++] = 0;
109 if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
110 ip += bitCount >> 3;
111 bitCount &= 7;
112 bitStream = ZSTD_readLE32(ip) >> bitCount;
113 } else {
114 bitStream >>= 2;
115 }
116 }
117 {
118 int const max = (2 * threshold - 1) - remaining;
119 int count;
120
121 if ((bitStream & (threshold - 1)) < (U32)max) {
122 count = bitStream & (threshold - 1);
123 bitCount += nbBits - 1;
124 } else {
125 count = bitStream & (2 * threshold - 1);
126 if (count >= threshold)
127 count -= max;
128 bitCount += nbBits;
129 }
130
131 count--; /* extra accuracy */
132 remaining -= count < 0 ? -count : count; /* -1 means +1 */
133 normalizedCounter[charnum++] = (short)count;
134 previous0 = !count;
135 while (remaining < threshold) {
136 nbBits--;
137 threshold >>= 1;
138 }
139
140 if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
141 ip += bitCount >> 3;
142 bitCount &= 7;
143 } else {
144 bitCount -= (int)(8 * (iend - 4 - ip));
145 ip = iend - 4;
146 }
147 bitStream = ZSTD_readLE32(ip) >> (bitCount & 31);
148 }
149 } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
150 if (remaining != 1)
151 return ERROR(corruption_detected);
152 if (bitCount > 32)
153 return ERROR(corruption_detected);
154 *maxSVPtr = charnum - 1;
155
156 ip += (bitCount + 7) >> 3;
157 return ip - istart;
158}
159
160/*! HUF_readStats() :
161 Read compact Huffman tree, saved by HUF_writeCTable().
162 `huffWeight` is destination buffer.
163 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
164 @return : size read from `src` , or an error Code .
165 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
166*/
167size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
168{
169 U32 weightTotal;
170 const BYTE *ip = (const BYTE *)src;
171 size_t iSize;
172 size_t oSize;
173
174 if (!srcSize)
175 return ERROR(srcSize_wrong);
176 iSize = ip[0];
177 /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */
178
179 if (iSize >= 128) { /* special header */
180 oSize = iSize - 127;
181 iSize = ((oSize + 1) / 2);
182 if (iSize + 1 > srcSize)
183 return ERROR(srcSize_wrong);
184 if (oSize >= hwSize)
185 return ERROR(corruption_detected);
186 ip += 1;
187 {
188 U32 n;
189 for (n = 0; n < oSize; n += 2) {
190 huffWeight[n] = ip[n / 2] >> 4;
191 huffWeight[n + 1] = ip[n / 2] & 15;
192 }
193 }
194 } else { /* header compressed with FSE (normal case) */
195 if (iSize + 1 > srcSize)
196 return ERROR(srcSize_wrong);
197 oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */
198 if (FSE_isError(oSize))
199 return oSize;
200 }
201
202 /* collect weight stats */
203 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
204 weightTotal = 0;
205 {
206 U32 n;
207 for (n = 0; n < oSize; n++) {
208 if (huffWeight[n] >= HUF_TABLELOG_MAX)
209 return ERROR(corruption_detected);
210 rankStats[huffWeight[n]]++;
211 weightTotal += (1 << huffWeight[n]) >> 1;
212 }
213 }
214 if (weightTotal == 0)
215 return ERROR(corruption_detected);
216
217 /* get last non-null symbol weight (implied, total must be 2^n) */
218 {
219 U32 const tableLog = BIT_highbit32(weightTotal) + 1;
220 if (tableLog > HUF_TABLELOG_MAX)
221 return ERROR(corruption_detected);
222 *tableLogPtr = tableLog;
223 /* determine last weight */
224 {
225 U32 const total = 1 << tableLog;
226 U32 const rest = total - weightTotal;
227 U32 const verif = 1 << BIT_highbit32(rest);
228 U32 const lastWeight = BIT_highbit32(rest) + 1;
229 if (verif != rest)
230 return ERROR(corruption_detected); /* last value must be a clean power of 2 */
231 huffWeight[oSize] = (BYTE)lastWeight;
232 rankStats[lastWeight]++;
233 }
234 }
235
236 /* check tree construction validity */
237 if ((rankStats[1] < 2) || (rankStats[1] & 1))
238 return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
239
240 /* results */
241 *nbSymbolsPtr = (U32)(oSize + 1);
242 return iSize + 1;
243}