Blame - lib/decompress_bunzip2.c - SHIFTPHONES/mainline/linux

blob: 7c4932eed748165b758d1f99b19a8d56cb25540f [file] [log] [blame]

Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	1	/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
				2
				3	Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
				4	which also acknowledges contributions by Mike Burrows, David Wheeler,
				5	Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
				6	Robert Sedgewick, and Jon L. Bentley.
				7
				8	This code is licensed under the LGPLv2:
				9	LGPL (http://www.gnu.org/copyleft/lgpl.html
				10	*/
				11
				12	/*
				13	Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
				14
				15	More efficient reading of Huffman codes, a streamlined read_bunzip()
				16	function, and various other tweaks. In (limited) tests, approximately
				17	20% faster than bzcat on x86 and about 10% faster on arm.
				18
				19	Note that about 2/3 of the time is spent in read_unzip() reversing
				20	the Burrows-Wheeler transformation. Much of that time is delay
				21	resulting from cache misses.
				22
				23	I would ask that anyone benefiting from this work, especially those
				24	using it in commercial products, consider making a donation to my local
				25	non-profit hospice organization in the name of the woman I loved, who
				26	passed away Feb. 12, 2003.
				27
				28	In memory of Toni W. Hagan
				29
				30	Hospice of Acadiana, Inc.
				31	2600 Johnston St., Suite 200
				32	Lafayette, LA 70503-3240
				33
				34	Phone (337) 232-1234 or 1-800-738-2226
				35	Fax (337) 232-1297
				36
				37	http://www.hospiceacadiana.com/
				38
				39	Manuel
				40	*/
				41
				42	/*
				43	Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
				44	*/
				45
				46
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	47	#ifdef STATIC
				48	#define PREBOOT
				49	#else
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	50	#include <linux/decompress/bunzip2.h>
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	51	#endif /* STATIC */
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	52
				53	#include <linux/decompress/mm.h>
Krzysztof Kozlowski	faa16bc	2018-07-17 18:05:41 +0200	[diff] [blame]	54	#include <linux/crc32poly.h>
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	55
				56	#ifndef INT_MAX
				57	#define INT_MAX 0x7fffffff
				58	#endif
				59
				60	/* Constants for Huffman coding */
				61	#define MAX_GROUPS 6
				62	#define GROUP_SIZE 50 /* 64 would have been more efficient */
				63	#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
				64	#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
				65	#define SYMBOL_RUNA 0
				66	#define SYMBOL_RUNB 1
				67
				68	/* Status return values */
				69	#define RETVAL_OK 0
				70	#define RETVAL_LAST_BLOCK (-1)
				71	#define RETVAL_NOT_BZIP_DATA (-2)
				72	#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
				73	#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
				74	#define RETVAL_DATA_ERROR (-5)
				75	#define RETVAL_OUT_OF_MEMORY (-6)
				76	#define RETVAL_OBSOLETE_INPUT (-7)
				77
				78	/* Other housekeeping constants */
				79	#define BZIP2_IOBUF_SIZE 4096
				80
				81	/* This is what we know about each Huffman coding group */
				82	struct group_data {
				83	/* We have an extra slot at the end of limit[] for a sentinal value. */
				84	int limit[MAX_HUFCODE_BITS+1];
				85	int base[MAX_HUFCODE_BITS];
				86	int permute[MAX_SYMBOLS];
				87	int minLen, maxLen;
				88	};
				89
				90	/* Structure holding all the housekeeping data, including IO buffers and
				91	memory that persists between calls to bunzip */
				92	struct bunzip_data {
				93	/* State for interrupting output loop */
				94	int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
				95	/* I/O tracking data (file handles, buffers, positions, etc.) */
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	96	long (fill)(void, unsigned long);
				97	long inbufCount, inbufPos /, outbufPos/;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	98	unsigned char inbuf /,outbuf/;
				99	unsigned int inbufBitCount, inbufBits;
				100	/* The CRC values stored in the block header and calculated from the
				101	data */
				102	unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
				103	/* Intermediate buffer and its size (in bytes) */
				104	unsigned int *dbuf, dbufSize;
				105	/* These things are a bit too big to go on the stack */
				106	unsigned char selectors[32768]; /* nSelectors = 15 bits */
				107	struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
				108	int io_error; /* non-zero if we have IO error */
Prarit Bhargava	dd21e9b	2010-08-10 18:03:40 -0700	[diff] [blame]	109	int byteCount[256];
				110	unsigned char symToByte[256], mtfSymbol[256];
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	111	};
				112
				113
				114	/* Return the next nnn bits of input. All reads from the compressed input
				115	are done through this function. All reads are big endian */
				116	static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
				117	{
				118	unsigned int bits = 0;
				119
				120	/* If we need to get more data from the byte buffer, do so.
				121	(Loop getting one byte at a time to enforce endianness and avoid
				122	unaligned access.) */
				123	while (bd->inbufBitCount < bits_wanted) {
				124	/* If we need to read more data from file into byte buffer, do
				125	so */
				126	if (bd->inbufPos == bd->inbufCount) {
				127	if (bd->io_error)
				128	return 0;
				129	bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
				130	if (bd->inbufCount <= 0) {
				131	bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
				132	return 0;
				133	}
				134	bd->inbufPos = 0;
				135	}
				136	/* Avoid 32-bit overflow (dump bit buffer to top of output) */
				137	if (bd->inbufBitCount >= 24) {
				138	bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
				139	bits_wanted -= bd->inbufBitCount;
				140	bits <<= bits_wanted;
				141	bd->inbufBitCount = 0;
				142	}
				143	/* Grab next 8 bits of input from buffer. */
				144	bd->inbufBits = (bd->inbufBits << 8)\|bd->inbuf[bd->inbufPos++];
				145	bd->inbufBitCount += 8;
				146	}
				147	/* Calculate result */
				148	bd->inbufBitCount -= bits_wanted;
				149	bits \|= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
				150
				151	return bits;
				152	}
				153
				154	/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */
				155
				156	static int INIT get_next_block(struct bunzip_data *bd)
				157	{
				158	struct group_data *hufGroup = NULL;
				159	int *base = NULL;
				160	int *limit = NULL;
				161	int dbufCount, nextSym, dbufSize, groupCount, selector,
Prarit Bhargava	dd21e9b	2010-08-10 18:03:40 -0700	[diff] [blame]	162	i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
				163	unsigned char uc, symToByte, mtfSymbol, *selectors;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	164	unsigned int *dbuf, origPtr;
				165
				166	dbuf = bd->dbuf;
				167	dbufSize = bd->dbufSize;
				168	selectors = bd->selectors;
Prarit Bhargava	dd21e9b	2010-08-10 18:03:40 -0700	[diff] [blame]	169	byteCount = bd->byteCount;
				170	symToByte = bd->symToByte;
				171	mtfSymbol = bd->mtfSymbol;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	172
				173	/* Read in header signature and CRC, then validate signature.
				174	(last block signature means CRC is for whole file, return now) */
				175	i = get_bits(bd, 24);
				176	j = get_bits(bd, 24);
				177	bd->headerCRC = get_bits(bd, 32);
				178	if ((i == 0x177245) && (j == 0x385090))
				179	return RETVAL_LAST_BLOCK;
				180	if ((i != 0x314159) \|\| (j != 0x265359))
				181	return RETVAL_NOT_BZIP_DATA;
				182	/* We can add support for blockRandomised if anybody complains.
				183	There was some code for this in busybox 1.0.0-pre3, but nobody ever
				184	noticed that it didn't actually work. */
				185	if (get_bits(bd, 1))
				186	return RETVAL_OBSOLETE_INPUT;
				187	origPtr = get_bits(bd, 24);
Dan Carpenter	b5c8afe	2014-12-12 16:58:05 -0800	[diff] [blame]	188	if (origPtr >= dbufSize)
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	189	return RETVAL_DATA_ERROR;
				190	/* mapping table: if some byte values are never used (encoding things
				191	like ascii text), the compression code removes the gaps to have fewer
				192	symbols to deal with, and writes a sparse bitfield indicating which
				193	values were present. We make a translation table to convert the
				194	symbols back to the corresponding bytes. */
				195	t = get_bits(bd, 16);
				196	symTotal = 0;
				197	for (i = 0; i < 16; i++) {
				198	if (t&(1 << (15-i))) {
				199	k = get_bits(bd, 16);
				200	for (j = 0; j < 16; j++)
				201	if (k&(1 << (15-j)))
				202	symToByte[symTotal++] = (16*i)+j;
				203	}
				204	}
				205	/* How many different Huffman coding groups does this block use? */
				206	groupCount = get_bits(bd, 3);
				207	if (groupCount < 2 \|\| groupCount > MAX_GROUPS)
				208	return RETVAL_DATA_ERROR;
				209	/* nSelectors: Every GROUP_SIZE many symbols we select a new
				210	Huffman coding group. Read in the group selector list,
				211	which is stored as MTF encoded bit runs. (MTF = Move To
				212	Front, as each value is used it's moved to the start of the
				213	list.) */
				214	nSelectors = get_bits(bd, 15);
				215	if (!nSelectors)
				216	return RETVAL_DATA_ERROR;
				217	for (i = 0; i < groupCount; i++)
				218	mtfSymbol[i] = i;
				219	for (i = 0; i < nSelectors; i++) {
				220	/* Get next value */
				221	for (j = 0; get_bits(bd, 1); j++)
				222	if (j >= groupCount)
				223	return RETVAL_DATA_ERROR;
				224	/* Decode MTF to get the next selector */
				225	uc = mtfSymbol[j];
				226	for (; j; j--)
				227	mtfSymbol[j] = mtfSymbol[j-1];
				228	mtfSymbol[0] = selectors[i] = uc;
				229	}
				230	/* Read the Huffman coding tables for each group, which code
				231	for symTotal literal symbols, plus two run symbols (RUNA,
				232	RUNB) */
				233	symCount = symTotal+2;
				234	for (j = 0; j < groupCount; j++) {
				235	unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
				236	int minLen, maxLen, pp;
				237	/* Read Huffman code lengths for each symbol. They're
				238	stored in a way similar to mtf; record a starting
				239	value for the first symbol, and an offset from the
				240	previous value for everys symbol after that.
				241	(Subtracting 1 before the loop and then adding it
				242	back at the end is an optimization that makes the
				243	test inside the loop simpler: symbol length 0
				244	becomes negative, so an unsigned inequality catches
				245	it.) */
				246	t = get_bits(bd, 5)-1;
				247	for (i = 0; i < symCount; i++) {
				248	for (;;) {
				249	if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
				250	return RETVAL_DATA_ERROR;
				251
				252	/* If first bit is 0, stop. Else
				253	second bit indicates whether to
				254	increment or decrement the value.
				255	Optimization: grab 2 bits and unget
				256	the second if the first was 0. */
				257
				258	k = get_bits(bd, 2);
				259	if (k < 2) {
				260	bd->inbufBitCount++;
				261	break;
				262	}
				263	/* Add one if second bit 1, else
				264	* subtract 1. Avoids if/else */
				265	t += (((k+1)&2)-1);
				266	}
				267	/* Correct for the initial -1, to get the
				268	* final symbol length */
				269	length[i] = t+1;
				270	}
				271	/* Find largest and smallest lengths in this group */
				272	minLen = maxLen = length[0];
				273
				274	for (i = 1; i < symCount; i++) {
				275	if (length[i] > maxLen)
				276	maxLen = length[i];
				277	else if (length[i] < minLen)
				278	minLen = length[i];
				279	}
				280
				281	/* Calculate permute[], base[], and limit[] tables from
				282	* length[].
				283	*
				284	* permute[] is the lookup table for converting
				285	* Huffman coded symbols into decoded symbols. base[]
				286	* is the amount to subtract from the value of a
				287	* Huffman symbol of a given length when using
				288	* permute[].
				289	*
				290	* limit[] indicates the largest numerical value a
				291	* symbol with a given number of bits can have. This
				292	* is how the Huffman codes can vary in length: each
				293	* code with a value > limit[length] needs another
				294	* bit.
				295	*/
				296	hufGroup = bd->groups+j;
				297	hufGroup->minLen = minLen;
				298	hufGroup->maxLen = maxLen;
				299	/* Note that minLen can't be smaller than 1, so we
				300	adjust the base and limit array pointers so we're
				301	not always wasting the first entry. We do this
				302	again when using them (during symbol decoding).*/
				303	base = hufGroup->base-1;
				304	limit = hufGroup->limit-1;
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	305	/* Calculate permute[]. Concurrently, initialize
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	306	* temp[] and limit[]. */
				307	pp = 0;
				308	for (i = minLen; i <= maxLen; i++) {
				309	temp[i] = limit[i] = 0;
				310	for (t = 0; t < symCount; t++)
				311	if (length[t] == i)
				312	hufGroup->permute[pp++] = t;
				313	}
				314	/* Count symbols coded for at each bit length */
				315	for (i = 0; i < symCount; i++)
				316	temp[length[i]]++;
				317	/* Calculate limit[] (the largest symbol-coding value
				318	*at each bit length, which is (previous limit <<
				319	*1)+symbols at this level), and base[] (number of
				320	*symbols to ignore at each bit length, which is limit
				321	*minus the cumulative count of symbols coded for
				322	already). /
				323	pp = t = 0;
				324	for (i = minLen; i < maxLen; i++) {
				325	pp += temp[i];
				326	/* We read the largest possible symbol size
				327	and then unget bits after determining how
				328	many we need, and those extra bits could be
				329	set to anything. (They're noise from
				330	future symbols.) At each level we're
				331	really only interested in the first few
				332	bits, so here we set all the trailing
				333	to-be-ignored bits to 1 so they don't
				334	affect the value > limit[length]
				335	comparison. */
				336	limit[i] = (pp << (maxLen - i)) - 1;
				337	pp <<= 1;
				338	base[i+1] = pp-(t += temp[i]);
				339	}
				340	limit[maxLen+1] = INT_MAX; /* Sentinal value for
				341	* reading next sym. */
				342	limit[maxLen] = pp+temp[maxLen]-1;
				343	base[minLen] = 0;
				344	}
				345	/* We've finished reading and digesting the block header. Now
				346	read this block's Huffman coded symbols from the file and
				347	undo the Huffman coding and run length encoding, saving the
				348	result into dbuf[dbufCount++] = uc */
				349
				350	/* Initialize symbol occurrence counters and symbol Move To
				351	* Front table */
				352	for (i = 0; i < 256; i++) {
				353	byteCount[i] = 0;
				354	mtfSymbol[i] = (unsigned char)i;
				355	}
				356	/* Loop through compressed symbols. */
				357	runPos = dbufCount = symCount = selector = 0;
				358	for (;;) {
				359	/* Determine which Huffman coding group to use. */
				360	if (!(symCount--)) {
				361	symCount = GROUP_SIZE-1;
				362	if (selector >= nSelectors)
				363	return RETVAL_DATA_ERROR;
				364	hufGroup = bd->groups+selectors[selector++];
				365	base = hufGroup->base-1;
				366	limit = hufGroup->limit-1;
				367	}
				368	/* Read next Huffman-coded symbol. */
				369	/* Note: It is far cheaper to read maxLen bits and
				370	back up than it is to read minLen bits and then an
				371	additional bit at a time, testing as we go.
				372	Because there is a trailing last block (with file
				373	CRC), there is no danger of the overread causing an
				374	unexpected EOF for a valid compressed file. As a
				375	further optimization, we do the read inline
				376	(falling back to a call to get_bits if the buffer
				377	runs dry). The following (up to got_huff_bits:) is
				378	equivalent to j = get_bits(bd, hufGroup->maxLen);
				379	*/
				380	while (bd->inbufBitCount < hufGroup->maxLen) {
				381	if (bd->inbufPos == bd->inbufCount) {
				382	j = get_bits(bd, hufGroup->maxLen);
				383	goto got_huff_bits;
				384	}
				385	bd->inbufBits =
				386	(bd->inbufBits << 8)\|bd->inbuf[bd->inbufPos++];
				387	bd->inbufBitCount += 8;
				388	};
				389	bd->inbufBitCount -= hufGroup->maxLen;
				390	j = (bd->inbufBits >> bd->inbufBitCount)&
				391	((1 << hufGroup->maxLen)-1);
				392	got_huff_bits:
				393	/* Figure how how many bits are in next symbol and
				394	* unget extras */
				395	i = hufGroup->minLen;
				396	while (j > limit[i])
				397	++i;
				398	bd->inbufBitCount += (hufGroup->maxLen - i);
				399	/* Huffman decode value to get nextSym (with bounds checking) */
				400	if ((i > hufGroup->maxLen)
				401	\|\| (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
				402	>= MAX_SYMBOLS))
				403	return RETVAL_DATA_ERROR;
				404	nextSym = hufGroup->permute[j];
				405	/* We have now decoded the symbol, which indicates
				406	either a new literal byte, or a repeated run of the
				407	most recent literal byte. First, check if nextSym
				408	indicates a repeated run, and if so loop collecting
				409	how many times to repeat the last literal. */
				410	if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
				411	/* If this is the start of a new run, zero out
				412	* counter */
				413	if (!runPos) {
				414	runPos = 1;
				415	t = 0;
				416	}
				417	/* Neat trick that saves 1 symbol: instead of
				418	or-ing 0 or 1 at each bit position, add 1
				419	or 2 instead. For example, 1011 is 1 << 0
				420	+ 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1
				421	+ 1 << 2. You can make any bit pattern
				422	that way using 1 less symbol than the basic
				423	or 0/1 method (except all bits 0, which
				424	would use no symbols, but a run of length 0
				425	doesn't mean anything in this context).
				426	Thus space is saved. */
				427	t += (runPos << nextSym);
				428	/* +runPos if RUNA; +2runPos if RUNB /
				429
				430	runPos <<= 1;
				431	continue;
				432	}
				433	/* When we hit the first non-run symbol after a run,
				434	we now know how many times to repeat the last
				435	literal, so append that many copies to our buffer
				436	of decoded symbols (dbuf) now. (The last literal
				437	used is the one at the head of the mtfSymbol
				438	array.) */
				439	if (runPos) {
				440	runPos = 0;
				441	if (dbufCount+t >= dbufSize)
				442	return RETVAL_DATA_ERROR;
				443
				444	uc = symToByte[mtfSymbol[0]];
				445	byteCount[uc] += t;
				446	while (t--)
				447	dbuf[dbufCount++] = uc;
				448	}
				449	/* Is this the terminating symbol? */
				450	if (nextSym > symTotal)
				451	break;
				452	/* At this point, nextSym indicates a new literal
				453	character. Subtract one to get the position in the
				454	MTF array at which this literal is currently to be
				455	found. (Note that the result can't be -1 or 0,
				456	because 0 and 1 are RUNA and RUNB. But another
				457	instance of the first symbol in the mtf array,
				458	position 0, would have been handled as part of a
				459	run above. Therefore 1 unused mtf position minus 2
				460	non-literal nextSym values equals -1.) */
				461	if (dbufCount >= dbufSize)
				462	return RETVAL_DATA_ERROR;
				463	i = nextSym - 1;
				464	uc = mtfSymbol[i];
				465	/* Adjust the MTF array. Since we typically expect to
				466	*move only a small number of symbols, and are bound
				467	*by 256 in any case, using memmove here would
				468	*typically be bigger and slower due to function call
				469	overhead and other assorted setup costs. /
				470	do {
				471	mtfSymbol[i] = mtfSymbol[i-1];
				472	} while (--i);
				473	mtfSymbol[0] = uc;
				474	uc = symToByte[uc];
				475	/* We have our literal byte. Save it into dbuf. */
				476	byteCount[uc]++;
				477	dbuf[dbufCount++] = (unsigned int)uc;
				478	}
				479	/* At this point, we've read all the Huffman-coded symbols
				480	(and repeated runs) for this block from the input stream,
				481	and decoded them into the intermediate buffer. There are
				482	dbufCount many decoded bytes in dbuf[]. Now undo the
				483	Burrows-Wheeler transform on dbuf. See
				484	http://dogma.net/markn/articles/bwt/bwt.htm
				485	*/
				486	/* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
				487	j = 0;
				488	for (i = 0; i < 256; i++) {
				489	k = j+byteCount[i];
				490	byteCount[i] = j;
				491	j = k;
				492	}
				493	/* Figure out what order dbuf would be in if we sorted it. */
				494	for (i = 0; i < dbufCount; i++) {
				495	uc = (unsigned char)(dbuf[i] & 0xff);
				496	dbuf[byteCount[uc]] \|= (i << 8);
				497	byteCount[uc]++;
				498	}
				499	/* Decode first byte by hand to initialize "previous" byte.
				500	Note that it doesn't get output, and if the first three
				501	characters are identical it doesn't qualify as a run (hence
				502	writeRunCountdown = 5). */
				503	if (dbufCount) {
				504	if (origPtr >= dbufCount)
				505	return RETVAL_DATA_ERROR;
				506	bd->writePos = dbuf[origPtr];
				507	bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
				508	bd->writePos >>= 8;
				509	bd->writeRunCountdown = 5;
				510	}
				511	bd->writeCount = dbufCount;
				512
				513	return RETVAL_OK;
				514	}
				515
				516	/* Undo burrows-wheeler transform on intermediate buffer to produce output.
				517	If start_bunzip was initialized with out_fd =-1, then up to len bytes of
				518	data are written to outbuf. Return value is number of bytes written or
				519	error (all errors are negative numbers). If out_fd!=-1, outbuf and len
				520	are ignored, data is written to out_fd and return is RETVAL_OK or error.
				521	*/
				522
				523	static int INIT read_bunzip(struct bunzip_data bd, char outbuf, int len)
				524	{
				525	const unsigned int *dbuf;
				526	int pos, xcurrent, previous, gotcount;
				527
				528	/* If last read was short due to end of file, return last block now */
				529	if (bd->writeCount < 0)
				530	return bd->writeCount;
				531
				532	gotcount = 0;
				533	dbuf = bd->dbuf;
				534	pos = bd->writePos;
				535	xcurrent = bd->writeCurrent;
				536
				537	/* We will always have pending decoded data to write into the output
				538	buffer unless this is the very first call (in which case we haven't
				539	Huffman-decoded a block into the intermediate buffer yet). */
				540
				541	if (bd->writeCopies) {
				542	/* Inside the loop, writeCopies means extra copies (beyond 1) */
				543	--bd->writeCopies;
				544	/* Loop outputting bytes */
				545	for (;;) {
				546	/* If the output buffer is full, snapshot
				547	* state and return */
				548	if (gotcount >= len) {
				549	bd->writePos = pos;
				550	bd->writeCurrent = xcurrent;
				551	bd->writeCopies++;
				552	return len;
				553	}
				554	/* Write next byte into output buffer, updating CRC */
				555	outbuf[gotcount++] = xcurrent;
				556	bd->writeCRC = (((bd->writeCRC) << 8)
				557	^bd->crc32Table[((bd->writeCRC) >> 24)
				558	^xcurrent]);
				559	/* Loop now if we're outputting multiple
				560	* copies of this byte */
				561	if (bd->writeCopies) {
				562	--bd->writeCopies;
				563	continue;
				564	}
				565	decode_next_byte:
				566	if (!bd->writeCount--)
				567	break;
				568	/* Follow sequence vector to undo
				569	* Burrows-Wheeler transform */
				570	previous = xcurrent;
				571	pos = dbuf[pos];
				572	xcurrent = pos&0xff;
				573	pos >>= 8;
				574	/* After 3 consecutive copies of the same
				575	byte, the 4th is a repeat count. We count
				576	down from 4 instead *of counting up because
				577	testing for non-zero is faster */
				578	if (--bd->writeRunCountdown) {
				579	if (xcurrent != previous)
				580	bd->writeRunCountdown = 4;
				581	} else {
				582	/* We have a repeated run, this byte
				583	* indicates the count */
				584	bd->writeCopies = xcurrent;
				585	xcurrent = previous;
				586	bd->writeRunCountdown = 5;
				587	/* Sometimes there are just 3 bytes
				588	* (run length 0) */
				589	if (!bd->writeCopies)
				590	goto decode_next_byte;
				591	/* Subtract the 1 copy we'd output
				592	* anyway to get extras */
				593	--bd->writeCopies;
				594	}
				595	}
				596	/* Decompression of this block completed successfully */
				597	bd->writeCRC = ~bd->writeCRC;
				598	bd->totalCRC = ((bd->totalCRC << 1) \|
				599	(bd->totalCRC >> 31)) ^ bd->writeCRC;
				600	/* If this block had a CRC error, force file level CRC error. */
				601	if (bd->writeCRC != bd->headerCRC) {
				602	bd->totalCRC = bd->headerCRC+1;
				603	return RETVAL_LAST_BLOCK;
				604	}
				605	}
				606
				607	/* Refill the intermediate buffer by Huffman-decoding next
				608	* block of input */
				609	/* (previous is just a convenient unused temp variable here) */
				610	previous = get_next_block(bd);
				611	if (previous) {
				612	bd->writeCount = previous;
				613	return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
				614	}
				615	bd->writeCRC = 0xffffffffUL;
				616	pos = bd->writePos;
				617	xcurrent = bd->writeCurrent;
				618	goto decode_next_byte;
				619	}
				620
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	621	static long INIT nofill(void *buf, unsigned long len)
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	622	{
				623	return -1;
				624	}
				625
				626	/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain
				627	a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are
				628	ignored, and data is read from file handle into temporary buffer. */
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	629	static int INIT start_bunzip(struct bunzip_data *bdp, void inbuf, long len,
				630	long (fill)(void, unsigned long))
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	631	{
				632	struct bunzip_data *bd;
				633	unsigned int i, j, c;
				634	const unsigned int BZh0 =
				635	(((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
				636	+(((unsigned int)'h') << 8)+(unsigned int)'0';
				637
				638	/* Figure out how much data to allocate */
				639	i = sizeof(struct bunzip_data);
				640
				641	/* Allocate bunzip_data. Most fields initialize to zero. */
				642	bd = *bdp = malloc(i);
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	643	if (!bd)
				644	return RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	645	memset(bd, 0, sizeof(struct bunzip_data));
				646	/* Setup input buffer */
				647	bd->inbuf = inbuf;
				648	bd->inbufCount = len;
				649	if (fill != NULL)
				650	bd->fill = fill;
				651	else
				652	bd->fill = nofill;
				653
				654	/* Init the CRC32 table (big endian) */
				655	for (i = 0; i < 256; i++) {
				656	c = i << 24;
				657	for (j = 8; j; j--)
Krzysztof Kozlowski	faa16bc	2018-07-17 18:05:41 +0200	[diff] [blame]	658	c = c&0x80000000 ? (c << 1)^(CRC32_POLY_BE) : (c << 1);
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	659	bd->crc32Table[i] = c;
				660	}
				661
				662	/* Ensure that file starts with "BZh['1'-'9']." */
				663	i = get_bits(bd, 32);
				664	if (((unsigned int)(i-BZh0-1)) >= 9)
				665	return RETVAL_NOT_BZIP_DATA;
				666
				667	/* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
				668	uncompressed data. Allocate intermediate buffer for block. */
				669	bd->dbufSize = 100000*(i-BZh0);
				670
				671	bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	672	if (!bd->dbuf)
				673	return RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	674	return RETVAL_OK;
				675	}
				676
				677	/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
				678	not end of file.) */
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	679	STATIC int INIT bunzip2(unsigned char *buf, long len,
				680	long (fill)(void, unsigned long),
				681	long (flush)(void, unsigned long),
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	682	unsigned char *outbuf,
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	683	long *pos,
Lasse Collin	93685ad2	2011-01-12 17:01:14 -0800	[diff] [blame]	684	void(error)(char x))
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	685	{
				686	struct bunzip_data *bd;
				687	int i = -1;
				688	unsigned char *inbuf;
				689
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	690	if (flush)
				691	outbuf = malloc(BZIP2_IOBUF_SIZE);
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	692
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	693	if (!outbuf) {
Paul Bolle	90802ed	2011-12-05 13:00:34 +0100	[diff] [blame]	694	error("Could not allocate output buffer");
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	695	return RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	696	}
				697	if (buf)
				698	inbuf = buf;
				699	else
				700	inbuf = malloc(BZIP2_IOBUF_SIZE);
				701	if (!inbuf) {
Paul Bolle	90802ed	2011-12-05 13:00:34 +0100	[diff] [blame]	702	error("Could not allocate input buffer");
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	703	i = RETVAL_OUT_OF_MEMORY;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	704	goto exit_0;
				705	}
				706	i = start_bunzip(&bd, inbuf, len, fill);
				707	if (!i) {
				708	for (;;) {
				709	i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
				710	if (i <= 0)
				711	break;
				712	if (!flush)
				713	outbuf += i;
				714	else
				715	if (i != flush(outbuf, i)) {
				716	i = RETVAL_UNEXPECTED_OUTPUT_EOF;
				717	break;
				718	}
				719	}
				720	}
				721	/* Check CRC and release memory */
				722	if (i == RETVAL_LAST_BLOCK) {
				723	if (bd->headerCRC != bd->totalCRC)
				724	error("Data integrity error when decompressing.");
				725	else
				726	i = RETVAL_OK;
				727	} else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
				728	error("Compressed file ends unexpectedly");
				729	}
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	730	if (!bd)
				731	goto exit_1;
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	732	if (bd->dbuf)
				733	large_free(bd->dbuf);
				734	if (pos)
				735	*pos = bd->inbufPos;
				736	free(bd);
Phillip Lougher	d452986	2009-12-14 21:45:19 +0000	[diff] [blame]	737	exit_1:
Alain Knaff	bc22c17	2009-01-04 22:46:16 +0100	[diff] [blame]	738	if (!buf)
				739	free(inbuf);
				740	exit_0:
				741	if (flush)
				742	free(outbuf);
				743	return i;
				744	}
				745
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	746	#ifdef PREBOOT
Yinghai Lu	2d3862d	2015-09-09 15:39:12 -0700	[diff] [blame]	747	STATIC int INIT __decompress(unsigned char *buf, long len,
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	748	long (fill)(void, unsigned long),
				749	long (flush)(void, unsigned long),
Yinghai Lu	2d3862d	2015-09-09 15:39:12 -0700	[diff] [blame]	750	unsigned char *outbuf, long olen,
Yinghai Lu	d97b07c	2014-08-08 14:23:14 -0700	[diff] [blame]	751	long *pos,
Yinghai Lu	2d3862d	2015-09-09 15:39:12 -0700	[diff] [blame]	752	void (error)(char x))
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	753	{
Lasse Collin	93685ad2	2011-01-12 17:01:14 -0800	[diff] [blame]	754	return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error);
Phillip Lougher	b1af431	2009-08-06 15:09:31 -0700	[diff] [blame]	755	}
				756	#endif