Blame - lib/zstd/decompress/zstd_decompress_block.c - SHIFTPHONES/mainline/linux

blob: 2d101d9a842ecaafd663be8f1d19b72cb7cc9a01 [file] [log] [blame]

Nick Terrell	e0c1b49f	2020-09-11 16:37:08 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) Yann Collet, Facebook, Inc.
				3	* All rights reserved.
				4	*
				5	* This source code is licensed under both the BSD-style license (found in the
				6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
				7	* in the COPYING file in the root directory of this source tree).
				8	* You may select, at your option, one of the above-listed licenses.
				9	*/
				10
				11	/* zstd_decompress_block :
				12	* this module takes care of decompressing _compressed_ block */
				13
				14	/*-******************************************************
				15	* Dependencies
				16	*********************************************************/
				17	#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
				18	#include "../common/compiler.h" /* prefetch */
				19	#include "../common/cpu.h" /* bmi2 */
				20	#include "../common/mem.h" /* low level memory routines */
				21	#define FSE_STATIC_LINKING_ONLY
				22	#include "../common/fse.h"
				23	#define HUF_STATIC_LINKING_ONLY
				24	#include "../common/huf.h"
				25	#include "../common/zstd_internal.h"
				26	#include "zstd_decompress_internal.h" /* ZSTD_DCtx */
				27	#include "zstd_ddict.h" /* ZSTD_DDictDictContent */
				28	#include "zstd_decompress_block.h"
				29
				30	/*_******************************************************
				31	* Macros
				32	**********************************************************/
				33
				34	/* These two optional macros force the use one way or another of the two
				35	* ZSTD_decompressSequences implementations. You can't force in both directions
				36	* at the same time.
				37	*/
				38	#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				39	defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				40	#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
				41	#endif
				42
				43
				44	/*_******************************************************
				45	* Memory operations
				46	**********************************************************/
				47	static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
				48
				49
				50	/*-************************************************************
				51	* Block decoding
				52	***************************************************************/
				53
				54	/*! ZSTD_getcBlockSize() :
				55	* Provides the size of compressed block from block header `src` */
				56	size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
				57	blockProperties_t* bpPtr)
				58	{
				59	RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
				60
				61	{ U32 const cBlockHeader = MEM_readLE24(src);
				62	U32 const cSize = cBlockHeader >> 3;
				63	bpPtr->lastBlock = cBlockHeader & 1;
				64	bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
				65	bpPtr->origSize = cSize; /* only useful for RLE */
				66	if (bpPtr->blockType == bt_rle) return 1;
				67	RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
				68	return cSize;
				69	}
				70	}
				71
				72
				73	/* Hidden declaration for fullbench */
				74	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
				75	const void* src, size_t srcSize);
				76	/*! ZSTD_decodeLiteralsBlock() :
					* Decodes the literals section starting at `src` and records where the
					* literals live in dctx->litPtr / dctx->litSize.
					* The two low bits of the first byte select the encoding :
					* - set_repeat / set_compressed : Huffman-coded, decoded into dctx->litBuffer
					* - set_basic : raw bytes, copied into dctx->litBuffer or referenced in place
					* - set_rle : a single byte replicated into dctx->litBuffer
				77	* @return : nb of bytes read from src (< srcSize )
					* or an error code when one of the RETURN_ERROR_IF checks below fails
				78	* note : symbol not declared but exposed for fullbench */
				79	size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
				80	const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
				81	{
				82	DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
				83	RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
				84
				85	{ const BYTE* const istart = (const BYTE*) src;
					/* encoding type = two low bits of the first header byte */
				86	symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
				87
				88	switch(litEncType)
				89	{
				90	case set_repeat:
				91	DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
					/* repeat mode requires dctx->litEntropy to be non-zero;
					 * it is set to 1 below after a successful Huffman decode */
				92	RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
				93	ZSTD_FALLTHROUGH;
				94
				95	case set_compressed:
				96	RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
				97	{ size_t lhSize, litSize, litCSize;
				98	U32 singleStream=0;
					/* size format = bits 2-3 of the first byte;
					 * lhc = first 4 header bytes read as little-endian */
				99	U32 const lhlCode = (istart[0] >> 2) & 3;
				100	U32 const lhc = MEM_readLE32(istart);
				101	size_t hufSuccess;
				102	switch(lhlCode)
				103	{
				104	case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
				105	/* 2 - 2 - 10 - 10 */
					/* lhlCode 0 selects the single-stream decoder */
				106	singleStream = !lhlCode;
				107	lhSize = 3;
				108	litSize = (lhc >> 4) & 0x3FF;
				109	litCSize = (lhc >> 14) & 0x3FF;
				110	break;
				111	case 2:
				112	/* 2 - 2 - 14 - 14 */
				113	lhSize = 4;
				114	litSize = (lhc >> 4) & 0x3FFF;
				115	litCSize = lhc >> 18;
				116	break;
				117	case 3:
				118	/* 2 - 2 - 18 - 18 */
				119	lhSize = 5;
				120	litSize = (lhc >> 4) & 0x3FFFF;
					/* 18-bit compressed size : low 10 bits from lhc, upper 8 bits from the 5th header byte */
				121	litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
				122	break;
				123	}
					/* regenerated size must fit a block; header + compressed payload must fit in src */
				124	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
				125	RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
				126
				127	/* prefetch huffman table if cold */
				128	if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
				129	PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
				130	}
				131
					/* set_repeat : decode with the table dctx->HUFptr already points to.
					 * set_compressed : the *_wksp decoders are handed dctx->entropy.hufTable
					 * together with dctx->workspace. */
				132	if (litEncType==set_repeat) {
				133	if (singleStream) {
				134	hufSuccess = HUF_decompress1X_usingDTable_bmi2(
				135	dctx->litBuffer, litSize, istart+lhSize, litCSize,
				136	dctx->HUFptr, dctx->bmi2);
				137	} else {
				138	hufSuccess = HUF_decompress4X_usingDTable_bmi2(
				139	dctx->litBuffer, litSize, istart+lhSize, litCSize,
				140	dctx->HUFptr, dctx->bmi2);
				141	}
				142	} else {
				143	if (singleStream) {
				144	#if defined(HUF_FORCE_DECOMPRESS_X2)
				145	hufSuccess = HUF_decompress1X_DCtx_wksp(
				146	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				147	istart+lhSize, litCSize, dctx->workspace,
				148	sizeof(dctx->workspace));
				149	#else
				150	hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
				151	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				152	istart+lhSize, litCSize, dctx->workspace,
				153	sizeof(dctx->workspace), dctx->bmi2);
				154	#endif
				155	} else {
				156	hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
				157	dctx->entropy.hufTable, dctx->litBuffer, litSize,
				158	istart+lhSize, litCSize, dctx->workspace,
				159	sizeof(dctx->workspace), dctx->bmi2);
				160	}
				161	}
				162
				163	RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
				164
				165	dctx->litPtr = dctx->litBuffer;
				166	dctx->litSize = litSize;
				167	dctx->litEntropy = 1;
					/* after set_compressed, point HUFptr at dctx->entropy.hufTable so that
					 * later set_repeat blocks decode with it */
				168	if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
					/* zero WILDCOPY_OVERLENGTH bytes right after the decoded literals */
				169	ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
				170	return litCSize + lhSize;
				171	}
				172
				173	case set_basic:
				174	{ size_t litSize, lhSize;
				175	U32 const lhlCode = ((istart[0]) >> 2) & 3;
					/* header is 1, 2 or 3 bytes; the literal count sits above the type/format bits */
				176	switch(lhlCode)
				177	{
				178	case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
				179	lhSize = 1;
				180	litSize = istart[0] >> 3;
				181	break;
				182	case 1:
				183	lhSize = 2;
				184	litSize = MEM_readLE16(istart) >> 4;
				185	break;
				186	case 3:
				187	lhSize = 3;
				188	litSize = MEM_readLE24(istart) >> 4;
				189	break;
				190	}
				191
					/* fewer than WILDCOPY_OVERLENGTH bytes of src follow the literals :
					 * copy them into litBuffer instead of referencing src directly */
				192	if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
				193	RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
				194	ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
				195	dctx->litPtr = dctx->litBuffer;
				196	dctx->litSize = litSize;
				197	ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
				198	return lhSize+litSize;
				199	}
				200	/* direct reference into compressed stream */
				201	dctx->litPtr = istart+lhSize;
				202	dctx->litSize = litSize;
				203	return lhSize+litSize;
				204	}
				205
				206	case set_rle:
				207	{ U32 const lhlCode = ((istart[0]) >> 2) & 3;
				208	size_t litSize, lhSize;
				209	switch(lhlCode)
				210	{
				211	case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
				212	lhSize = 1;
				213	litSize = istart[0] >> 3;
				214	break;
				215	case 1:
				216	lhSize = 2;
				217	litSize = MEM_readLE16(istart) >> 4;
				218	break;
				219	case 3:
				220	lhSize = 3;
				221	litSize = MEM_readLE24(istart) >> 4;
				222	RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
				223	break;
				224	}
				225	RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
					/* replicate the byte that follows the header, WILDCOPY_OVERLENGTH extra bytes included */
				226	ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
				227	dctx->litPtr = dctx->litBuffer;
				228	dctx->litSize = litSize;
					/* consumed : the header plus the single RLE byte */
				229	return lhSize+1;
				230	}
				231	default:
				232	RETURN_ERROR(corruption_detected, "impossible");
				233	}
				234	}
				235	}
				236
				237	/* Default FSE distribution tables.
				238	* These are pre-calculated FSE decoding tables using default distributions as defined in specification :
				239	* https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
				240	* They were generated programmatically with following method :
				241	* - start from default distributions, present in /lib/common/zstd_internal.h
				242	* - generate tables normally, using ZSTD_buildFSETable()
				243	* - printout the content of tables
				244	* - prettify output, report below, test with fuzzer to ensure it's correct */
				245
				246	/* Default FSE distribution table for Literal Lengths */
					/* Layout : entry 0 is the table header, followed by (1<<LL_DEFAULTNORMLOG)
					 * decoding cells, hence the "+1" in the array size. */
				247	static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
				248	{ 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				249	/* nextState, nbAddBits, nbBits, baseVal */
				250	{ 0, 0, 4, 0}, { 16, 0, 4, 0},
				251	{ 32, 0, 5, 1}, { 0, 0, 5, 3},
				252	{ 0, 0, 5, 4}, { 0, 0, 5, 6},
				253	{ 0, 0, 5, 7}, { 0, 0, 5, 9},
				254	{ 0, 0, 5, 10}, { 0, 0, 5, 12},
				255	{ 0, 0, 6, 14}, { 0, 1, 5, 16},
				256	{ 0, 1, 5, 20}, { 0, 1, 5, 22},
				257	{ 0, 2, 5, 28}, { 0, 3, 5, 32},
				258	{ 0, 4, 5, 48}, { 32, 6, 5, 64},
				259	{ 0, 7, 5, 128}, { 0, 8, 6, 256},
				260	{ 0, 10, 6, 1024}, { 0, 12, 6, 4096},
				261	{ 32, 0, 4, 0}, { 0, 0, 4, 1},
				262	{ 0, 0, 5, 2}, { 32, 0, 5, 4},
				263	{ 0, 0, 5, 5}, { 32, 0, 5, 7},
				264	{ 0, 0, 5, 8}, { 32, 0, 5, 10},
				265	{ 0, 0, 5, 11}, { 0, 0, 6, 13},
				266	{ 32, 1, 5, 16}, { 0, 1, 5, 18},
				267	{ 32, 1, 5, 22}, { 0, 2, 5, 24},
				268	{ 32, 3, 5, 32}, { 0, 3, 5, 40},
				269	{ 0, 6, 4, 64}, { 16, 6, 4, 64},
				270	{ 32, 7, 5, 128}, { 0, 9, 6, 512},
				271	{ 0, 11, 6, 2048}, { 48, 0, 4, 0},
				272	{ 16, 0, 4, 1}, { 32, 0, 5, 2},
				273	{ 32, 0, 5, 3}, { 32, 0, 5, 5},
				274	{ 32, 0, 5, 6}, { 32, 0, 5, 8},
				275	{ 32, 0, 5, 9}, { 32, 0, 5, 11},
				276	{ 32, 0, 5, 12}, { 0, 0, 6, 15},
				277	{ 32, 1, 5, 18}, { 32, 1, 5, 20},
				278	{ 32, 2, 5, 24}, { 32, 2, 5, 28},
				279	{ 32, 3, 5, 40}, { 32, 4, 5, 48},
				280	{ 0, 16, 6,65536}, { 0, 15, 6,32768},
				281	{ 0, 14, 6,16384}, { 0, 13, 6, 8192},
				282	}; /* LL_defaultDTable */
				283
				284	/* Default FSE distribution table for Offset Codes */
					/* Layout : entry 0 is the table header, followed by (1<<OF_DEFAULTNORMLOG)
					 * decoding cells, hence the "+1" in the array size. */
				285	static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
				286	{ 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				287	/* nextState, nbAddBits, nbBits, baseVal */
				288	{ 0, 0, 5, 0}, { 0, 6, 4, 61},
				289	{ 0, 9, 5, 509}, { 0, 15, 5,32765},
				290	{ 0, 21, 5,2097149}, { 0, 3, 5, 5},
				291	{ 0, 7, 4, 125}, { 0, 12, 5, 4093},
				292	{ 0, 18, 5,262141}, { 0, 23, 5,8388605},
				293	{ 0, 5, 5, 29}, { 0, 8, 4, 253},
				294	{ 0, 14, 5,16381}, { 0, 20, 5,1048573},
				295	{ 0, 2, 5, 1}, { 16, 7, 4, 125},
				296	{ 0, 11, 5, 2045}, { 0, 17, 5,131069},
				297	{ 0, 22, 5,4194301}, { 0, 4, 5, 13},
				298	{ 16, 8, 4, 253}, { 0, 13, 5, 8189},
				299	{ 0, 19, 5,524285}, { 0, 1, 5, 1},
				300	{ 16, 6, 4, 61}, { 0, 10, 5, 1021},
				301	{ 0, 16, 5,65533}, { 0, 28, 5,268435453},
				302	{ 0, 27, 5,134217725}, { 0, 26, 5,67108861},
				303	{ 0, 25, 5,33554429}, { 0, 24, 5,16777213},
				304	}; /* OF_defaultDTable */
				305
				306
				307	/* Default FSE distribution table for Match Lengths */
					/* Layout : entry 0 is the table header, followed by (1<<ML_DEFAULTNORMLOG)
					 * decoding cells, hence the "+1" in the array size. */
				308	static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
				309	{ 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
				310	/* nextState, nbAddBits, nbBits, baseVal */
				311	{ 0, 0, 6, 3}, { 0, 0, 4, 4},
				312	{ 32, 0, 5, 5}, { 0, 0, 5, 6},
				313	{ 0, 0, 5, 8}, { 0, 0, 5, 9},
				314	{ 0, 0, 5, 11}, { 0, 0, 6, 13},
				315	{ 0, 0, 6, 16}, { 0, 0, 6, 19},
				316	{ 0, 0, 6, 22}, { 0, 0, 6, 25},
				317	{ 0, 0, 6, 28}, { 0, 0, 6, 31},
				318	{ 0, 0, 6, 34}, { 0, 1, 6, 37},
				319	{ 0, 1, 6, 41}, { 0, 2, 6, 47},
				320	{ 0, 3, 6, 59}, { 0, 4, 6, 83},
				321	{ 0, 7, 6, 131}, { 0, 9, 6, 515},
				322	{ 16, 0, 4, 4}, { 0, 0, 4, 5},
				323	{ 32, 0, 5, 6}, { 0, 0, 5, 7},
				324	{ 32, 0, 5, 9}, { 0, 0, 5, 10},
				325	{ 0, 0, 6, 12}, { 0, 0, 6, 15},
				326	{ 0, 0, 6, 18}, { 0, 0, 6, 21},
				327	{ 0, 0, 6, 24}, { 0, 0, 6, 27},
				328	{ 0, 0, 6, 30}, { 0, 0, 6, 33},
				329	{ 0, 1, 6, 35}, { 0, 1, 6, 39},
				330	{ 0, 2, 6, 43}, { 0, 3, 6, 51},
				331	{ 0, 4, 6, 67}, { 0, 5, 6, 99},
				332	{ 0, 8, 6, 259}, { 32, 0, 4, 4},
				333	{ 48, 0, 4, 4}, { 16, 0, 4, 5},
				334	{ 32, 0, 5, 7}, { 32, 0, 5, 8},
				335	{ 32, 0, 5, 10}, { 32, 0, 5, 11},
				336	{ 0, 0, 6, 14}, { 0, 0, 6, 17},
				337	{ 0, 0, 6, 20}, { 0, 0, 6, 23},
				338	{ 0, 0, 6, 26}, { 0, 0, 6, 29},
				339	{ 0, 0, 6, 32}, { 0, 16, 6,65539},
				340	{ 0, 15, 6,32771}, { 0, 14, 6,16387},
				341	{ 0, 13, 6, 8195}, { 0, 12, 6, 4099},
				342	{ 0, 11, 6, 2051}, { 0, 10, 6, 1027},
				343	}; /* ML_defaultDTable */
				344
				345
				346	static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
				347	{
				348	void* ptr = dt;
				349	ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
				350	ZSTD_seqSymbol* const cell = dt + 1;
				351
				352	DTableH->tableLog = 0;
				353	DTableH->fastMode = 0;
				354
				355	cell->nbBits = 0;
				356	cell->nextState = 0;
				357	assert(nbAddBits < 255);
				358	cell->nbAdditionalBits = (BYTE)nbAddBits;
				359	cell->baseValue = baseValue;
				360	}
				361
				362
				363	/* ZSTD_buildFSETable() :
				364	* generate FSE decoding table for one symbol (ll, ml or off)
				365	* cannot fail if input is valid =>
				366	* all inputs are presumed validated at this stage
					*
					* dt : output; dt[0] receives the header, dt[1..tableSize] the decoding cells
					* normalizedCounter : per-symbol counts, -1 marking a low-probability symbol
					* baseValue / nbAdditionalBits : per-symbol values copied into each cell
					* wksp : scratch memory holding `symbolNext` (U16 per symbol) followed by
					* the `spread` byte buffer */
				367	FORCE_INLINE_TEMPLATE
				368	void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
				369	const short* normalizedCounter, unsigned maxSymbolValue,
				370	const U32* baseValue, const U32* nbAdditionalBits,
				371	unsigned tableLog, void* wksp, size_t wkspSize)
				372	{
				373	ZSTD_seqSymbol* const tableDecode = dt+1;
				374	U32 const maxSV1 = maxSymbolValue + 1;
				375	U32 const tableSize = 1 << tableLog;
				376
					/* workspace layout : MaxSeq+1 U16 counters, then the spread buffer */
				377	U16* symbolNext = (U16*)wksp;
				378	BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
					/* low-probability symbols are laid down from the top of the table downwards */
				379	U32 highThreshold = tableSize - 1;
				380
				381
				382	/* Sanity Checks */
				383	assert(maxSymbolValue <= MaxSeq);
				384	assert(tableLog <= MaxFSELog);
				385	assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
				386	(void)wkspSize;
				387	/* Init, lay down lowprob symbols */
				388	{ ZSTD_seqSymbol_header DTableH;
				389	DTableH.tableLog = tableLog;
				390	DTableH.fastMode = 1;
					/* fastMode is cleared as soon as one symbol's count reaches half the table size */
				391	{ S16 const largeLimit= (S16)(1 << (tableLog-1));
				392	U32 s;
				393	for (s=0; s<maxSV1; s++) {
				394	if (normalizedCounter[s]==-1) {
				395	tableDecode[highThreshold--].baseValue = s;
				396	symbolNext[s] = 1;
				397	} else {
				398	if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
				399	assert(normalizedCounter[s]>=0);
				400	symbolNext[s] = (U16)normalizedCounter[s];
				401	} } }
				402	ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
				403	}
				404
				405	/* Spread symbols */
				406	assert(tableSize <= 512);
				407	/* Specialized symbol spreading for the case when there are
				408	* no low probability (-1 count) symbols. When compressing
				409	* small blocks we avoid low probability symbols to hit this
				410	* case, since header decoding speed matters more.
				411	*/
				412	if (highThreshold == tableSize - 1) {
				413	size_t const tableMask = tableSize-1;
				414	size_t const step = FSE_TABLESTEP(tableSize);
				415	/* First lay down the symbols in order.
				416	* We use a uint64_t to lay down 8 bytes at a time. This reduces branch
				417	* misses since small blocks generally have small table logs, so nearly
				418	* all symbols have counts <= 8. We ensure we have 8 bytes at the end of
				419	* our buffer to handle the over-write.
				420	*/
				421	{
				422	U64 const add = 0x0101010101010101ull;
				423	size_t pos = 0;
				424	U64 sv = 0;
				425	U32 s;
					/* every byte of `sv` equals the current symbol value; `add` bumps all 8 bytes at once */
				426	for (s=0; s<maxSV1; ++s, sv += add) {
				427	int i;
				428	int const n = normalizedCounter[s];
				429	MEM_write64(spread + pos, sv);
				430	for (i = 8; i < n; i += 8) {
				431	MEM_write64(spread + pos + i, sv);
				432	}
				433	pos += n;
				434	}
				435	}
				436	/* Now we spread those positions across the table.
				437	* The benefit of doing it in two stages is that we avoid the
				438	* variable size inner loop, which caused lots of branch misses.
				439	* Now we can run through all the positions without any branch misses.
				440	* We unroll the loop twice, since that is what empirically worked best.
				441	*/
				442	{
				443	size_t position = 0;
				444	size_t s;
				445	size_t const unroll = 2;
				446	assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
				447	for (s = 0; s < (size_t)tableSize; s += unroll) {
				448	size_t u;
				449	for (u = 0; u < unroll; ++u) {
				450	size_t const uPosition = (position + (u * step)) & tableMask;
				451	tableDecode[uPosition].baseValue = spread[s + u];
				452	}
				453	position = (position + (unroll * step)) & tableMask;
				454	}
				455	assert(position == 0);
				456	}
				457	} else {
					/* general path : step through the table, skipping the cells above
					 * highThreshold that already hold low-probability symbols */
				458	U32 const tableMask = tableSize-1;
				459	U32 const step = FSE_TABLESTEP(tableSize);
				460	U32 s, position = 0;
				461	for (s=0; s<maxSV1; s++) {
				462	int i;
				463	int const n = normalizedCounter[s];
				464	for (i=0; i<n; i++) {
				465	tableDecode[position].baseValue = s;
				466	position = (position + step) & tableMask;
				467	while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
				468	} }
				469	assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
				470	}
				471
				472	/* Build Decoding table */
					/* At this point each cell's baseValue temporarily holds its symbol index;
					 * it is replaced by the real per-symbol values below. */
				473	{
				474	U32 u;
				475	for (u=0; u<tableSize; u++) {
				476	U32 const symbol = tableDecode[u].baseValue;
				477	U32 const nextState = symbolNext[symbol]++;
				478	tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
				479	tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
				480	assert(nbAdditionalBits[symbol] < 255);
				481	tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
				482	tableDecode[u].baseValue = baseValue[symbol];
				483	}
				484	}
				485	}
				486
				487	/* Avoids the FORCE_INLINE of the _body() function. */
				488	static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
				489	const short* normalizedCounter, unsigned maxSymbolValue,
				490	const U32* baseValue, const U32* nbAdditionalBits,
				491	unsigned tableLog, void* wksp, size_t wkspSize)
				492	{
				493	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
				494	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				495	}
				496
				497	#if DYNAMIC_BMI2
				498	TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
				499	const short* normalizedCounter, unsigned maxSymbolValue,
				500	const U32* baseValue, const U32* nbAdditionalBits,
				501	unsigned tableLog, void* wksp, size_t wkspSize)
				502	{
				503	ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
				504	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				505	}
				506	#endif
				507
				508	void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
				509	const short* normalizedCounter, unsigned maxSymbolValue,
				510	const U32* baseValue, const U32* nbAdditionalBits,
				511	unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
				512	{
				513	#if DYNAMIC_BMI2
				514	if (bmi2) {
				515	ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
				516	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				517	return;
				518	}
				519	#endif
				520	(void)bmi2;
				521	ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
				522	baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
				523	}
				524
				525
				526	/*! ZSTD_buildSeqTable() :
				527	* @return : nb bytes read from src,
				528	* or an error code if it fails */
				529	static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
				530	symbolEncodingType_e type, unsigned max, U32 maxLog,
				531	const void* src, size_t srcSize,
				532	const U32* baseValue, const U32* nbAdditionalBits,
				533	const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
				534	int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
				535	int bmi2)
				536	{
				537	switch(type)
				538	{
				539	case set_rle :
				540	RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
				541	RETURN_ERROR_IF(((const BYTE)src) > max, corruption_detected, "");
				542	{ U32 const symbol = (const BYTE)src;
				543	U32 const baseline = baseValue[symbol];
				544	U32 const nbBits = nbAdditionalBits[symbol];
				545	ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
				546	}
				547	*DTablePtr = DTableSpace;
				548	return 1;
				549	case set_basic :
				550	*DTablePtr = defaultTable;
				551	return 0;
				552	case set_repeat:
				553	RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
				554	/* prefetch FSE table if used */
				555	if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
				556	const void* const pStart = *DTablePtr;
				557	size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
				558	PREFETCH_AREA(pStart, pSize);
				559	}
				560	return 0;
				561	case set_compressed :
				562	{ unsigned tableLog;
				563	S16 norm[MaxSeq+1];
				564	size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
				565	RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
				566	RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
				567	ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
				568	*DTablePtr = DTableSpace;
				569	return headerSize;
				570	}
				571	default :
				572	assert(0);
				573	RETURN_ERROR(GENERIC, "impossible");
				574	}
				575	}
				576
				577	size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
				578	const void* src, size_t srcSize)
				579	{
				580	const BYTE* const istart = (const BYTE*)src;
				581	const BYTE* const iend = istart + srcSize;
				582	const BYTE* ip = istart;
				583	int nbSeq;
				584	DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
				585
				586	/* check */
				587	RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
				588
				589	/* SeqHead */
				590	nbSeq = *ip++;
				591	if (!nbSeq) {
				592	*nbSeqPtr=0;
				593	RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
				594	return 1;
				595	}
				596	if (nbSeq > 0x7F) {
				597	if (nbSeq == 0xFF) {
				598	RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
				599	nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
				600	ip+=2;
				601	} else {
				602	RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
				603	nbSeq = ((nbSeq-0x80)<<8) + *ip++;
				604	}
				605	}
				606	*nbSeqPtr = nbSeq;
				607
				608	/* FSE table descriptors */
				609	RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
				610	{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
				611	symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
				612	symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
				613	ip++;
				614
				615	/* Build DTables */
				616	{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
				617	LLtype, MaxLL, LLFSELog,
				618	ip, iend-ip,
				619	LL_base, LL_bits,
				620	LL_defaultDTable, dctx->fseEntropy,
				621	dctx->ddictIsCold, nbSeq,
				622	dctx->workspace, sizeof(dctx->workspace),
				623	dctx->bmi2);
				624	RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				625	ip += llhSize;
				626	}
				627
				628	{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
				629	OFtype, MaxOff, OffFSELog,
				630	ip, iend-ip,
				631	OF_base, OF_bits,
				632	OF_defaultDTable, dctx->fseEntropy,
				633	dctx->ddictIsCold, nbSeq,
				634	dctx->workspace, sizeof(dctx->workspace),
				635	dctx->bmi2);
				636	RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				637	ip += ofhSize;
				638	}
				639
				640	{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
				641	MLtype, MaxML, MLFSELog,
				642	ip, iend-ip,
				643	ML_base, ML_bits,
				644	ML_defaultDTable, dctx->fseEntropy,
				645	dctx->ddictIsCold, nbSeq,
				646	dctx->workspace, sizeof(dctx->workspace),
				647	dctx->bmi2);
				648	RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
				649	ip += mlhSize;
				650	}
				651	}
				652
				653	return ip-istart;
				654	}
				655
				656
					/* One decoded sequence : a run of literals followed by a match copy. */
				657	typedef struct {
				658	size_t litLength; /* nb of literal bytes copied before the match */
				659	size_t matchLength; /* nb of bytes copied from the match */
				660	size_t offset; /* distance from the end of the literals back to the match start */
				661	const BYTE* match; /* NOTE(review): not referenced in the code visible here - presumably a precomputed match pointer; confirm */
				662	} seq_t;
				663
					/* Decoding state for one FSE stream.
					 * NOTE(review): field roles are not shown in the code visible here -
					 * presumably `state` is the current position within `table`; confirm. */
				664	typedef struct {
				665	size_t state;
				666	const ZSTD_seqSymbol* table;
				667	} ZSTD_fseState;
				668
					/* Aggregate state for sequence decoding : one bitstream, one FSE state per
					 * symbol kind (literal length, offset, match length) and ZSTD_REP_NUM
					 * previous offsets.
					 * NOTE(review): the uses of prefixStart, dictEnd and pos are not shown in
					 * the code visible here; confirm against the sequence decoders. */
				669	typedef struct {
				670	BIT_DStream_t DStream;
				671	ZSTD_fseState stateLL;
				672	ZSTD_fseState stateOffb;
				673	ZSTD_fseState stateML;
				674	size_t prevOffset[ZSTD_REP_NUM];
				675	const BYTE* prefixStart;
				676	const BYTE* dictEnd;
				677	size_t pos;
				678	} seqState_t;
				679
				680	/*! ZSTD_overlapCopy8() :
				681	* Copies 8 bytes from ip to op and updates op and ip where ip <= op.
				682	* If the offset is < 8 then the offset is spread to at least 8 bytes.
				683	*
				684	* Precondition: ip <= op
				685	* Postcondition: op - op >= 8
				686	*/
				687	HINT_INLINE void ZSTD_overlapCopy8(BYTE op, BYTE const ip, size_t offset) {
				688	assert(ip <= op);
				689	if (offset < 8) {
				690	/* close range match, overlap */
				691	static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
				692	static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
				693	int const sub2 = dec64table[offset];
				694	(op)[0] = (ip)[0];
				695	(op)[1] = (ip)[1];
				696	(op)[2] = (ip)[2];
				697	(op)[3] = (ip)[3];
				698	*ip += dec32table[offset];
				699	ZSTD_copy4(op+4, ip);
				700	*ip -= sub2;
				701	} else {
				702	ZSTD_copy8(op, ip);
				703	}
				704	*ip += 8;
				705	*op += 8;
				706	assert(op - ip >= 8);
				707	}
				708
				709	/*! ZSTD_safecopy() :
				710	* Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
				711	* and write up to 16 bytes past oend_w (op >= oend_w is allowed).
				712	* This function is only called in the uncommon case where the sequence is near the end of the block. It
				713	* should be fast for a single long sequence, but can be slow for several short sequences.
				714	*
				715	* @param ovtype controls the overlap detection
				716	* - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
				717	* - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
				718	* The src buffer must be before the dst buffer.
				719	*/
				720	static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
				721	ptrdiff_t const diff = op - ip;
				722	BYTE* const oend = op + length;
				723
				724	assert((ovtype == ZSTD_no_overlap && (diff <= -8 \|\| diff >= 8 \|\| op >= oend_w)) \|\|
				725	(ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
				726
				727	if (length < 8) {
				728	/* Handle short lengths. */
				729	while (op < oend) op++ = ip++;
				730	return;
				731	}
				732	if (ovtype == ZSTD_overlap_src_before_dst) {
				733	/* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
				734	assert(length >= 8);
				735	ZSTD_overlapCopy8(&op, &ip, diff);
				736	assert(op - ip >= 8);
				737	assert(op <= oend);
				738	}
				739
				740	if (oend <= oend_w) {
				741	/* No risk of overwrite. */
				742	ZSTD_wildcopy(op, ip, length, ovtype);
				743	return;
				744	}
				745	if (op <= oend_w) {
				746	/* Wildcopy until we get close to the end. */
				747	assert(oend > oend_w);
				748	ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
				749	ip += oend_w - op;
				750	op = oend_w;
				751	}
				752	/* Handle the leftovers. */
				753	while (op < oend) op++ = ip++;
				754	}
				755
				756	/* ZSTD_execSequenceEnd():
				757	* This version handles cases that are near the end of the output buffer. It requires
				758	* more careful checks to make sure there is no overflow. By separating out these hard
				759	* and unlikely cases, we can speed up the common cases.
				760	*
				761	* NOTE: This function needs to be fast for a single long sequence, but doesn't need
				762	* to be optimized for many small sequences, since those fall into ZSTD_execSequence().
				763	*/
				764	FORCE_NOINLINE
				765	size_t ZSTD_execSequenceEnd(BYTE* op,
				766	BYTE* const oend, seq_t sequence,
				767	const BYTE** litPtr, const BYTE* const litLimit,
				768	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				769	{
				770	BYTE* const oLitEnd = op + sequence.litLength;
				771	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				772	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				773	const BYTE* match = oLitEnd - sequence.offset;
				774	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
				775
				776	/* bounds checks : careful of address space overflow in 32-bit mode */
				777	RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
				778	RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
				779	assert(op < op + sequenceLength);
				780	assert(oLitEnd < op + sequenceLength);
				781
				782	/* copy literals */
				783	ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
				784	op = oLitEnd;
				785	*litPtr = iLitEnd;
				786
				787	/* copy Match */
				788	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				789	/* offset beyond prefix */
				790	RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
				791	match = dictEnd - (prefixStart-match);
				792	if (match + sequence.matchLength <= dictEnd) {
				793	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				794	return sequenceLength;
				795	}
				796	/* span extDict & currentPrefixSegment */
				797	{ size_t const length1 = dictEnd - match;
				798	ZSTD_memmove(oLitEnd, match, length1);
				799	op = oLitEnd + length1;
				800	sequence.matchLength -= length1;
				801	match = prefixStart;
				802	} }
				803	ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
				804	return sequenceLength;
				805	}
				806
				807	HINT_INLINE
				808	size_t ZSTD_execSequence(BYTE* op,
				809	BYTE* const oend, seq_t sequence,
				810	const BYTE** litPtr, const BYTE* const litLimit,
				811	const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
				812	{
				813	BYTE* const oLitEnd = op + sequence.litLength;
				814	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
				815	BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
				816	BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
				817	const BYTE* const iLitEnd = *litPtr + sequence.litLength;
				818	const BYTE* match = oLitEnd - sequence.offset;
				819
				820	assert(op != NULL /* Precondition */);
				821	assert(oend_w < oend /* No underflow */);
				822	/* Handle edge cases in a slow path:
				823	* - Read beyond end of literals
				824	* - Match end is within WILDCOPY_OVERLIMIT of oend
				825	* - 32-bit mode and the match length overflows
				826	*/
				827	if (UNLIKELY(
				828	iLitEnd > litLimit \|\|
				829	oMatchEnd > oend_w \|\|
				830	(MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
				831	return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
				832
				833	/* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
				834	assert(op <= oLitEnd /* No overflow */);
				835	assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
				836	assert(oMatchEnd <= oend /* No underflow */);
				837	assert(iLitEnd <= litLimit /* Literal length is in bounds */);
				838	assert(oLitEnd <= oend_w /* Can wildcopy literals */);
				839	assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
				840
				841	/* Copy Literals:
				842	* Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
				843	* We likely don't need the full 32-byte wildcopy.
				844	*/
				845	assert(WILDCOPY_OVERLENGTH >= 16);
				846	ZSTD_copy16(op, (*litPtr));
				847	if (UNLIKELY(sequence.litLength > 16)) {
				848	ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
				849	}
				850	op = oLitEnd;
				851	litPtr = iLitEnd; / update for next sequence */
				852
				853	/* Copy Match */
				854	if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
				855	/* offset beyond prefix -> go into extDict */
				856	RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
				857	match = dictEnd + (match - prefixStart);
				858	if (match + sequence.matchLength <= dictEnd) {
				859	ZSTD_memmove(oLitEnd, match, sequence.matchLength);
				860	return sequenceLength;
				861	}
				862	/* span extDict & currentPrefixSegment */
				863	{ size_t const length1 = dictEnd - match;
				864	ZSTD_memmove(oLitEnd, match, length1);
				865	op = oLitEnd + length1;
				866	sequence.matchLength -= length1;
				867	match = prefixStart;
				868	} }
				869	/* Match within prefix of 1 or more bytes */
				870	assert(op <= oMatchEnd);
				871	assert(oMatchEnd <= oend_w);
				872	assert(match >= prefixStart);
				873	assert(sequence.matchLength >= 1);
				874
				875	/* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
				876	* without overlap checking.
				877	*/
				878	if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
				879	/* We bet on a full wildcopy for matches, since we expect matches to be
				880	* longer than literals (in general). In silesia, ~10% of matches are longer
				881	* than 16 bytes.
				882	*/
				883	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
				884	return sequenceLength;
				885	}
				886	assert(sequence.offset < WILDCOPY_VECLEN);
				887
				888	/* Copy 8 bytes and spread the offset to be >= 8. */
				889	ZSTD_overlapCopy8(&op, &match, sequence.offset);
				890
				891	/* If the match length is > 8 bytes, then continue with the wildcopy. */
				892	if (sequence.matchLength > 8) {
				893	assert(op < oMatchEnd);
				894	ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
				895	}
				896	return sequenceLength;
				897	}
				898
				899	static void
				900	ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
				901	{
				902	const void* ptr = dt;
				903	const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
				904	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
				905	DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
				906	(U32)DStatePtr->state, DTableH->tableLog);
				907	BIT_reloadDStream(bitD);
				908	DStatePtr->table = dt + 1;
				909	}
				910
				911	FORCE_INLINE_TEMPLATE void
				912	ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
				913	{
				914	ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
				915	U32 const nbBits = DInfo.nbBits;
				916	size_t const lowBits = BIT_readBits(bitD, nbBits);
				917	DStatePtr->state = DInfo.nextState + lowBits;
				918	}
				919
				920	FORCE_INLINE_TEMPLATE void
				921	ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
				922	{
				923	U32 const nbBits = DInfo.nbBits;
				924	size_t const lowBits = BIT_readBits(bitD, nbBits);
				925	DStatePtr->state = DInfo.nextState + lowBits;
				926	}
				927
				928	/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
				929	* offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
				930	* bits before reloading. This value is the maximum number of bytes we read
				931	* after reloading when we are decoding long offsets.
				932	*/
				933	#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
				934	(ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
				935	? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
				936	: 0)
				937
				938	typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
				939	typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
				940
				941	FORCE_INLINE_TEMPLATE seq_t
				942	ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
				943	{
				944	seq_t seq;
				945	ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
				946	ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
				947	ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
				948	U32 const llBase = llDInfo.baseValue;
				949	U32 const mlBase = mlDInfo.baseValue;
				950	U32 const ofBase = ofDInfo.baseValue;
				951	BYTE const llBits = llDInfo.nbAdditionalBits;
				952	BYTE const mlBits = mlDInfo.nbAdditionalBits;
				953	BYTE const ofBits = ofDInfo.nbAdditionalBits;
				954	BYTE const totalBits = llBits+mlBits+ofBits;
				955
				956	/* sequence */
				957	{ size_t offset;
				958	if (ofBits > 1) {
				959	ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
				960	ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
				961	assert(ofBits <= MaxOff);
				962	if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
				963	U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
				964	offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
				965	BIT_reloadDStream(&seqState->DStream);
				966	if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
				967	assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
				968	} else {
				969	offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/>0/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
				970	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
				971	}
				972	seqState->prevOffset[2] = seqState->prevOffset[1];
				973	seqState->prevOffset[1] = seqState->prevOffset[0];
				974	seqState->prevOffset[0] = offset;
				975	} else {
				976	U32 const ll0 = (llBase == 0);
				977	if (LIKELY((ofBits == 0))) {
				978	if (LIKELY(!ll0))
				979	offset = seqState->prevOffset[0];
				980	else {
				981	offset = seqState->prevOffset[1];
				982	seqState->prevOffset[1] = seqState->prevOffset[0];
				983	seqState->prevOffset[0] = offset;
				984	}
				985	} else {
				986	offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
				987	{ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
				988	temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
				989	if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
				990	seqState->prevOffset[1] = seqState->prevOffset[0];
				991	seqState->prevOffset[0] = offset = temp;
				992	} } }
				993	seq.offset = offset;
				994	}
				995
				996	seq.matchLength = mlBase;
				997	if (mlBits > 0)
				998	seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/>0/);
				999
				1000	if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
				1001	BIT_reloadDStream(&seqState->DStream);
				1002	if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
				1003	BIT_reloadDStream(&seqState->DStream);
				1004	/* Ensure there are enough bits to read the rest of data in 64-bit mode. */
				1005	ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
				1006
				1007	seq.litLength = llBase;
				1008	if (llBits > 0)
				1009	seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/>0/);
				1010
				1011	if (MEM_32bits())
				1012	BIT_reloadDStream(&seqState->DStream);
				1013
				1014	DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
				1015	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
				1016
				1017	if (prefetch == ZSTD_p_prefetch) {
				1018	size_t const pos = seqState->pos + seq.litLength;
				1019	const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
				1020	seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
				1021	* No consequence though : no memory access will occur, offset is only used for prefetching */
				1022	seqState->pos = pos + seq.matchLength;
				1023	}
				1024
				1025	/* ANS state update
				1026	* gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
				1027	* clang-9.2.0 does 7% worse with ZSTD_updateFseState().
				1028	* Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
				1029	* better option, so it is the default for other compilers. But, if you
				1030	* measure that it is worse, please put up a pull request.
				1031	*/
				1032	{
				1033	#if !defined(__clang__)
				1034	const int kUseUpdateFseState = 1;
				1035	#else
				1036	const int kUseUpdateFseState = 0;
				1037	#endif
				1038	if (kUseUpdateFseState) {
				1039	ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
				1040	ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
				1041	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
				1042	ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
				1043	} else {
				1044	ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
				1045	ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
				1046	if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
				1047	ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
				1048	}
				1049	}
				1050
				1051	return seq;
				1052	}
				1053
				1054	#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
				1055	MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
				1056	{
				1057	size_t const windowSize = dctx->fParams.windowSize;
				1058	/* No dictionary used. */
				1059	if (dctx->dictContentEndForFuzzing == NULL) return 0;
				1060	/* Dictionary is our prefix. */
				1061	if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
				1062	/* Dictionary is not our ext-dict. */
				1063	if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
				1064	/* Dictionary is not within our window size. */
				1065	if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
				1066	/* Dictionary is active. */
				1067	return 1;
				1068	}
				1069
				1070	MEM_STATIC void ZSTD_assertValidSequence(
				1071	ZSTD_DCtx const* dctx,
				1072	BYTE const* op, BYTE const* oend,
				1073	seq_t const seq,
				1074	BYTE const* prefixStart, BYTE const* virtualStart)
				1075	{
				1076	#if DEBUGLEVEL >= 1
				1077	size_t const windowSize = dctx->fParams.windowSize;
				1078	size_t const sequenceSize = seq.litLength + seq.matchLength;
				1079	BYTE const* const oLitEnd = op + seq.litLength;
				1080	DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
				1081	(U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
				1082	assert(op <= oend);
				1083	assert((size_t)(oend - op) >= sequenceSize);
				1084	assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
				1085	if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
				1086	size_t const dictSize = (size_t)((char const)dctx->dictContentEndForFuzzing - (char const)dctx->dictContentBeginForFuzzing);
				1087	/* Offset must be within the dictionary. */
				1088	assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
				1089	assert(seq.offset <= windowSize + dictSize);
				1090	} else {
				1091	/* Offset must be within our window. */
				1092	assert(seq.offset <= windowSize);
				1093	}
				1094	#else
				1095	(void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
				1096	#endif
				1097	}
				1098	#endif
				1099
				1100	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1101	FORCE_INLINE_TEMPLATE size_t
				1102	DONT_VECTORIZE
				1103	ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
				1104	void* dst, size_t maxDstSize,
				1105	const void* seqStart, size_t seqSize, int nbSeq,
				1106	const ZSTD_longOffset_e isLongOffset,
				1107	const int frame)
				1108	{
				1109	const BYTE* ip = (const BYTE*)seqStart;
				1110	const BYTE* const iend = ip + seqSize;
				1111	BYTE* const ostart = (BYTE*)dst;
				1112	BYTE* const oend = ostart + maxDstSize;
				1113	BYTE* op = ostart;
				1114	const BYTE* litPtr = dctx->litPtr;
				1115	const BYTE* const litEnd = litPtr + dctx->litSize;
				1116	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
				1117	const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
				1118	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
				1119	DEBUGLOG(5, "ZSTD_decompressSequences_body");
				1120	(void)frame;
				1121
				1122	/* Regen sequences */
				1123	if (nbSeq) {
				1124	seqState_t seqState;
				1125	size_t error = 0;
				1126	dctx->fseEntropy = 1;
				1127	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1128	RETURN_ERROR_IF(
				1129	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
				1130	corruption_detected, "");
				1131	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1132	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1133	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1134	assert(dst != NULL);
				1135
				1136	ZSTD_STATIC_ASSERT(
				1137	BIT_DStream_unfinished < BIT_DStream_completed &&
				1138	BIT_DStream_endOfBuffer < BIT_DStream_completed &&
				1139	BIT_DStream_completed < BIT_DStream_overflow);
				1140
				1141	#if defined(__x86_64__)
				1142	/* Align the decompression loop to 32 + 16 bytes.
				1143	*
				1144	* zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
				1145	* speed swings based on the alignment of the decompression loop. This
				1146	* performance swing is caused by parts of the decompression loop falling
				1147	* out of the DSB. The entire decompression loop should fit in the DSB,
				1148	* when it can't we get much worse performance. You can measure if you've
				1149	* hit the good case or the bad case with this perf command for some
				1150	* compressed file test.zst:
				1151	*
				1152	* perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
				1153	* -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
				1154	*
				1155	* If you see most cycles served out of the MITE you've hit the bad case.
				1156	* If you see most cycles served out of the DSB you've hit the good case.
				1157	* If it is pretty even then you may be in an okay case.
				1158	*
				1159	* I've been able to reproduce this issue on the following CPUs:
				1160	* - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
				1161	* Use Instruments->Counters to get DSB/MITE cycles.
				1162	* I never got performance swings, but I was able to
				1163	* go from the good case of mostly DSB to half of the
				1164	* cycles served from MITE.
				1165	* - Coffeelake: Intel i9-9900k
				1166	*
				1167	* I haven't been able to reproduce the instability or DSB misses on any
				1168	* of the following CPUS:
				1169	* - Haswell
				1170	* - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
				1171	* - Skylake
				1172	*
				1173	* If you are seeing performance stability this script can help test.
				1174	* It tests on 4 commits in zstd where I saw performance change.
				1175	*
				1176	* https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
				1177	*/
				1178	__asm__(".p2align 5");
				1179	__asm__("nop");
				1180	__asm__(".p2align 4");
				1181	#endif
				1182	for ( ; ; ) {
				1183	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
				1184	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
				1185	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1186	assert(!ZSTD_isError(oneSeqSize));
				1187	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
				1188	#endif
				1189	DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
				1190	BIT_reloadDStream(&(seqState.DStream));
				1191	op += oneSeqSize;
				1192	/* gcc and clang both don't like early returns in this loop.
				1193	* Instead break and check for an error at the end of the loop.
				1194	*/
				1195	if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
				1196	error = oneSeqSize;
				1197	break;
				1198	}
				1199	if (UNLIKELY(!--nbSeq)) break;
				1200	}
				1201
				1202	/* check if reached exact end */
				1203	DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
				1204	if (ZSTD_isError(error)) return error;
				1205	RETURN_ERROR_IF(nbSeq, corruption_detected, "");
				1206	RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
				1207	/* save reps for next block */
				1208	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1209	}
				1210
				1211	/* last literal segment */
				1212	{ size_t const lastLLSize = litEnd - litPtr;
				1213	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1214	if (op != NULL) {
				1215	ZSTD_memcpy(op, litPtr, lastLLSize);
				1216	op += lastLLSize;
				1217	}
				1218	}
				1219
				1220	return op-ostart;
				1221	}
				1222
				1223	static size_t
				1224	ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
				1225	void* dst, size_t maxDstSize,
				1226	const void* seqStart, size_t seqSize, int nbSeq,
				1227	const ZSTD_longOffset_e isLongOffset,
				1228	const int frame)
				1229	{
				1230	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1231	}
				1232	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1233
				1234	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1235	FORCE_INLINE_TEMPLATE size_t
				1236	ZSTD_decompressSequencesLong_body(
				1237	ZSTD_DCtx* dctx,
				1238	void* dst, size_t maxDstSize,
				1239	const void* seqStart, size_t seqSize, int nbSeq,
				1240	const ZSTD_longOffset_e isLongOffset,
				1241	const int frame)
				1242	{
				1243	const BYTE* ip = (const BYTE*)seqStart;
				1244	const BYTE* const iend = ip + seqSize;
				1245	BYTE* const ostart = (BYTE*)dst;
				1246	BYTE* const oend = ostart + maxDstSize;
				1247	BYTE* op = ostart;
				1248	const BYTE* litPtr = dctx->litPtr;
				1249	const BYTE* const litEnd = litPtr + dctx->litSize;
				1250	const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
				1251	const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
				1252	const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
				1253	(void)frame;
				1254
				1255	/* Regen sequences */
				1256	if (nbSeq) {
				1257	#define STORED_SEQS 4
				1258	#define STORED_SEQS_MASK (STORED_SEQS-1)
				1259	#define ADVANCED_SEQS 4
				1260	seq_t sequences[STORED_SEQS];
				1261	int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
				1262	seqState_t seqState;
				1263	int seqNb;
				1264	dctx->fseEntropy = 1;
				1265	{ int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
				1266	seqState.prefixStart = prefixStart;
				1267	seqState.pos = (size_t)(op-prefixStart);
				1268	seqState.dictEnd = dictEnd;
				1269	assert(dst != NULL);
				1270	assert(iend >= ip);
				1271	RETURN_ERROR_IF(
				1272	ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
				1273	corruption_detected, "");
				1274	ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
				1275	ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
				1276	ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
				1277
				1278	/* prepare in advance */
				1279	for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
				1280	sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
				1281	PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
				1282	}
				1283	RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
				1284
				1285	/* decode and decompress */
				1286	for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
				1287	seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
				1288	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
				1289	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1290	assert(!ZSTD_isError(oneSeqSize));
				1291	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
				1292	#endif
				1293	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1294	PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
				1295	sequences[seqNb & STORED_SEQS_MASK] = sequence;
				1296	op += oneSeqSize;
				1297	}
				1298	RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
				1299
				1300	/* finish queue */
				1301	seqNb -= seqAdvance;
				1302	for ( ; seqNb<nbSeq ; seqNb++) {
				1303	size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
				1304	#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
				1305	assert(!ZSTD_isError(oneSeqSize));
				1306	if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
				1307	#endif
				1308	if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
				1309	op += oneSeqSize;
				1310	}
				1311
				1312	/* save reps for next block */
				1313	{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
				1314	}
				1315
				1316	/* last literal segment */
				1317	{ size_t const lastLLSize = litEnd - litPtr;
				1318	RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
				1319	if (op != NULL) {
				1320	ZSTD_memcpy(op, litPtr, lastLLSize);
				1321	op += lastLLSize;
				1322	}
				1323	}
				1324
				1325	return op-ostart;
				1326	}
				1327
				1328	static size_t
				1329	ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
				1330	void* dst, size_t maxDstSize,
				1331	const void* seqStart, size_t seqSize, int nbSeq,
				1332	const ZSTD_longOffset_e isLongOffset,
				1333	const int frame)
				1334	{
				1335	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1336	}
				1337	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1338
				1339
				1340
				1341	#if DYNAMIC_BMI2
				1342
				1343	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1344	static TARGET_ATTRIBUTE("bmi2") size_t
				1345	DONT_VECTORIZE
				1346	ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
				1347	void* dst, size_t maxDstSize,
				1348	const void* seqStart, size_t seqSize, int nbSeq,
				1349	const ZSTD_longOffset_e isLongOffset,
				1350	const int frame)
				1351	{
				1352	return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1353	}
				1354	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1355
				1356	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1357	static TARGET_ATTRIBUTE("bmi2") size_t
				1358	ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
				1359	void* dst, size_t maxDstSize,
				1360	const void* seqStart, size_t seqSize, int nbSeq,
				1361	const ZSTD_longOffset_e isLongOffset,
				1362	const int frame)
				1363	{
				1364	return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1365	}
				1366	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1367
				1368	#endif /* DYNAMIC_BMI2 */
				1369
				1370	typedef size_t (*ZSTD_decompressSequences_t)(
				1371	ZSTD_DCtx* dctx,
				1372	void* dst, size_t maxDstSize,
				1373	const void* seqStart, size_t seqSize, int nbSeq,
				1374	const ZSTD_longOffset_e isLongOffset,
				1375	const int frame);
				1376
				1377	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1378	static size_t
				1379	ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
				1380	const void* seqStart, size_t seqSize, int nbSeq,
				1381	const ZSTD_longOffset_e isLongOffset,
				1382	const int frame)
				1383	{
				1384	DEBUGLOG(5, "ZSTD_decompressSequences");
				1385	#if DYNAMIC_BMI2
				1386	if (dctx->bmi2) {
				1387	return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1388	}
				1389	#endif
				1390	return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1391	}
				1392	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
				1393
				1394
				1395	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1396	/* ZSTD_decompressSequencesLong() :
				1397	* decompression function triggered when a minimum share of offsets is considered "long",
				1398	* aka out of cache.
				1399	* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
				1400	* This function will try to mitigate main memory latency through the use of prefetching */
				1401	static size_t
				1402	ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
				1403	void* dst, size_t maxDstSize,
				1404	const void* seqStart, size_t seqSize, int nbSeq,
				1405	const ZSTD_longOffset_e isLongOffset,
				1406	const int frame)
				1407	{
				1408	DEBUGLOG(5, "ZSTD_decompressSequencesLong");
				1409	#if DYNAMIC_BMI2
				1410	if (dctx->bmi2) {
				1411	return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1412	}
				1413	#endif
				1414	return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
				1415	}
				1416	#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
				1417
				1418
				1419
				1420	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1421	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1422	/* ZSTD_getLongOffsetsShare() :
				1423	* condition : offTable must be valid
				1424	* @return : "share" of long offsets (arbitrarily defined as > (1<<23))
				1425	* compared to maximum possible of (1<<OffFSELog) */
				1426	static unsigned
				1427	ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
				1428	{
				1429	const void* ptr = offTable;
				1430	U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
				1431	const ZSTD_seqSymbol* table = offTable + 1;
				1432	U32 const max = 1 << tableLog;
				1433	U32 u, total = 0;
				1434	DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
				1435
				1436	assert(max <= (1 << OffFSELog)); /* max not too large */
				1437	for (u=0; u<max; u++) {
				1438	if (table[u].nbAdditionalBits > 22) total += 1;
				1439	}
				1440
				1441	assert(tableLog <= OffFSELog);
				1442	total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
				1443
				1444	return total;
				1445	}
				1446	#endif
				1447
				1448	size_t
				1449	ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
				1450	void* dst, size_t dstCapacity,
				1451	const void* src, size_t srcSize, const int frame)
				1452	{ /* blockType == blockCompressed */
				1453	const BYTE* ip = (const BYTE*)src;
				1454	/* isLongOffset must be true if there are long offsets.
				1455	* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
				1456	* We don't expect that to be the case in 64-bit mode.
				1457	* In block mode, window size is not known, so we have to be conservative.
				1458	* (note: but it could be evaluated from current-lowLimit)
				1459	*/
				1460	ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame \|\| (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
				1461	DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
				1462
				1463	RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
				1464
				1465	/* Decode literals section */
				1466	{ size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
				1467	DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
				1468	if (ZSTD_isError(litCSize)) return litCSize;
				1469	ip += litCSize;
				1470	srcSize -= litCSize;
				1471	}
				1472
				1473	/* Build Decoding Tables */
				1474	{
				1475	/* These macros control at build-time which decompressor implementation
				1476	* we use. If neither is defined, we do some inspection and dispatch at
				1477	* runtime.
				1478	*/
				1479	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1480	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1481	int usePrefetchDecoder = dctx->ddictIsCold;
				1482	#endif
				1483	int nbSeq;
				1484	size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
				1485	if (ZSTD_isError(seqHSize)) return seqHSize;
				1486	ip += seqHSize;
				1487	srcSize -= seqHSize;
				1488
				1489	RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
				1490
				1491	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1492	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1493	if ( !usePrefetchDecoder
				1494	&& (!frame \|\| (dctx->fParams.windowSize > (1<<24)))
				1495	&& (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
				1496	U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
				1497	U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
				1498	usePrefetchDecoder = (shareLongOffsets >= minShare);
				1499	}
				1500	#endif
				1501
				1502	dctx->ddictIsCold = 0;
				1503
				1504	#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
				1505	!defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
				1506	if (usePrefetchDecoder)
				1507	#endif
				1508	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
				1509	return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				1510	#endif
				1511
				1512	#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
				1513	/* else */
				1514	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
				1515	#endif
				1516	}
				1517	}
				1518
				1519
				1520	void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
				1521	{
				1522	if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
				1523	dctx->dictEnd = dctx->previousDstEnd;
				1524	dctx->virtualStart = (const char)dst - ((const char)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
				1525	dctx->prefixStart = dst;
				1526	dctx->previousDstEnd = dst;
				1527	}
				1528	}
				1529
				1530
				1531	size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
				1532	void* dst, size_t dstCapacity,
				1533	const void* src, size_t srcSize)
				1534	{
				1535	size_t dSize;
				1536	ZSTD_checkContinuity(dctx, dst, dstCapacity);
				1537	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
				1538	dctx->previousDstEnd = (char*)dst + dSize;
				1539	return dSize;
				1540	}