Update zstd to version 1.4.2

This commit is contained in:
Tino Reichardt
2019-07-29 09:03:32 +02:00
parent f8d5879212
commit 1364288df0
26 changed files with 1466 additions and 1079 deletions

View File

@@ -127,6 +127,13 @@
} \
}
/* vectorization */
#if !defined(__clang__) && defined(__GNUC__)
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
# define DONT_VECTORIZE
#endif
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
# include <intrin.h> /* For Visual 2005 */

View File

@@ -71,7 +71,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 4
#define ZSTD_VERSION_RELEASE 0
#define ZSTD_VERSION_RELEASE 2
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
@@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
/***************************************
* Default constant
***************************************/
/* *************************************
* Default constant
***************************************/
#ifndef ZSTD_CLEVEL_DEFAULT
# define ZSTD_CLEVEL_DEFAULT 3
#endif
/***************************************
* Constants
***************************************/
/* *************************************
* Constants
***************************************/
/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
@@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* it is recommended to allocate a context just once,
* and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
* Use one context per thread for parallel execution in multi-threaded environments. */
* Note : re-using context is just a speed / resource optimization.
* It doesn't change the compression ratio, which remains identical.
* Note 2 : In multi-threaded environments,
* use one different context per thread for parallel execution.
*/
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
@@ -380,6 +385,7 @@ typedef enum {
* ZSTD_c_forceMaxWindow
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -389,6 +395,7 @@ typedef enum {
ZSTD_c_experimentalParam3=1000,
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
} ZSTD_cParameter;
typedef struct {
@@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
/*******************************************************************************
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
* ZSTD_compressStream2(). It is redundant, but is still fully supported.
/* These buffer sizes are softly recommended.
* They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
* Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
*
* However, note that these recommendations are from the perspective of a C caller program.
* If the streaming interface is invoked from some other language,
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
* a major performance rule is to reduce crossing such interface to an absolute minimum.
* It's not rare that performance ends being spent more into the interface, rather than compression itself.
* In which cases, prefer using large buffers, as large as practical,
* for both input and output, to reduce the nb of roundtrips.
*/
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
/* *****************************************************************************
* This following is a legacy streaming API.
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
* It is redundant, but remains fully supported.
* Advanced parameters and dictionary compression can only be used through the
* new API.
******************************************************************************/
/**
/*!
* Equivalent to:
*
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
@@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
*/
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
/**
/*!
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
* returns the number of bytes left to flush (if non-zero and not an error).
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
*/
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
@@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#endif /* ZSTD_H_235446 */
/****************************************************************************************
/* **************************************************************************************
* ADVANCED AND EXPERIMENTAL FUNCTIONS
****************************************************************************************
* The definitions in the following section are considered experimental.
@@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
#define ZSTD_LDM_HASHRATELOG_MIN 0
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
/* Advanced parameter bounds */
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
/* internal */
#define ZSTD_HASHLOG3_MAX 17
@@ -1162,7 +1189,7 @@ typedef enum {
* however it does mean that all frame data must be present and valid. */
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
/** ZSTD_decompressBound() :
/*! ZSTD_decompressBound() :
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
* `srcSize` must be the _exact_ size of this series
* (i.e. there should be a frame boundary at `src + srcSize`)
@@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
*/
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
/* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size (default:0) */
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
/*! ZSTD_CCtx_getParameter() :
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
* and store it into int* value.
@@ -1843,7 +1875,7 @@ typedef struct {
unsigned checksumFlag;
} ZSTD_frameHeader;
/** ZSTD_getFrameHeader() :
/*! ZSTD_getFrameHeader() :
* decode Frame Header, or requires larger `srcSize`.
* @return : 0, `zfhPtr` is correctly filled,
* >0, `srcSize` is too small, value is wanted `srcSize` amount,

View File

File diff suppressed because it is too large Load Diff

View File

@@ -33,13 +33,13 @@ extern "C" {
***************************************/
#define kSearchStrength 8
#define HASH_READ_SIZE 8
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
It's not a big deal though : candidate will just be sorted again.
Additionally, candidate position 1 will be lost.
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
/*-*************************************
@@ -128,21 +128,20 @@ typedef struct {
BYTE const* base; /* All regular indexes relative to this position */
BYTE const* dictBase; /* extDict indexes relative to this position */
U32 dictLimit; /* below that point, need extDict */
U32 lowLimit; /* below that point, no more data */
U32 lowLimit; /* below that point, no more valid data */
} ZSTD_window_t;
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
struct ZSTD_matchState_t {
ZSTD_window_t window; /* State for window round buffer management */
U32 loadedDictEnd; /* index of end of dictionary */
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
U32 nextToUpdate; /* index from which to continue table update */
U32 nextToUpdate3; /* index from which to continue table update */
U32 hashLog3; /* dispatch table : larger == faster, more memory */
U32* hashTable;
U32* hashTable3;
U32* chainTable;
optState_t opt; /* optimal parser state */
const ZSTD_matchState_t * dictMatchState;
const ZSTD_matchState_t* dictMatchState;
ZSTD_compressionParameters cParams;
};
@@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s {
int compressionLevel;
int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
* No target when targetCBlockSize == 0.
* There is no guarantee on compressed block size */
ZSTD_dictAttachPref_e attachDictPref;
ZSTD_literalCompressionMode_e literalCompressionMode;
@@ -305,6 +307,30 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
}
/* ZSTD_cParam_withinBounds:
* @return 1 if value is within cParam bounds,
* 0 otherwise */
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
{
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
if (ZSTD_isError(bounds.error)) return 0;
if (value < bounds.lowerBound) return 0;
if (value > bounds.upperBound) return 0;
return 1;
}
/* ZSTD_minGain() :
* minimum compression required
* to generate a compress block or a compressed literals section.
* note : use same formula for both situations */
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
{
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
return (srcSize >> minlog) + 2;
}
/*! ZSTD_storeSeq() :
* Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
* `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
@@ -324,7 +350,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
/* copy Literals */
assert(seqStorePtr->maxNbLit <= 128 KB);
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
seqStorePtr->lit += litLength;
/* literal Length */
@@ -564,6 +590,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
/*-*************************************
* Round buffer management
***************************************/
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
#endif
/* Max current allowed */
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
/* Maximum chunk size before overflow correction needs to be called again */
@@ -675,31 +704,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
* Updates lowLimit so that:
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
*
* This allows a simple check that index >= lowLimit to see if index is valid.
* This must be called before a block compression call, with srcEnd as the block
* source end.
* It ensures index is valid as long as index >= lowLimit.
* This must be called before a block compression call.
*
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
* This is because dictionaries are allowed to be referenced as long as the last
* byte of the dictionary is in the window, but once they are out of range,
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
* loadedDictEnd == 0.
* loadedDictEnd is only defined if a dictionary is in use for current compression.
* As the name implies, loadedDictEnd represents the index at end of dictionary.
* The value lies within context's referential, it can be directly compared to blockEndIdx.
*
* In normal dict mode, the dict is between lowLimit and dictLimit. In
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
* is below them. forceWindow and dictMatchState are therefore incompatible.
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
* This is because dictionaries are allowed to be referenced fully
* as long as the last byte of the dictionary is in the window.
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
*
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
* In dictMatchState mode, lowLimit and dictLimit are the same,
* and the dictionary is below them.
* forceWindow and dictMatchState are therefore incompatible.
*/
MEM_STATIC void
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
void const* srcEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
(unsigned)blockEndIdx, (unsigned)maxDist);
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
/* - When there is no dictionary : loadedDictEnd == 0.
In which case, the test (blockEndIdx > maxDist) is merely to avoid
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
- When there is a standard dictionary :
Index referential is copied from the dictionary,
which means it starts from 0.
In which case, loadedDictEnd == dictSize,
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
since `blockEndIdx` also starts from zero.
- When there is an attached dictionary :
loadedDictEnd is expressed within the referential of the context,
so it can be directly compared against blockEndIdx.
*/
if (blockEndIdx > maxDist + loadedDictEnd) {
U32 const newLowLimit = blockEndIdx - maxDist;
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@@ -708,10 +755,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
window->dictLimit = window->lowLimit;
}
if (loadedDictEndPtr)
*loadedDictEndPtr = 0;
if (dictMatchStatePtr)
*dictMatchStatePtr = NULL;
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}
/* Similar to ZSTD_window_enforceMaxDist(),
* but only invalidates dictionary
* when input progresses beyond window size. */
MEM_STATIC void
ZSTD_checkDictValidity(ZSTD_window_t* window,
const void* blockEnd,
U32 maxDist,
U32* loadedDictEndPtr,
const ZSTD_matchState_t** dictMatchStatePtr)
{
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
/* On reaching window size, dictionaries are invalidated */
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
}
}

View File

@@ -0,0 +1,149 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_literals.h"
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
break;
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
}
memcpy(ostart + flSize, src, srcSize);
return srcSize + flSize;
}
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
BYTE* const ostart = (BYTE* const)dst;
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
switch(flSize)
{
case 1: /* 2 - 1 - 5 */
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
break;
case 2: /* 2 - 2 - 12 */
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
break;
case 3: /* 2 - 2 - 20 */
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
break;
default: /* not necessary : flSize is {1,2,3} */
assert(0);
}
ostart[flSize] = *(const BYTE*)src;
return flSize+1;
}
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize, strategy);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
BYTE* const ostart = (BYTE*)dst;
U32 singleStream = srcSize < 256;
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
disableLiteralCompression);
/* Prepare nextEntropy assuming reusing the existing table */
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
if (disableLiteralCompression)
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
{ HUF_repeat repeat = prevHuf->repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
if (repeat != HUF_repeat_none) {
/* reused the existing table */
hType = set_repeat;
}
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
if (hType == set_compressed) {
/* using a newly constructed table */
nextHuf->repeatMode = HUF_repeat_check;
}
/* Build header */
switch(lhSize)
{
case 3: /* 2 - 2 - 10 - 10 */
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
MEM_writeLE24(ostart, lhc);
break;
}
case 4: /* 2 - 2 - 14 - 14 */
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
MEM_writeLE32(ostart, lhc);
break;
}
case 5: /* 2 - 2 - 18 - 18 */
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
MEM_writeLE32(ostart, lhc);
ostart[4] = (BYTE)(cLitSize >> 10);
break;
}
default: /* not possible : lhSize is {3,4,5} */
assert(0);
}
return lhSize+cLitSize;
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_LITERALS_H
#define ZSTD_COMPRESS_LITERALS_H
#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
ZSTD_hufCTables_t* nextHuf,
ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
void* workspace, size_t wkspSize,
const int bmi2);
#endif /* ZSTD_COMPRESS_LITERALS_H */

View File

@@ -0,0 +1,415 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/*-*************************************
* Dependencies
***************************************/
#include "zstd_compress_sequences.h"
/**
* -log2(x / 256) lookup table for x in [0, 256).
* If x == 0: Return 0
* Else: Return floor(-log2(x / 256) * 256)
*/
static unsigned const kInverseProbabilityLog256[256] = {
0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
5, 4, 2, 1,
};
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
void const* ptr = ctable;
U16 const* u16ptr = (U16 const*)ptr;
U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
return maxSymbolValue;
}
/**
* Returns the cost in bytes of encoding the normalized count header.
* Returns an error if any of the helper functions return an error.
*/
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
size_t const nbSeq, unsigned const FSELog)
{
BYTE wksp[FSE_NCOUNTBOUND];
S16 norm[MaxSeq + 1];
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
/**
* Returns the cost in bits of encoding the distribution described by count
* using the entropy bound.
*/
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
{
unsigned cost = 0;
unsigned s;
for (s = 0; s <= max; ++s) {
unsigned norm = (unsigned)((256 * count[s]) / total);
if (count[s] != 0 && norm == 0)
norm = 1;
assert(count[s] < total);
cost += count[s] * kInverseProbabilityLog256[norm];
}
return cost >> 8;
}
/**
* Returns the cost in bits of encoding the distribution in count using ctable.
* Returns an error if ctable cannot represent all the symbols in count.
*/
static size_t ZSTD_fseBitCost(
FSE_CTable const* ctable,
unsigned const* count,
unsigned const max)
{
unsigned const kAccuracyLog = 8;
size_t cost = 0;
unsigned s;
FSE_CState_t cstate;
FSE_initCState(&cstate, ctable);
RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
"Repeat FSE_CTable has maxSymbolValue %u < %u",
ZSTD_getFSEMaxSymbolValue(ctable), max);
for (s = 0; s <= max; ++s) {
unsigned const tableLog = cstate.stateLog;
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
if (count[s] == 0)
continue;
RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
"Repeat FSE_CTable has Prob[%u] == 0", s);
cost += count[s] * bitCost;
}
return cost >> kAccuracyLog;
}
/**
* Returns the cost in bits of encoding the distribution in count using the
* table described by norm. The max symbol support by norm is assumed >= max.
* norm must be valid for every symbol with non-zero probability in count.
*/
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
unsigned const* count, unsigned const max)
{
unsigned const shift = 8 - accuracyLog;
size_t cost = 0;
unsigned s;
assert(accuracyLog <= 8);
for (s = 0; s <= max; ++s) {
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
unsigned const norm256 = normAcc << shift;
assert(norm256 > 0);
assert(norm256 < 256);
cost += count[s] * kInverseProbabilityLog256[norm256];
}
return cost >> 8;
}
symbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy)
{
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
if (mostFrequent == nbSeq) {
*repeatMode = FSE_repeat_none;
if (isDefaultAllowed && nbSeq <= 2) {
/* Prefer set_basic over set_rle when there are 2 or less symbols,
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
* If basic encoding isn't possible, always choose RLE.
*/
DEBUGLOG(5, "Selected set_basic");
return set_basic;
}
DEBUGLOG(5, "Selected set_rle");
return set_rle;
}
if (strategy < ZSTD_lazy) {
if (isDefaultAllowed) {
size_t const staticFse_nbSeq_max = 1000;
size_t const mult = 10 - strategy;
size_t const baseLog = 3;
size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
assert(mult <= 9 && mult >= 7);
if ( (*repeatMode == FSE_repeat_valid)
&& (nbSeq < staticFse_nbSeq_max) ) {
DEBUGLOG(5, "Selected set_repeat");
return set_repeat;
}
if ( (nbSeq < dynamicFse_nbSeq_min)
|| (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
DEBUGLOG(5, "Selected set_basic");
/* The format allows default tables to be repeated, but it isn't useful.
* When using simple heuristics to select encoding type, we don't want
* to confuse these tables with dictionaries. When running more careful
* analysis, we don't need to waste time checking both repeating tables
* and default tables.
*/
*repeatMode = FSE_repeat_none;
return set_basic;
}
}
} else {
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
if (isDefaultAllowed) {
assert(!ZSTD_isError(basicCost));
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
}
assert(!ZSTD_isError(NCountCost));
assert(compressedCost < ERROR(maxCode));
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
(unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
if (basicCost <= repeatCost && basicCost <= compressedCost) {
DEBUGLOG(5, "Selected set_basic");
assert(isDefaultAllowed);
*repeatMode = FSE_repeat_none;
return set_basic;
}
if (repeatCost <= compressedCost) {
DEBUGLOG(5, "Selected set_repeat");
assert(!ZSTD_isError(repeatCost));
return set_repeat;
}
assert(compressedCost < basicCost && compressedCost < repeatCost);
}
DEBUGLOG(5, "Selected set_compressed");
*repeatMode = FSE_repeat_check;
return set_compressed;
}
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize)
{
BYTE* op = (BYTE*)dst;
const BYTE* const oend = op + dstCapacity;
DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
switch (type) {
case set_rle:
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
*op = codeTable[0];
return 1;
case set_repeat:
memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
size_t nbSeq_1 = nbSeq;
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
if (count[codeTable[nbSeq-1]] > 1) {
count[codeTable[nbSeq-1]]--;
nbSeq_1--;
}
assert(nbSeq_1 > 1);
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
FORWARD_IF_ERROR(NCountSize);
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
return NCountSize;
}
}
default: assert(0); RETURN_ERROR(GENERIC);
}
}
FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
BIT_CStream_t blockStream;
FSE_CState_t stateMatchLength;
FSE_CState_t stateOffsetBits;
FSE_CState_t stateLitLength;
RETURN_ERROR_IF(
ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
dstSize_tooSmall, "not enough space remaining");
DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
(int)(blockStream.endPtr - blockStream.startPtr),
(unsigned)dstCapacity);
/* first symbols */
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
if (MEM_32bits()) BIT_flushBits(&blockStream);
if (longOffsets) {
U32 const ofBits = ofCodeTable[nbSeq-1];
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
BIT_flushBits(&blockStream);
}
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
ofBits - extraBits);
} else {
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
}
BIT_flushBits(&blockStream);
{ size_t n;
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
BYTE const llCode = llCodeTable[n];
BYTE const ofCode = ofCodeTable[n];
BYTE const mlCode = mlCodeTable[n];
U32 const llBits = LL_bits[llCode];
U32 const ofBits = ofCode;
U32 const mlBits = ML_bits[mlCode];
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
(unsigned)sequences[n].litLength,
(unsigned)sequences[n].matchLength + MINMATCH,
(unsigned)sequences[n].offset);
/* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
BIT_flushBits(&blockStream); /* (7)*/
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
if (longOffsets) {
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
if (extraBits) {
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
BIT_flushBits(&blockStream); /* (7)*/
}
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
ofBits - extraBits); /* 31 */
} else {
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
}
BIT_flushBits(&blockStream); /* (7)*/
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
} }
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
FSE_flushCState(&blockStream, &stateMatchLength);
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
FSE_flushCState(&blockStream, &stateOffsetBits);
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
FSE_flushCState(&blockStream, &stateLitLength);
{ size_t const streamSize = BIT_closeCStream(&blockStream);
RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
return streamSize;
}
}
static size_t
ZSTD_encodeSequences_default(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets)
{
return ZSTD_encodeSequences_body(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#endif
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
if (bmi2) {
return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}
#endif
(void)bmi2;
return ZSTD_encodeSequences_default(dst, dstCapacity,
CTable_MatchLength, mlCodeTable,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq, longOffsets);
}

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPRESS_SEQUENCES_H
#define ZSTD_COMPRESS_SEQUENCES_H
#include "fse.h" /* FSE_repeat, FSE_CTable */
#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
typedef enum {
ZSTD_defaultDisallowed = 0,
ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
symbolEncodingType_e
ZSTD_selectEncodingType(
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
FSE_CTable const* prevCTable,
short const* defaultNorm, U32 defaultNormLog,
ZSTD_defaultPolicy_e const isDefaultAllowed,
ZSTD_strategy const strategy);
size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
unsigned* count, U32 max,
const BYTE* codeTable, size_t nbSeq,
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
const FSE_CTable* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize);
size_t ZSTD_encodeSequences(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
#endif /* ZSTD_COMPRESS_SEQUENCES_H */

View File

@@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
frameParameter_unsupported);
return skippableHeaderSize + sizeU32;
{
size_t const skippableSize = skippableHeaderSize + sizeU32;
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
return skippableSize;
}
}
/** ZSTD_findDecompressedSize() :
@@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
if (srcSize < skippableSize) {
if (ZSTD_isError(skippableSize)) {
return ZSTD_CONTENTSIZE_ERROR;
}
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
@@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
frameSizeInfo.compressedSize <= srcSize);
return frameSizeInfo;
} else {
const BYTE* ip = (const BYTE*)src;
@@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
return frameSizeInfo.compressedSize;
}
/** ZSTD_decompressBound() :
* compatible with legacy mode
* `src` must point to the start of a ZSTD frame or a skippeable frame
@@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
return ZSTD_CONTENTSIZE_ERROR;
assert(srcSize >= compressedSize);
src = (const BYTE*)src + compressedSize;
srcSize -= compressedSize;
bound += decompressedBound;
@@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
if (ZSTD_isError(skippableSize))
return skippableSize;
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
FORWARD_IF_ERROR(skippableSize);
assert(skippableSize <= srcSize);
src = (const BYTE *)src + skippableSize;
srcSize -= skippableSize;
@@ -906,6 +909,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{ blockProperties_t bp;
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
dctx->expected = cBlockSize;
dctx->bType = bp.blockType;
dctx->rleSize = bp.origSize;
@@ -950,6 +954,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
RETURN_ERROR(corruption_detected);
}
if (ZSTD_isError(rSize)) return rSize;
RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);

View File

@@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
*nbSeqPtr = nbSeq;
/* FSE table descriptors */
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
/* copy Literals */
ZSTD_copy8(op, *litPtr);
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr);
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
@@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
@@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
/* copy Literals */
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
if (sequence.litLength > 8)
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
else
ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
op = oLitEnd;
*litPtr = iLitEnd; /* update for next sequence */
@@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,
if (oMatchEnd > oend-(16-MINMATCH)) {
if (op < oend_w) {
ZSTD_wildcopy(op, match, oend_w - op);
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
match += oend_w - op;
op = oend_w;
}
while (op < oMatchEnd) *op++ = *match++;
} else {
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
}
return sequenceLength;
}
@@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
}
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,
@@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
ZSTD_STATIC_ASSERT(
BIT_DStream_unfinished < BIT_DStream_completed &&
BIT_DStream_endOfBuffer < BIT_DStream_completed &&
BIT_DStream_completed < BIT_DStream_overflow);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
{ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
/* check if reached exact end */
DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
RETURN_ERROR_IF(nbSeq, corruption_detected);
RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
}
@@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,

View File

@@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
/* Only load extra positions for ZSTD_dtlm_full */
if (dtlm == ZSTD_dtlm_fast)
break;
}
}
} }
}
@@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 prefixLowestIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 lowestValid = ms->window.dictLimit;
const U32 maxDistance = 1U << cParams->windowLog;
const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const BYTE* const prefixLowest = base + prefixLowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
dictCParams->chainLog : hBitsS;
const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
/* if a dictionary is attached, it must be within window range */
if (dictMode == ZSTD_dictMatchState) {
assert(lowestValid + maxDistance >= endIndex);
}
/* init */
ip += (dictAndPrefixLength == 0);
if (dictMode == ZSTD_noDict) {
@@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}
@@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
&& ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
goto _match_stored;
}
@@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
goto _match_found;
}
}
} }
if (matchIndexS > prefixLowestIndex) {
/* check prefix short match */
@@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(
if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
goto _search_next_long;
}
}
} }
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
_search_next_long:
{
size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
{ size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
U32 const matchIndexL3 = hashLong[hl3];
const BYTE* matchL3 = base + matchIndexL3;
@@ -221,9 +227,7 @@ _search_next_long:
offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
goto _match_found;
}
}
}
} } }
/* if no long +1 match, explore the short match we found */
if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@@ -242,7 +246,7 @@ _match_found:
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
_match_stored:
/* match found */
@@ -250,11 +254,14 @@ _match_stored:
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
if (dictMode == ZSTD_dictMatchState) {
@@ -278,8 +285,7 @@ _match_stored:
continue;
}
break;
}
}
} }
if (dictMode == ZSTD_noDict) {
while ( (ip <= ilimit)
@@ -294,14 +300,15 @@ _match_stored:
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } } }
} } }
} /* while (ip < ilimit) */
/* save reps for next block */
rep[0] = offset_1 ? offset_1 : offsetSaved;
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
const U32 prefixStartIndex = ms->window.dictLimit;
const BYTE* const base = ms->window.base;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowestValid = ms->window.lowLimit;
const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
const U32 dictStartIndex = lowLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const U32 dictStartIndex = ms->window.lowLimit;
const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictStart = dictBase + dictStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }
/* found a match : store it */
/* move to next sequence start */
ip += mLength;
anchor = ip;
if (ip <= ilimit) {
/* Fill Table */
hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
/* Complementary insertion */
/* done after iLimit test, as candidates could be > iend-8 */
{ U32 const indexToInsert = current+2;
hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
}
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
@@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

View File

@@ -13,7 +13,8 @@
void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
void const* end, ZSTD_dictTableLoadMethod_e dtlm)
const void* const end,
ZSTD_dictTableLoadMethod_e dtlm)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32* const hashTable = ms->hashTable;
@@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
} } } }
}
FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
const BYTE* ip0 = istart;
const BYTE* ip1;
const BYTE* anchor = istart;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validStartIndex = ms->window.dictLimit;
const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
const U32 dictHLog = dictCParams->hashLog;
/* otherwise, we would get index underflow when translating a dict index
* into a local index */
/* if a dictionary is still attached, it necessarily means that
* it is within window size. So we just check it. */
const U32 maxDistance = 1U << cParams->windowLog;
const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
assert(endIndex - prefixStartIndex <= maxDistance);
(void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */
/* ensure there will be no no underflow
* when translating a dict index into a local index */
assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
/* init */
@@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else if ( (matchIndex <= prefixStartIndex) ) {
size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
} /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
} else if (MEM_read32(match) != MEM_read32(ip)) {
/* it's not a match, and we're not going to check the dictionary */
@@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
&& (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
size_t ZSTD_compressBlock_fast_dictMatchState(
@@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const U32 dictStartIndex = ms->window.lowLimit;
const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 validLow = ms->window.lowLimit;
const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
const U32 dictStartIndex = lowLimit;
const BYTE* const dictStart = dictBase + dictStartIndex;
const U32 prefixStartIndex = ms->window.dictLimit;
const U32 dictLimit = ms->window.dictLimit;
const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
const BYTE* const prefixStart = base + prefixStartIndex;
const BYTE* const dictEnd = dictBase + prefixStartIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
U32 offset_1=rep[0], offset_2=rep[1];
/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
ip++;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < dictStartIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
@@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
@@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}

View File

@@ -34,7 +34,6 @@
#endif
#include "xxhash.h" /* XXH_reset, update, digest */
#if defined (__cplusplus)
extern "C" {
#endif
@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
* Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
#define WILDCOPY_OVERLENGTH 8
#define VECLEN 16
typedef enum {
ZSTD_no_overlap,
ZSTD_overlap_src_before_dst,
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
* custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
#define WILDCOPY_OVERLENGTH 8
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
do
COPY8(op, ip)
while (op < oend);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
/*! ZSTD_wildcopy_16min() :
* same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
assert(length >= 8);
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
do
COPY8(op, ip)
while (op < oend);
}
else {
if ((length & 8) == 0)
COPY8(op, ip);
do {
COPY16(op, ip);
}
while (op < oend);
}
}
MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */

View File

@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
U32* largerPtr = smallerPtr + 1;
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
U32 const windowValid = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
current, dictLimit, windowLow);
@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const windowLow = ms->window.lowLimit;
U32 const maxDistance = 1U << cParams->windowLog;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
U32* const bt = ms->chainTable;
U32 const btLog = cParams->chainLog - 1;
@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 maxDistance = 1U << cParams->windowLog;
const U32 lowValid = ms->window.lowLimit;
const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
const U32 minChain = current > chainSize ? current - chainSize : 0;
U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(
/* init */
ip += (dictAndPrefixLength == 0);
ms->nextToUpdate3 = ms->nextToUpdate;
if (dictMode == ZSTD_noDict) {
U32 const maxRep = (U32)(ip - prefixLowest);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */
ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */

View File

@@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src);
&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
}
/* 2. We enforce the maximum offset allowed.

View File

@@ -238,6 +238,10 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
break;
}
if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
}
return frameSizeInfo;
}

View File

@@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
* to provide a cost which is directly comparable to a match ending at same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);
/* dynamic statistics */
{ U32 const llCode = ZSTD_LLcode(litLength);
int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
+ WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+ (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
- (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
#if 1
return contribution;
#else
@@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
const optState_t* const optPtr,
int optLevel)
{
int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+ ZSTD_litLengthContribution(litLength, optPtr, optLevel);
return contribution;
}
@@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip)
{
U32* const hashTable3 = ms->hashTable3;
U32 const hashLog3 = ms->hashLog3;
const BYTE* const base = ms->window.base;
U32 idx = ms->nextToUpdate3;
U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
U32 idx = *nextToUpdate3;
U32 const target = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
assert(hashLog3 > 0);
@@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
idx++;
}
*nextToUpdate3 = target;
return hashTable3[hash3];
}
@@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
} }
*smallerPtr = *largerPtr = 0;
if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return matchEndIdx - (current + 8);
{ U32 positions = 0;
if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */
assert(matchEndIdx > current + 8);
return MAX(positions, matchEndIdx - (current + 8));
}
}
FORCE_INLINE_TEMPLATE
@@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
idx, target, dictMode);
while(idx < target)
idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
while(idx < target) {
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}
assert((size_t)(ip - base) <= (size_t)(U32)(-1));
assert((size_t)(iend - base) <= (size_t)(U32)(-1));
ms->nextToUpdate = target;
}
@@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM],
const U32 rep[ZSTD_REP_NUM],
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
ZSTD_match_t* matches,
const U32 lengthToBeat,
U32 const mls /* template */)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const maxDistance = 1U << cParams->windowLog;
const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
U32 const hashLog = cParams->hashLog;
@@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask;
U32 const windowLow = ms->window.lowLimit;
U32 const btLow = (btMask >= current) ? 0 : current - btMask;
U32 const windowValid = ms->window.lowLimit;
U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
U32 const matchLow = windowLow ? windowLow : 1;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
@@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
if ((matchIndex3 >= matchLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
@@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
(ip+mlen == iLimit) ) { /* best possible length */
ms->nextToUpdate = current+1; /* skip insertion */
return 1;
}
}
}
} } }
/* no dictMatchState lookup: dicts don't have a populated HC3 table */
}
@@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */
ZSTD_matchState_t* ms,
U32* nextToUpdate3,
const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, U32 const lengthToBeat)
const U32 rep[ZSTD_REP_NUM],
U32 const ll0,
U32 const lengthToBeat)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
U32 const matchLengthSearch = cParams->minMatch;
@@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
switch(matchLengthSearch)
{
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
default :
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
case 7 :
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
}
}
@@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
U32 nextToUpdate3 = ms->nextToUpdate;
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
@@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
assert(optLevel <= 2);
ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
ip += (ip==prefixStart);
@@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
@@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
U32 const previousPrice = opt[cur].price;
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
U32 matchNb;
if (!nbMatches) {
DEBUGLOG(7, "rPos:%u : no match found", cur);
@@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
} /* while (ip < ilimit) */
/* Return the last literals size */
return iend - anchor;
return (size_t)(iend - anchor);
}
@@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
ms->window.dictLimit += (U32)srcSize;
ms->window.lowLimit = ms->window.dictLimit;
ms->nextToUpdate = ms->window.dictLimit;
ms->nextToUpdate3 = ms->window.dictLimit;
/* re-inforce weight of collected statistics */
ZSTD_upscaleStats(&ms->opt);

View File

@@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
const U32 dtLog = DTable[0];
size_t errorCode;
U32 reloadStatus;
/* Init */
const U16* jumpTable = (const U16*)cSrc;
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
const char* const start4 = start3 + length3;
FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
errorCode = FSE_initDStream(&bitD1, start1, length1);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD2, start2, length2);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD3, start3, length3);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD4, start4, length4);
if (FSE_isError(errorCode)) return errorCode;
reloadStatus=FSE_reloadDStream(&bitD2);
/* 16 symbols per loop */
for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
{
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
BYTE* const ostart = (BYTE*) dst;
BYTE* op = ostart;
BYTE* const omax = op + maxDstSize;
BYTE* const olimit = omax-15;
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
const void* ptr = DTable;
const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
const U32 dtLog = DTable[0];
size_t errorCode;
U32 reloadStatus;
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits()) FSE_reloadDStream(&Dstream)
/* Init */
HUF_DECODE_SYMBOL_1( 0, bitD1);
HUF_DECODE_SYMBOL_1( 1, bitD2);
HUF_DECODE_SYMBOL_1( 2, bitD3);
HUF_DECODE_SYMBOL_1( 3, bitD4);
HUF_DECODE_SYMBOL_2( 4, bitD1);
HUF_DECODE_SYMBOL_2( 5, bitD2);
HUF_DECODE_SYMBOL_2( 6, bitD3);
HUF_DECODE_SYMBOL_2( 7, bitD4);
HUF_DECODE_SYMBOL_1( 8, bitD1);
HUF_DECODE_SYMBOL_1( 9, bitD2);
HUF_DECODE_SYMBOL_1(10, bitD3);
HUF_DECODE_SYMBOL_1(11, bitD4);
HUF_DECODE_SYMBOL_0(12, bitD1);
HUF_DECODE_SYMBOL_0(13, bitD2);
HUF_DECODE_SYMBOL_0(14, bitD3);
HUF_DECODE_SYMBOL_0(15, bitD4);
}
const U16* jumpTable = (const U16*)cSrc;
const size_t length1 = FSE_readLE16(jumpTable);
const size_t length2 = FSE_readLE16(jumpTable+1);
const size_t length3 = FSE_readLE16(jumpTable+2);
const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !!
const char* const start1 = (const char*)(cSrc) + 6;
const char* const start2 = start1 + length1;
const char* const start3 = start2 + length2;
const char* const start4 = start3 + length3;
FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
errorCode = FSE_initDStream(&bitD1, start1, length1);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD2, start2, length2);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD3, start3, length3);
if (FSE_isError(errorCode)) return errorCode;
errorCode = FSE_initDStream(&bitD4, start4, length4);
if (FSE_isError(errorCode)) return errorCode;
reloadStatus=FSE_reloadDStream(&bitD2);
/* 16 symbols per loop */
for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit); /* D2-3-4 are supposed to be synchronized and finish together */
op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
{
HUF_DECODE_SYMBOL_0(0, bitTail);
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
#define HUF_DECODE_SYMBOL_1(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
#define HUF_DECODE_SYMBOL_2(n, Dstream) \
op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
if (FSE_32bits()) FSE_reloadDStream(&Dstream)
HUF_DECODE_SYMBOL_1( 0, bitD1);
HUF_DECODE_SYMBOL_1( 1, bitD2);
HUF_DECODE_SYMBOL_1( 2, bitD3);
HUF_DECODE_SYMBOL_1( 3, bitD4);
HUF_DECODE_SYMBOL_2( 4, bitD1);
HUF_DECODE_SYMBOL_2( 5, bitD2);
HUF_DECODE_SYMBOL_2( 6, bitD3);
HUF_DECODE_SYMBOL_2( 7, bitD4);
HUF_DECODE_SYMBOL_1( 8, bitD1);
HUF_DECODE_SYMBOL_1( 9, bitD2);
HUF_DECODE_SYMBOL_1(10, bitD3);
HUF_DECODE_SYMBOL_1(11, bitD4);
HUF_DECODE_SYMBOL_0(12, bitD1);
HUF_DECODE_SYMBOL_0(13, bitD2);
HUF_DECODE_SYMBOL_0(14, bitD3);
HUF_DECODE_SYMBOL_0(15, bitD4);
}
if (FSE_endOfDStream(&bitTail))
return op-ostart;
if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
/* tail */
{
// bitTail = bitD1; // *much* slower : -20% !??!
FSE_DStream_t bitTail;
bitTail.ptr = bitD1.ptr;
bitTail.bitsConsumed = bitD1.bitsConsumed;
bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer
bitTail.start = start1;
for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
{
HUF_DECODE_SYMBOL_0(0, bitTail);
}
if (FSE_endOfDStream(&bitTail))
return op-ostart;
}
if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
}
if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
return (size_t)-FSE_ERROR_corruptionDetected;
}
@@ -1355,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void)
static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@@ -1381,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr)
}
}
static U32 ZSTD_readLE32(const void* memPtr)
static U32 ZSTD_readLE24(const void* memPtr)
{
if (ZSTD_isLittleEndian())
return ZSTD_read32(memPtr);
else
{
const BYTE* p = (const BYTE*)memPtr;
return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
}
return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
static U32 ZSTD_readBE32(const void* memPtr)
@@ -1704,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = dumps<de ? *dumps++ : 0;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
{
if (dumps<=(de-3))
{
litLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = ZSTD_readLE24(dumps);
dumps += 3;
}
}
@@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = dumps<de ? *dumps++ : 0;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
{
if (dumps<=(de-3))
{
matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = ZSTD_readLE24(dumps);
dumps += 3;
}
}

View File

@@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@@ -3043,11 +3048,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
else if (dumps + 3 <= de)
{
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -3073,11 +3078,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
else if (dumps + 3 <= de)
{
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */

View File

@@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@@ -2684,11 +2689,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
seqState->prevOffset = seq->offset;
if (litLength == MaxLL)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else
else if (dumps + 3 <= de)
{
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
@@ -2714,11 +2719,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML)
{
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else
else if (dumps + 3 <= de)
{
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */

View File

@@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
}
}
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
if (MEM_isLittleEndian())
@@ -2808,13 +2813,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) litLength += add;
else {
litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
else if (dumps + 3 <= de) {
litLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
@@ -2837,13 +2841,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
else if (dumps + 3 <= de){
matchLength = MEM_readLE24(dumps);
dumps += 3;
}
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;

View File

@@ -1998,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable(
const void* cSrc, size_t cSrcSize,
const U16* DTable)
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* Check */
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
{
const BYTE* const istart = (const BYTE*) cSrc;
BYTE* const ostart = (BYTE*) dst;
BYTE* const oend = ostart + dstSize;
const void* const dtPtr = DTable;
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
const U32 dtLog = DTable[0];
size_t errorCode;
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* Init */
BITv05_DStream_t bitD1;
BITv05_DStream_t bitD2;
BITv05_DStream_t bitD3;
BITv05_DStream_t bitD4;
const size_t length1 = MEM_readLE16(istart);
const size_t length2 = MEM_readLE16(istart+2);
const size_t length3 = MEM_readLE16(istart+4);
size_t length4;
const BYTE* const istart1 = istart + 6; /* jumpTable */
const BYTE* const istart2 = istart1 + length1;
const BYTE* const istart3 = istart2 + length2;
const BYTE* const istart4 = istart3 + length3;
const size_t segmentSize = (dstSize+3) / 4;
BYTE* const opStart2 = ostart + segmentSize;
BYTE* const opStart3 = opStart2 + segmentSize;
BYTE* const opStart4 = opStart3 + segmentSize;
BYTE* op1 = ostart;
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
U32 endSignal;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
length4 = cSrcSize - (length1 + length2 + length3 + 6);
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
if (HUFv05_isError(errorCode)) return errorCode;
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
if (HUFv05_isError(errorCode)) return errorCode;
/* 16-32 symbols per loop (4-8 symbols per stream) */
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
/* check corruption */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
/* note : op4 supposed already verified within main loop */
/* finish bitStreams one by one */
HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
if (!endSignal) return ERROR(corruption_detected);
/* decoded size */
return dstSize;
}
@@ -3150,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSEv05_peakSymbol(&(seqState->stateLL));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
U32 add = *dumps++;
const U32 add = *dumps++;
if (add < 255) litLength += add;
else {
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */
if (litLength&1) litLength>>=1, dumps += 3;
else litLength = (U16)(litLength)>>1, dumps += 2;
else if (dumps + 2 <= de) {
litLength = MEM_readLE16(dumps);
dumps += 2;
if ((litLength & 1) && dumps < de) {
litLength += *dumps << 16;
dumps += 1;
}
litLength>>=1;
}
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
@@ -3184,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
U32 add = *dumps++;
const U32 add = dumps<de ? *dumps++ : 0;
if (add < 255) matchLength += add;
else {
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
if (matchLength&1) matchLength>>=1, dumps += 3;
else matchLength = (U16)(matchLength)>>1, dumps += 2;
else if (dumps + 2 <= de) {
matchLength = MEM_readLE16(dumps);
dumps += 2;
if ((matchLength & 1) && dumps < de) {
matchLength += *dumps << 16;
dumps += 1;
}
matchLength >>= 1;
}
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;

View File

@@ -3242,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const Offtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);
@@ -3672,7 +3670,7 @@ void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
blockProperties_t blockProperties = { bt_compressed, 0 };
/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
{ size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
if (ZSTDv06_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;

View File

@@ -3470,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
}
/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{ U32 const LLtype = *ip >> 6;
U32 const OFtype = (*ip >> 4) & 3;
U32 const MLtype = (*ip >> 2) & 3;
ip++;
/* check */
if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
/* Build DTables */
{ size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
@@ -3918,7 +3916,7 @@ void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
}
/* Frame Header */
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
{ size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
if (ZSTDv07_isError(frameHeaderSize)) {
ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
return;

View File

@@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
assert(flushed <= produced);
assert(jobPtr->consumed <= jobPtr->src.size);
toFlush = produced - flushed;
if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
/* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
/* if toFlush==0, nothing is available to flush.
* However, jobID is expected to still be active:
* if jobID was already completed and fully flushed,
* ZSTDMT_flushProduced() should have already moved onto next job.
* Therefore, some input has not yet been consumed. */
if (toFlush==0) {
assert(jobPtr->consumed < jobPtr->src.size);
}
}
@@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{
if (params.ldmParams.enableLdm)
unsigned jobLog;
if (params.ldmParams.enableLdm) {
/* In Long Range Mode, the windowLog is typically oversized.
* In which case, it's preferable to determine the jobSize
* based on chainLog instead. */
return MAX(21, params.cParams.chainLog + 4);
return MAX(20, params.cParams.windowLog + 2);
jobLog = MAX(21, params.cParams.chainLog + 4);
} else {
jobLog = MAX(20, params.cParams.windowLog + 2);
}
return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
- overlapRLog;
}
assert(0 <= ovLog && ovLog <= 30);
assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
@@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
@@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);

View File

@@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))