From 1364288df0defed181e9d16e697f1864b4adaadc Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Mon, 29 Jul 2019 09:03:32 +0200 Subject: [PATCH] Update zstd to version 1.4.2 --- C/zstd/compiler.h | 7 + C/zstd/zstd.h | 76 ++- C/zstd/zstd_compress.c | 869 +++++++------------------------ C/zstd/zstd_compress_internal.h | 128 +++-- C/zstd/zstd_compress_literals.c | 149 ++++++ C/zstd/zstd_compress_literals.h | 29 ++ C/zstd/zstd_compress_sequences.c | 415 +++++++++++++++ C/zstd/zstd_compress_sequences.h | 47 ++ C/zstd/zstd_decompress.c | 23 +- C/zstd/zstd_decompress_block.c | 29 +- C/zstd/zstd_double_fast.c | 92 ++-- C/zstd/zstd_fast.c | 49 +- C/zstd/zstd_internal.h | 64 ++- C/zstd/zstd_lazy.c | 15 +- C/zstd/zstd_ldm.c | 2 +- C/zstd/zstd_legacy.h | 4 + C/zstd/zstd_opt.c | 77 +-- C/zstd/zstd_v01.c | 190 ++++--- C/zstd/zstd_v02.c | 17 +- C/zstd/zstd_v03.c | 17 +- C/zstd/zstd_v04.c | 19 +- C/zstd/zstd_v05.c | 189 +++---- C/zstd/zstd_v06.c | 6 +- C/zstd/zstd_v07.c | 6 +- C/zstd/zstdmt_compress.c | 25 +- C/zstd/zstdmt_compress.h | 1 + 26 files changed, 1466 insertions(+), 1079 deletions(-) create mode 100644 C/zstd/zstd_compress_literals.c create mode 100644 C/zstd/zstd_compress_literals.h create mode 100644 C/zstd/zstd_compress_sequences.c create mode 100644 C/zstd/zstd_compress_sequences.h diff --git a/C/zstd/compiler.h b/C/zstd/compiler.h index 0836e3ed..87bf51ae 100644 --- a/C/zstd/compiler.h +++ b/C/zstd/compiler.h @@ -127,6 +127,13 @@ } \ } +/* vectorization */ +#if !defined(__clang__) && defined(__GNUC__) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +#else +# define DONT_VECTORIZE +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include /* For Visual 2005 */ diff --git a/C/zstd/zstd.h b/C/zstd/zstd.h index 53470c18..4a1f8161 100644 --- a/C/zstd/zstd.h +++ b/C/zstd/zstd.h @@ -71,7 +71,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_RELEASE 2 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ @@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ -/*************************************** -* Default constant -***************************************/ +/* ************************************* + * Default constant + ***************************************/ #ifndef ZSTD_CLEVEL_DEFAULT # define ZSTD_CLEVEL_DEFAULT 3 #endif -/*************************************** -* Constants -***************************************/ +/* ************************************* + * Constants + ***************************************/ /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ @@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres ***************************************/ /*= Compression context * When compressing many times, - * it is recommended to allocate a context just once, and re-use it for each successive compression operation. + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution in multi-threaded environments. */ + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); @@ -380,6 +385,7 @@ typedef enum { * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -389,6 +395,7 @@ typedef enum { ZSTD_c_experimentalParam3=1000, ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, } ZSTD_cParameter; typedef struct { @@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_inBuffer* input, ZSTD_EndDirective endOp); -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ -/******************************************************************************* - * This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and - * ZSTD_compressStream2(). It is redundant, but is still fully supported. +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. * Advanced parameters and dictionary compression can only be used through the * new API. ******************************************************************************/ -/** +/*! * Equivalent to: * * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); @@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); */ ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/** +/*! * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). * NOTE: The return value is different. ZSTD_compressStream() returns a hint for * the next read size (if non-zero and not an error). ZSTD_compressStream2() - * returns the number of bytes left to flush (if non-zero and not an error). + * returns the minimum nb of bytes left to flush (if non-zero and not an error). */ ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); @@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #endif /* ZSTD_H_235446 */ -/**************************************************************************************** +/* ************************************************************************************** * ADVANCED AND EXPERIMENTAL FUNCTIONS **************************************************************************************** * The definitions in the following section are considered experimental. @@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_LDM_HASHRATELOG_MIN 0 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) +/* Advanced parameter bounds */ +#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX + /* internal */ #define ZSTD_HASHLOG3_MAX 17 @@ -1162,7 +1189,7 @@ typedef enum { * however it does mean that all frame data must be present and valid. */ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); -/** ZSTD_decompressBound() : +/*! ZSTD_decompressBound() : * `src` should point to the start of a series of ZSTD encoded and/or skippable frames * `srcSize` must be the _exact_ size of this series * (i.e. there should be a frame boundary at `src + srcSize`) @@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -1843,7 +1875,7 @@ typedef struct { unsigned checksumFlag; } ZSTD_frameHeader; -/** ZSTD_getFrameHeader() : +/*! ZSTD_getFrameHeader() : * decode Frame Header, or requires larger `srcSize`. * @return : 0, `zfhPtr` is correctly filled, * >0, `srcSize` is too small, value is wanted `srcSize` amount, diff --git a/C/zstd/zstd_compress.c b/C/zstd/zstd_compress.c index 2e163c8b..8ad15e1c 100644 --- a/C/zstd/zstd_compress.c +++ b/C/zstd/zstd_compress.c @@ -21,6 +21,8 @@ #define HUF_STATIC_LINKING_ONLY #include "huf.h" #include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" #include "zstd_fast.h" #include "zstd_double_fast.h" #include "zstd_lazy.h" @@ -385,6 +387,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_lcm_uncompressed; return bounds; + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + default: { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; return boundError; @@ -392,18 +399,6 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) } } -/* ZSTD_cParam_withinBounds: - * @return 1 if value is within cParam bounds, - * 0 otherwise */ -static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} - /* ZSTD_cParam_clampBounds: * Clamps the value into the bounded range. */ @@ -452,6 +447,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_ldmHashRateLog: case ZSTD_c_forceAttachDict: case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: default: return 0; } @@ -497,6 +493,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: break; default: RETURN_ERROR(parameter_unsupported); @@ -671,6 +668,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->ldmParams.hashRateLog = value; return CCtxParams->ldmParams.hashRateLog; + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -692,13 +695,13 @@ size_t ZSTD_CCtxParams_getParameter( *value = CCtxParams->compressionLevel; break; case ZSTD_c_windowLog : - *value = CCtxParams->cParams.windowLog; + *value = (int)CCtxParams->cParams.windowLog; break; case ZSTD_c_hashLog : - *value = CCtxParams->cParams.hashLog; + *value = (int)CCtxParams->cParams.hashLog; break; case ZSTD_c_chainLog : - *value = CCtxParams->cParams.chainLog; + *value = (int)CCtxParams->cParams.chainLog; break; case ZSTD_c_searchLog : *value = CCtxParams->cParams.searchLog; @@ -773,6 +776,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_ldmHashRateLog : *value = CCtxParams->ldmParams.hashRateLog; break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -930,12 +936,12 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) @return : 0, or an error code if one value is beyond authorized range */ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) { - BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog); - BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog); - BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog); - BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength); + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); return 0; } @@ -951,7 +957,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams) if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ } -# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int) +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) CLAMP(ZSTD_c_windowLog, cParams.windowLog); CLAMP(ZSTD_c_chainLog, cParams.chainLog); CLAMP(ZSTD_c_hashLog, cParams.hashLog); @@ -1282,15 +1288,14 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) } /*! ZSTD_invalidateMatchState() - * Invalidate all the matches in the match finder tables. - * Requires nextSrc and base to be set (can be NULL). + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). */ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) { ZSTD_window_clear(&ms->window); ms->nextToUpdate = ms->window.dictLimit; - ms->nextToUpdate3 = ms->window.dictLimit; ms->loadedDictEnd = 0; ms->opt.litLengthSum = 0; /* force reset of btopt stats */ ms->dictMatchState = NULL; @@ -1327,15 +1332,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; +typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; + static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, const ZSTD_compressionParameters* cParams, - ZSTD_compResetPolicy_e const crp, U32 const forCCtx) + ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho) { size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = ((size_t)1) << hashLog3; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); @@ -1349,7 +1356,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_invalidateMatchState(ms); /* opt parser space */ - if (forCCtx && (cParams->strategy >= ZSTD_btopt)) { + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { DEBUGLOG(4, "reserving optimal parser space"); ms->opt.litFreq = (unsigned*)ptr; ms->opt.litLengthFreq = ms->opt.litFreq + (1< (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large * during at least this number of times, @@ -1388,7 +1408,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, note : `params` are assumed fully validated at this stage */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_CCtx_params params, - U64 pledgedSrcSize, + U64 const pledgedSrcSize, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { @@ -1400,13 +1420,21 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (ZSTD_equivalentParams(zc->appliedParams, params, zc->inBuffSize, zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, - zbuff, pledgedSrcSize)) { - DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)", - zc->appliedParams.cParams.windowLog, zc->blockSize); + zbuff, pledgedSrcSize) ) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ - if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) + if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { + DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", + zc->appliedParams.cParams.windowLog, zc->blockSize); + if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { + /* prefer a reset, faster than a rescale */ + ZSTD_reset_matchState(&zc->blockState.matchState, + zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, + ¶ms.cParams, + crp, ZSTD_resetTarget_CCtx); + } return ZSTD_continueCCtx(zc, params, pledgedSrcSize); - } } + } } } DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); if (params.ldmParams.enableLdm) { @@ -1449,7 +1477,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); if (workSpaceTooSmall || workSpaceWasteful) { - DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB", + DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB", zc->workSpaceSize >> 10, neededSpace >> 10); @@ -1491,7 +1519,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; + ptr = ZSTD_reset_matchState(&zc->blockState.matchState, + zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, + ¶ms.cParams, + crp, ZSTD_resetTarget_CCtx); /* ldm hash table */ /* initialize bucketOffsets table later for pointer alignment */ @@ -1509,8 +1540,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, } assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, ¶ms.cParams, crp, /* forCCtx */ 1); - /* sequences storage */ zc->seqStore.maxNbSeq = maxNbSeq; zc->seqStore.sequencesStart = (seqDef*)ptr; @@ -1587,15 +1616,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, * handled in _enforceMaxDist */ } -static size_t ZSTD_resetCCtx_byAttachingCDict( - ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) { - { - const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Resize working context table params for input only, since the dict @@ -1607,8 +1635,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict( assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); } - { - const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc - cdict->matchState.window.base); const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; if (cdictLen == 0) { @@ -1625,9 +1652,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict( cctx->blockState.matchState.window.base + cdictEnd; ZSTD_window_clear(&cctx->blockState.matchState.window); } + /* loadedDictEnd is expressed within the referential of the active context */ cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; - } - } + } } cctx->dictID = cdict->dictID; @@ -1681,7 +1708,6 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } @@ -1761,7 +1787,6 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } dstCCtx->dictID = srcCCtx->dictID; @@ -1831,16 +1856,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const /*! ZSTD_reduceIndex() : * rescale all indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) { - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; + { U32 const hSize = (U32)1 << params->cParams.hashLog; ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); } - if (zc->appliedParams.cParams.strategy != ZSTD_fast) { - U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; - if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) + if (params->cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); else ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); @@ -1869,155 +1893,6 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* s return ZSTD_blockHeaderSize + srcSize; } -static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - memcpy(ostart + flSize, src, srcSize); - return srcSize + flSize; -} - -static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - ostart[flSize] = *(const BYTE*)src; - return flSize+1; -} - - -/* ZSTD_minGain() : - * minimum compression required - * to generate a compress block or a compressed literals section. - * note : use same formula for both situations */ -static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) -{ - U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - return (srcSize >> minlog) + 2; -} - -static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* workspace, size_t wkspSize, - const int bmi2) -{ - size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", - disableLiteralCompression); - - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; - int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) - : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ - hType = set_repeat; - } - } - - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize==1) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } - - if (hType == set_compressed) { - /* using a newly constructed table */ - nextHuf->repeatMode = HUF_repeat_check; - } - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - return lhSize+cLitSize; -} - - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { const seqDef* const sequences = seqStorePtr->sequencesStart; @@ -2040,418 +1915,6 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } - -/** - * -log2(x / 256) lookup table for x in [0, 256). - * If x == 0: Return 0 - * Else: Return floor(-log2(x / 256) * 256) - */ -static unsigned const kInverseProbabilityLog256[256] = { - 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, - 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, - 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, - 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, - 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, - 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, - 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, - 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, - 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, - 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, - 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, - 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, - 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, - 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, - 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, - 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, - 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, - 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, - 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, - 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, - 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, - 5, 4, 2, 1, -}; - - -/** - * Returns the cost in bits of encoding the distribution described by count - * using the entropy bound. - */ -static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) -{ - unsigned cost = 0; - unsigned s; - for (s = 0; s <= max; ++s) { - unsigned norm = (unsigned)((256 * count[s]) / total); - if (count[s] != 0 && norm == 0) - norm = 1; - assert(count[s] < total); - cost += count[s] * kInverseProbabilityLog256[norm]; - } - return cost >> 8; -} - - -/** - * Returns the cost in bits of encoding the distribution in count using the - * table described by norm. The max symbol support by norm is assumed >= max. - * norm must be valid for every symbol with non-zero probability in count. - */ -static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) -{ - unsigned const shift = 8 - accuracyLog; - size_t cost = 0; - unsigned s; - assert(accuracyLog <= 8); - for (s = 0; s <= max; ++s) { - unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; - unsigned const norm256 = normAcc << shift; - assert(norm256 > 0); - assert(norm256 < 256); - cost += count[s] * kInverseProbabilityLog256[norm256]; - } - return cost >> 8; -} - - -static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { - void const* ptr = ctable; - U16 const* u16ptr = (U16 const*)ptr; - U32 const maxSymbolValue = MEM_read16(u16ptr + 1); - return maxSymbolValue; -} - - -/** - * Returns the cost in bits of encoding the distribution in count using ctable. - * Returns an error if ctable cannot represent all the symbols in count. - */ -static size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max) -{ - unsigned const kAccuracyLog = 8; - size_t cost = 0; - unsigned s; - FSE_CState_t cstate; - FSE_initCState(&cstate, ctable); - RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, - "Repeat FSE_CTable has maxSymbolValue %u < %u", - ZSTD_getFSEMaxSymbolValue(ctable), max); - for (s = 0; s <= max; ++s) { - unsigned const tableLog = cstate.stateLog; - unsigned const badCost = (tableLog + 1) << kAccuracyLog; - unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); - if (count[s] == 0) - continue; - RETURN_ERROR_IF(bitCost >= badCost, GENERIC, - "Repeat FSE_CTable has Prob[%u] == 0", s); - cost += count[s] * bitCost; - } - return cost >> kAccuracyLog; -} - -/** - * Returns the cost in bytes of encoding the normalized count header. - * Returns an error if any of the helper functions return an error. - */ -static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, - size_t const nbSeq, unsigned const FSELog) -{ - BYTE wksp[FSE_NCOUNTBOUND]; - S16 norm[MaxSeq + 1]; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); - return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); -} - - -typedef enum { - ZSTD_defaultDisallowed = 0, - ZSTD_defaultAllowed = 1 -} ZSTD_defaultPolicy_e; - -MEM_STATIC symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy) -{ - ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { - /* Prefer set_basic over set_rle when there are 2 or less symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. - */ - DEBUGLOG(5, "Selected set_basic"); - return set_basic; - } - DEBUGLOG(5, "Selected set_rle"); - return set_rle; - } - if (strategy < ZSTD_lazy) { - if (isDefaultAllowed) { - size_t const staticFse_nbSeq_max = 1000; - size_t const mult = 10 - strategy; - size_t const baseLog = 3; - size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ - assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ - assert(mult <= 9 && mult >= 7); - if ( (*repeatMode == FSE_repeat_valid) - && (nbSeq < staticFse_nbSeq_max) ) { - DEBUGLOG(5, "Selected set_repeat"); - return set_repeat; - } - if ( (nbSeq < dynamicFse_nbSeq_min) - || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { - DEBUGLOG(5, "Selected set_basic"); - /* The format allows default tables to be repeated, but it isn't useful. - * When using simple heuristics to select encoding type, we don't want - * to confuse these tables with dictionaries. When running more careful - * analysis, we don't need to waste time checking both repeating tables - * and default tables. - */ - *repeatMode = FSE_repeat_none; - return set_basic; - } - } - } else { - size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); - size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); - size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); - size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); - - if (isDefaultAllowed) { - assert(!ZSTD_isError(basicCost)); - assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); - } - assert(!ZSTD_isError(NCountCost)); - assert(compressedCost < ERROR(maxCode)); - DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", - (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); - if (basicCost <= repeatCost && basicCost <= compressedCost) { - DEBUGLOG(5, "Selected set_basic"); - assert(isDefaultAllowed); - *repeatMode = FSE_repeat_none; - return set_basic; - } - if (repeatCost <= compressedCost) { - DEBUGLOG(5, "Selected set_repeat"); - assert(!ZSTD_isError(repeatCost)); - return set_repeat; - } - assert(compressedCost < basicCost && compressedCost < repeatCost); - } - DEBUGLOG(5, "Selected set_compressed"); - *repeatMode = FSE_repeat_check; - return set_compressed; -} - -MEM_STATIC size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize) -{ - BYTE* op = (BYTE*)dst; - const BYTE* const oend = op + dstCapacity; - DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); - - switch (type) { - case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); - *op = codeTable[0]; - return 1; - case set_repeat: - memcpy(nextCTable, prevCTable, prevCTableSize); - return 0; - case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ - return 0; - case set_compressed: { - S16 norm[MaxSeq + 1]; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - if (count[codeTable[nbSeq-1]] > 1) { - count[codeTable[nbSeq-1]]--; - nbSeq_1--; - } - assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); - return NCountSize; - } - } - default: assert(0); RETURN_ERROR(GENERIC); - } -} - -FORCE_INLINE_TEMPLATE size_t -ZSTD_encodeSequences_body( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - RETURN_ERROR_IF( - ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), - dstSize_tooSmall, "not enough space remaining"); - DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", - (int)(blockStream.endPtr - blockStream.startPtr), - (unsigned)dstCapacity); - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq-1]; - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, - ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); - } - BIT_flushBits(&blockStream); - - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, - ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); - } } - - DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); - FSE_flushCState(&blockStream, &stateMatchLength); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); - FSE_flushCState(&blockStream, &stateOffsetBits); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); - FSE_flushCState(&blockStream, &stateLitLength); - - { size_t const streamSize = BIT_closeCStream(&blockStream); - RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); - return streamSize; - } -} - -static size_t -ZSTD_encodeSequences_default( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_encodeSequences_bmi2( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - -#endif - -static size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) -{ - DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); -#if DYNAMIC_BMI2 - if (bmi2) { - return ZSTD_encodeSequences_bmi2(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); - } -#endif - (void)bmi2; - return ZSTD_encodeSequences_default(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) { switch (cctxParams->literalCompressionMode) { @@ -2496,8 +1959,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, BYTE* seqHead; BYTE* lastNCount = NULL; + DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; @@ -2524,6 +1987,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + assert(op <= oend); if (nbSeq==0) { /* Copy the old tables over as if we repeated them */ memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); @@ -2532,6 +1996,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* seqHead : flags for FSE encoding type */ seqHead = op++; + assert(op <= oend); /* convert length/distances into codes */ ZSTD_seqToCodes(seqStorePtr); @@ -2555,6 +2020,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (LLtype == set_compressed) lastNCount = op; op += countSize; + assert(op <= oend); } } /* build CTable for Offsets */ { unsigned max = MaxOff; @@ -2577,6 +2043,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (Offtype == set_compressed) lastNCount = op; op += countSize; + assert(op <= oend); } } /* build CTable for MatchLengths */ { unsigned max = MaxML; @@ -2597,6 +2064,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (MLtype == set_compressed) lastNCount = op; op += countSize; + assert(op <= oend); } } *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); @@ -2610,6 +2078,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, longOffsets, bmi2); FORWARD_IF_ERROR(bitstreamSize); op += bitstreamSize; + assert(op <= oend); /* zstd versions <= 1.3.4 mistakenly report corruption when * FSE_readNCount() receives a buffer < 4 bytes. * Fixed by https://github.com/facebook/zstd/pull/1146. @@ -2721,30 +2190,24 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthID = 0; } -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; - size_t cSize; - DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate); + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); - cSize = 0; - goto out; /* don't even attempt compression below a certain srcSize */ + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ } ZSTD_resetSeqStore(&(zc->seqStore)); /* required for optimal parser to read stats from dictionary */ ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* tell the optimal parser how we expect to compress literals */ ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; - /* a gap between an attached dict and the current window is not safe, * they must remain adjacent, * and when that stops being the case, the dict must be unset */ @@ -2798,6 +2261,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); } } + return ZSTDbss_compress; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t cSize; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } /* encode sequences and literals */ cSize = ZSTD_compressSequences(&zc->seqStore, @@ -2826,6 +2304,25 @@ out: } +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_reduceIndex(ms, params, correction); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + + /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. @@ -2844,7 +2341,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; - assert(cctx->appliedParams.cParams.windowLog <= 31); + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); if (cctx->appliedParams.fParams.checksumFlag && srcSize) @@ -2859,19 +2356,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; - if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { - U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_reduceIndex(cctx, correction); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } - ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; { size_t cSize = ZSTD_compressBlock_internal(cctx, @@ -2899,7 +2387,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, } } if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; - return op-ostart; + return (size_t)(op-ostart); } @@ -2991,6 +2479,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); FORWARD_IF_ERROR(fhSize); + assert(fhSize <= dstCapacity); dstCapacity -= fhSize; dst = (char*)dst + fhSize; cctx->stage = ZSTDcs_ongoing; @@ -3007,18 +2496,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!frame) { /* overflow check and correction for block mode */ - if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) { - U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_reduceIndex(cctx, correction); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } + ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); } DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); @@ -3074,7 +2552,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, const void* src, size_t srcSize, ZSTD_dictTableLoadMethod_e dtlm) { - const BYTE* const ip = (const BYTE*) src; + const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; ZSTD_window_update(&ms->window, src, srcSize); @@ -3085,32 +2563,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, if (srcSize <= HASH_READ_SIZE) return 0; - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, iend, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, iend, dtlm); - break; + while (iend - ip > HASH_READ_SIZE) { + size_t const remaining = iend - ip; + size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); + const BYTE* const ichunk = ip + chunk; - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); - break; + ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); - break; + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, ichunk, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); + break; - default: - assert(0); /* not possible : not a valid strategy id */ + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (chunk >= HASH_READ_SIZE) + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + if (chunk >= HASH_READ_SIZE) + ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); + break; + + default: + assert(0); /* not possible : not a valid strategy id */ + } + + ip = ichunk; } ms->nextToUpdate = (U32)(iend - ms->window.base); @@ -3297,12 +2785,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_continue, zbuff) ); - { - size_t const dictID = ZSTD_compress_insertDictionary( + { size_t const dictID = ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); - assert(dictID <= (size_t)(U32)-1); + assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; } return 0; @@ -3555,10 +3042,10 @@ static size_t ZSTD_initCDict_internal( /* Reset the state to no dictionary */ ZSTD_reset_compressedBlockState(&cdict->cBlockState); - { void* const end = ZSTD_reset_matchState( - &cdict->matchState, - (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, - &cParams, ZSTDcrp_continue, /* forCCtx */ 0); + { void* const end = ZSTD_reset_matchState(&cdict->matchState, + (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, + &cParams, + ZSTDcrp_continue, ZSTD_resetTarget_CDict); assert(end == (char*)cdict->workspace + cdict->workspaceSize); (void)end; } @@ -4068,7 +3555,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, case zcss_flush: DEBUGLOG(5, "flush stage"); { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend-op, + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), zcs->outBuff + zcs->outBuffFlushedSize, toFlush); DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); @@ -4262,7 +3749,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ /* single thread mode : attempt to calculate remaining to flush more precisely */ { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; - size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); return toFlush; diff --git a/C/zstd/zstd_compress_internal.h b/C/zstd/zstd_compress_internal.h index cc3cbb9d..10b59d39 100644 --- a/C/zstd/zstd_compress_internal.h +++ b/C/zstd/zstd_compress_internal.h @@ -33,13 +33,13 @@ extern "C" { ***************************************/ #define kSearchStrength 8 #define HASH_READ_SIZE 8 -#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". It could be confused for a real successor at index "1", if sorted as larger than its predecessor. It's not a big deal though : candidate will just be sorted again. Additionally, candidate position 1 will be lost. But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy - Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. + This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ /*-************************************* @@ -128,21 +128,20 @@ typedef struct { BYTE const* base; /* All regular indexes relative to this position */ BYTE const* dictBase; /* extDict indexes relative to this position */ U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ + U32 lowLimit; /* below that point, no more valid data */ } ZSTD_window_t; typedef struct ZSTD_matchState_t ZSTD_matchState_t; struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */ U32 nextToUpdate; /* index from which to continue table update */ - U32 nextToUpdate3; /* index from which to continue table update */ U32 hashLog3; /* dispatch table : larger == faster, more memory */ U32* hashTable; U32* hashTable3; U32* chainTable; optState_t opt; /* optimal parser state */ - const ZSTD_matchState_t * dictMatchState; + const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; }; @@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s { int compressionLevel; int forceWindow; /* force back-references to respect limit of * 1< 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; } +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} + /*! ZSTD_storeSeq() : * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). @@ -324,7 +350,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v /* copy Literals */ assert(seqStorePtr->maxNbLit <= 128 KB); assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap); seqStorePtr->lit += litLength; /* literal Length */ @@ -564,6 +590,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 /*-************************************* * Round buffer management ***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif /* Max current allowed */ #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) /* Maximum chunk size before overflow correction needs to be called again */ @@ -675,31 +704,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * Updates lowLimit so that: * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd * - * This allows a simple check that index >= lowLimit to see if index is valid. - * This must be called before a block compression call, with srcEnd as the block - * source end. + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. * - * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. - * This is because dictionaries are allowed to be referenced as long as the last - * byte of the dictionary is in the window, but once they are out of range, - * they cannot be referenced. If loadedDictEndPtr is NULL, we use - * loadedDictEnd == 0. + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. * - * In normal dict mode, the dict is between lowLimit and dictLimit. In - * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary - * is below them. forceWindow and dictMatchState are therefore incompatible. + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. */ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, - void const* srcEnd, - U32 maxDist, - U32* loadedDictEndPtr, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, const ZSTD_matchState_t** dictMatchStatePtr) { - U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base); - U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; - DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", - (unsigned)blockEndIdx, (unsigned)maxDist); + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. + In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ if (blockEndIdx > maxDist + loadedDictEnd) { U32 const newLowLimit = blockEndIdx - maxDist; if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; @@ -708,10 +755,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window, (unsigned)window->dictLimit, (unsigned)window->lowLimit); window->dictLimit = window->lowLimit; } - if (loadedDictEndPtr) - *loadedDictEndPtr = 0; - if (dictMatchStatePtr) - *dictMatchStatePtr = NULL; + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. */ +MEM_STATIC void +ZSTD_checkDictValidity(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) { + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; } } diff --git a/C/zstd/zstd_compress_literals.c b/C/zstd/zstd_compress_literals.c new file mode 100644 index 00000000..eb3e5a44 --- /dev/null +++ b/C/zstd/zstd_compress_literals.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_literals.h" + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + const int bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", + disableLiteralCompression); + + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) + : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + return lhSize+cLitSize; +} diff --git a/C/zstd/zstd_compress_literals.h b/C/zstd/zstd_compress_literals.h new file mode 100644 index 00000000..7adbecc0 --- /dev/null +++ b/C/zstd/zstd_compress_literals.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ + + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + const int bmi2); + +#endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/C/zstd/zstd_compress_sequences.c b/C/zstd/zstd_compress_sequences.c new file mode 100644 index 00000000..3c3deae0 --- /dev/null +++ b/C/zstd/zstd_compress_sequences.c @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_sequences.h" + +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. + */ +static size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, + "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + RETURN_ERROR_IF(bitCost >= badCost, GENERIC, + "Repeat FSE_CTable has Prob[%u] == 0", s); + cost += count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); + *op = codeTable[0]; + return 1; + case set_repeat: + memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} diff --git a/C/zstd/zstd_compress_sequences.h b/C/zstd/zstd_compress_sequences.h new file mode 100644 index 00000000..f5234d94 --- /dev/null +++ b/C/zstd/zstd_compress_sequences.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +#include "fse.h" /* FSE_repeat, FSE_CTable */ +#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/C/zstd/zstd_decompress.c b/C/zstd/zstd_decompress.c index 675596f5..cf99a137 100644 --- a/C/zstd/zstd_decompress.c +++ b/C/zstd/zstd_decompress.c @@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, frameParameter_unsupported); - - return skippableHeaderSize + sizeU32; + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); + return skippableSize; + } } /** ZSTD_findDecompressedSize() : @@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) - return skippableSize; - if (srcSize < skippableSize) { + if (ZSTD_isError(skippableSize)) { return ZSTD_CONTENTSIZE_ERROR; } + assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; @@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); return frameSizeInfo; } else { const BYTE* ip = (const BYTE*)src; @@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) return frameSizeInfo.compressedSize; } - /** ZSTD_decompressBound() : * compatible with legacy mode * `src` must point to the start of a ZSTD frame or a skippeable frame @@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); src = (const BYTE*)src + compressedSize; srcSize -= compressedSize; bound += decompressedBound; @@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (unsigned)magicNumber, ZSTD_MAGICNUMBER); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) - return skippableSize; - RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong); + FORWARD_IF_ERROR(skippableSize); + assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; @@ -906,6 +909,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { blockProperties_t bp; size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); dctx->expected = cBlockSize; dctx->bType = bp.blockType; dctx->rleSize = bp.origSize; @@ -950,6 +954,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR(corruption_detected); } if (ZSTD_isError(rSize)) return rSize; + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); diff --git a/C/zstd/zstd_decompress_block.c b/C/zstd/zstd_decompress_block.c index a2a7eedc..24f4859c 100644 --- a/C/zstd/zstd_decompress_block.c +++ b/C/zstd/zstd_decompress_block.c @@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op, if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* copy Literals */ - ZSTD_copy8(op, *litPtr); if (sequence.litLength > 8) - ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + else + ZSTD_copy8(op, *litPtr); op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ @@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op, if (oMatchEnd > oend-(16-MINMATCH)) { if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); + ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); match += oend_w - op; op = oend_w; } while (op < oMatchEnd) *op++ = *match++; } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ } return sequenceLength; } @@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op, if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); /* copy Literals */ - ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ if (sequence.litLength > 8) - ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + else + ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ + op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ @@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op, if (oMatchEnd > oend-(16-MINMATCH)) { if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); + ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); match += oend_w - op; op = oend_w; } while (op < oMatchEnd) *op++ = *match++; } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ } return sequenceLength; } @@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) } FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, @@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { nbSeq--; { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); @@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); RETURN_ERROR_IF(nbSeq, corruption_detected); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } @@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, diff --git a/C/zstd/zstd_double_fast.c b/C/zstd/zstd_double_fast.c index 47faf6d6..5957255d 100644 --- a/C/zstd/zstd_double_fast.c +++ b/C/zstd/zstd_double_fast.c @@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, /* Only load extra positions for ZSTD_dtlm_full */ if (dtlm == ZSTD_dtlm_fast) break; - } - } + } } } @@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 prefixLowestIndex = ms->window.dictLimit; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowestValid = ms->window.dictLimit; + const U32 maxDistance = 1U << cParams->windowLog; + const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic( dictCParams->chainLog : hBitsS; const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(lowestValid + maxDistance >= endIndex); + } + /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { @@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); goto _match_stored; } @@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); goto _match_stored; } @@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( offset = (U32)(current - dictMatchIndexL - dictIndexDelta); while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ goto _match_found; - } - } + } } if (matchIndexS > prefixLowestIndex) { /* check prefix short match */ @@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { goto _search_next_long; - } - } + } } ip += ((ip-anchor) >> kSearchStrength) + 1; continue; _search_next_long: - { - size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); U32 const matchIndexL3 = hashLong[hl3]; const BYTE* matchL3 = base + matchIndexL3; @@ -221,9 +227,7 @@ _search_next_long: offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ goto _match_found; - } - } - } + } } } /* if no long +1 match, explore the short match we found */ if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { @@ -242,7 +246,7 @@ _match_found: offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: /* match found */ @@ -250,11 +254,14 @@ _match_stored: anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } /* check immediate repcode */ if (dictMode == ZSTD_dictMatchState) { @@ -278,8 +285,7 @@ _match_stored: continue; } break; - } - } + } } if (dictMode == ZSTD_noDict) { while ( (ip <= ilimit) @@ -294,14 +300,15 @@ _match_stored: ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ - } } } } + } } } + } /* while (ip < ilimit) */ /* save reps for next block */ rep[0] = offset_1 ? offset_1 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - const U32 prefixStartIndex = ms->window.dictLimit; const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; const BYTE* const prefixStart = base + prefixStartIndex; - const U32 dictStartIndex = ms->window.lowLimit; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictEnd = dictBase + prefixStartIndex; @@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); @@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; @@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); @@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( } offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else { ip += ((ip-anchor) >> kSearchStrength) + 1; continue; } } - /* found a match : store it */ + /* move to next sequence start */ ip += mLength; anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + /* check immediate repcode */ while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); @@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( rep[1] = offset_2; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } diff --git a/C/zstd/zstd_fast.c b/C/zstd/zstd_fast.c index ed997b44..a05b8a47 100644 --- a/C/zstd/zstd_fast.c +++ b/C/zstd/zstd_fast.c @@ -13,7 +13,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; @@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, } } } } } + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* ip0 = istart; const BYTE* ip1; const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 validStartIndex = ms->window.dictLimit; + const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex; const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */ rep[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); const U32 dictHLog = dictCParams->hashLog; - /* otherwise, we would get index underflow when translating a dict index - * into a local index */ + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no no underflow + * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); /* init */ @@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else if ( (matchIndex <= prefixStartIndex) ) { size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); U32 const dictMatchIndex = dictHashTable[dictHash]; @@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } /* match found */ @@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( rep[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } size_t ZSTD_compressBlock_fast_dictMatchState( @@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 dictStartIndex = ms->window.lowLimit; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 validLow = ms->window.lowLimit; + const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow; + const U32 dictStartIndex = lowLimit; const BYTE* const dictStart = dictBase + dictStartIndex; - const U32 prefixStartIndex = ms->window.dictLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const dictEnd = dictBase + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ const size_t h = ZSTD_hashPtr(ip, hlog, mls); @@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else { if ( (matchIndex < dictStartIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { @@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( offset = current - matchIndex; offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } /* found a match : store it */ @@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( rep[1] = offset_2; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } diff --git a/C/zstd/zstd_internal.h b/C/zstd/zstd_internal.h index 31f756ab..81b16eac 100644 --- a/C/zstd/zstd_internal.h +++ b/C/zstd/zstd_internal.h @@ -34,7 +34,6 @@ #endif #include "xxhash.h" /* XXH_reset, update, digest */ - #if defined (__cplusplus) extern "C" { #endif @@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; * Shared functions to include for inlining *********************************************/ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } + #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } +static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } +#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } + +#define WILDCOPY_OVERLENGTH 8 +#define VECLEN 16 + +typedef enum { + ZSTD_no_overlap, + ZSTD_overlap_src_before_dst, + /* ZSTD_overlap_dst_before_src, */ +} ZSTD_overlap_e; /*! ZSTD_wildcopy() : * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ -#define WILDCOPY_OVERLENGTH 8 -MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) +MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - do - COPY8(op, ip) - while (op < oend); + + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); + if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { + do + COPY8(op, ip) + while (op < oend); + } + else { + if ((length & 8) == 0) + COPY8(op, ip); + do { + COPY16(op, ip); + } + while (op < oend); + } +} + +/*! ZSTD_wildcopy_16min() : + * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ +MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE +void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) +{ + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + length; + + assert(length >= 8); + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { + do + COPY8(op, ip) + while (op < oend); + } + else { + if ((length & 8) == 0) + COPY8(op, ip); + do { + COPY16(op, ip); + } + while (op < oend); + } } MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ diff --git a/C/zstd/zstd_lazy.c b/C/zstd/zstd_lazy.c index 53f998a4..94d906c0 100644 --- a/C/zstd/zstd_lazy.c +++ b/C/zstd/zstd_lazy.c @@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, U32* largerPtr = smallerPtr + 1; U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", current, dictLimit, windowLow); @@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, const BYTE* const base = ms->window.base; U32 const current = (U32)(ip-base); - U32 const windowLow = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowValid = ms->window.lowLimit; + U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; U32* const bt = ms->chainTable; U32 const btLog = cParams->chainLog - 1; @@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = ms->window.lowLimit; const U32 current = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowValid = ms->window.lowLimit; + const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid; const U32 minChain = current > chainSize ? current - chainSize : 0; U32 nbAttempts = 1U << cParams->searchLog; size_t ml=4-1; @@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic( /* init */ ip += (dictAndPrefixLength == 0); - ms->nextToUpdate3 = ms->nextToUpdate; if (dictMode == ZSTD_noDict) { U32 const maxRep = (U32)(ip - prefixLowest); if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; @@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( U32 offset_1 = rep[0], offset_2 = rep[1]; /* init */ - ms->nextToUpdate3 = ms->nextToUpdate; ip += (ip == prefixStart); /* Match Loop */ diff --git a/C/zstd/zstd_ldm.c b/C/zstd/zstd_ldm.c index 784d20f3..3dcf86e6 100644 --- a/C/zstd/zstd_ldm.c +++ b/C/zstd/zstd_ldm.c @@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences( if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( - &ldmState->window, /* cycleLog */ 0, maxDist, src); + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); } /* 2. We enforce the maximum offset allowed. diff --git a/C/zstd/zstd_legacy.h b/C/zstd/zstd_legacy.h index e5b383ee..0dbd3c7a 100644 --- a/C/zstd/zstd_legacy.h +++ b/C/zstd/zstd_legacy.h @@ -238,6 +238,10 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; break; } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } return frameSizeInfo; } diff --git a/C/zstd/zstd_opt.c b/C/zstd/zstd_opt.c index efb69d32..e32e542e 100644 --- a/C/zstd/zstd_opt.c +++ b/C/zstd/zstd_opt.c @@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP * to provide a cost which is directly comparable to a match ending at same position */ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) { - if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); + if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel); /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); - int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) - + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ - - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) + + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ + - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel); #if 1 return contribution; #else @@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe const optState_t* const optPtr, int optLevel) { - int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) + int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) + ZSTD_litLengthContribution(litLength, optPtr, optLevel); return contribution; } @@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) /* Update hashTable3 up to ip (excluded) Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip) +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) { U32* const hashTable3 = ms->hashTable3; U32 const hashLog3 = ms->hashLog3; const BYTE* const base = ms->window.base; - U32 idx = ms->nextToUpdate3; - U32 const target = ms->nextToUpdate3 = (U32)(ip - base); + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); assert(hashLog3 > 0); @@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* idx++; } + *nextToUpdate3 = target; return hashTable3[hash3]; } @@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1( } } *smallerPtr = *largerPtr = 0; - if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - assert(matchEndIdx > current + 8); - return matchEndIdx - (current + 8); + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > current + 8); + return MAX(positions, matchEndIdx - (current + 8)); + } } FORCE_INLINE_TEMPLATE @@ -520,8 +525,13 @@ void ZSTD_updateTree_internal( DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", idx, target, dictMode); - while(idx < target) - idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); ms->nextToUpdate = target; } @@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { FORCE_INLINE_TEMPLATE U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ ZSTD_matchState_t* ms, + U32* nextToUpdate3, const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - U32 rep[ZSTD_REP_NUM], + const U32 rep[ZSTD_REP_NUM], U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ - ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const maxDistance = 1U << cParams->windowLog; const BYTE* const base = ms->window.base; U32 const current = (U32)(ip-base); U32 const hashLog = cParams->hashLog; @@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 const dictLimit = ms->window.dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; - U32 const btLow = btMask >= current ? 0 : current - btMask; - U32 const windowLow = ms->window.lowLimit; + U32 const btLow = (btMask >= current) ? 0 : current - btMask; + U32 const windowValid = ms->window.lowLimit; + U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid; U32 const matchLow = windowLow ? windowLow : 1; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; @@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( /* HC3 match finder */ if ((mls == 3) /*static*/ && (bestLength < mls)) { - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); if ((matchIndex3 >= matchLow) & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { size_t mlen; @@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( (ip+mlen == iLimit) ) { /* best possible length */ ms->nextToUpdate = current+1; /* skip insertion */ return 1; - } - } - } + } } } /* no dictMatchState lookup: dicts don't have a populated HC3 table */ } @@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches ( FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ ZSTD_matchState_t* ms, + U32* nextToUpdate3, const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - U32 rep[ZSTD_REP_NUM], U32 const ll0, - ZSTD_match_t* matches, U32 const lengthToBeat) + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const matchLengthSearch = cParams->minMatch; @@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); switch(matchLengthSearch) { - case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3); + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); default : - case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5); + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6); + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); } } @@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_match_t* const matches = optStatePtr->matchTable; @@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); assert(optLevel <= 2); - ms->nextToUpdate3 = ms->nextToUpdate; ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); ip += (ip==prefixStart); @@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* find first match */ { U32 const litlen = (U32)(ip - anchor); U32 const ll0 = !litlen; - U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch); + U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); if (!nbMatches) { ip++; continue; } /* initialize opt[0] */ @@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; U32 const previousPrice = opt[cur].price; U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch); + U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); U32 matchNb; if (!nbMatches) { DEBUGLOG(7, "rPos:%u : no match found", cur); @@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ } /* while (ip < ilimit) */ /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ms->window.dictLimit += (U32)srcSize; ms->window.lowLimit = ms->window.dictLimit; ms->nextToUpdate = ms->window.dictLimit; - ms->nextToUpdate3 = ms->window.dictLimit; /* re-inforce weight of collected statistics */ ZSTD_upscaleStats(&ms->opt); diff --git a/C/zstd/zstd_v01.c b/C/zstd/zstd_v01.c index cad2b99b..ae8cba2a 100644 --- a/C/zstd/zstd_v01.c +++ b/C/zstd/zstd_v01.c @@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ const void* cSrc, size_t cSrcSize, const U16* DTable) { - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-15; - - const void* ptr = DTable; - const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; - const U32 dtLog = DTable[0]; - size_t errorCode; - U32 reloadStatus; - - /* Init */ - - const U16* jumpTable = (const U16*)cSrc; - const size_t length1 = FSE_readLE16(jumpTable); - const size_t length2 = FSE_readLE16(jumpTable+1); - const size_t length3 = FSE_readLE16(jumpTable+2); - const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! - const char* const start1 = (const char*)(cSrc) + 6; - const char* const start2 = start1 + length1; - const char* const start3 = start2 + length2; - const char* const start4 = start3 + length3; - FSE_DStream_t bitD1, bitD2, bitD3, bitD4; - - if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - - errorCode = FSE_initDStream(&bitD1, start1, length1); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD2, start2, length2); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD3, start3, length3); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD4, start4, length4); - if (FSE_isError(errorCode)) return errorCode; - - reloadStatus=FSE_reloadDStream(&bitD2); - - /* 16 symbols per loop */ - for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) + const void* ptr = DTable; + const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; + const U32 dtLog = DTable[0]; + size_t errorCode; + U32 reloadStatus; -#define HUF_DECODE_SYMBOL_2(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ - if (FSE_32bits()) FSE_reloadDStream(&Dstream) + /* Init */ - HUF_DECODE_SYMBOL_1( 0, bitD1); - HUF_DECODE_SYMBOL_1( 1, bitD2); - HUF_DECODE_SYMBOL_1( 2, bitD3); - HUF_DECODE_SYMBOL_1( 3, bitD4); - HUF_DECODE_SYMBOL_2( 4, bitD1); - HUF_DECODE_SYMBOL_2( 5, bitD2); - HUF_DECODE_SYMBOL_2( 6, bitD3); - HUF_DECODE_SYMBOL_2( 7, bitD4); - HUF_DECODE_SYMBOL_1( 8, bitD1); - HUF_DECODE_SYMBOL_1( 9, bitD2); - HUF_DECODE_SYMBOL_1(10, bitD3); - HUF_DECODE_SYMBOL_1(11, bitD4); - HUF_DECODE_SYMBOL_0(12, bitD1); - HUF_DECODE_SYMBOL_0(13, bitD2); - HUF_DECODE_SYMBOL_0(14, bitD3); - HUF_DECODE_SYMBOL_0(15, bitD4); - } + const U16* jumpTable = (const U16*)cSrc; + const size_t length1 = FSE_readLE16(jumpTable); + const size_t length2 = FSE_readLE16(jumpTable+1); + const size_t length3 = FSE_readLE16(jumpTable+2); + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! + const char* const start1 = (const char*)(cSrc) + 6; + const char* const start2 = start1 + length1; + const char* const start3 = start2 + length2; + const char* const start4 = start3 + length3; + FSE_DStream_t bitD1, bitD2, bitD3, bitD4; - if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; + if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - /* tail */ - { - // bitTail = bitD1; // *much* slower : -20% !??! - FSE_DStream_t bitTail; - bitTail.ptr = bitD1.ptr; - bitTail.bitsConsumed = bitD1.bitsConsumed; - bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer - bitTail.start = start1; - for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op12)) FSE_reloadDStream(&Dstream) + + #define HUF_DECODE_SYMBOL_2(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits()) FSE_reloadDStream(&Dstream) + + HUF_DECODE_SYMBOL_1( 0, bitD1); + HUF_DECODE_SYMBOL_1( 1, bitD2); + HUF_DECODE_SYMBOL_1( 2, bitD3); + HUF_DECODE_SYMBOL_1( 3, bitD4); + HUF_DECODE_SYMBOL_2( 4, bitD1); + HUF_DECODE_SYMBOL_2( 5, bitD2); + HUF_DECODE_SYMBOL_2( 6, bitD3); + HUF_DECODE_SYMBOL_2( 7, bitD4); + HUF_DECODE_SYMBOL_1( 8, bitD1); + HUF_DECODE_SYMBOL_1( 9, bitD2); + HUF_DECODE_SYMBOL_1(10, bitD3); + HUF_DECODE_SYMBOL_1(11, bitD4); + HUF_DECODE_SYMBOL_0(12, bitD1); + HUF_DECODE_SYMBOL_0(13, bitD2); + HUF_DECODE_SYMBOL_0(14, bitD3); + HUF_DECODE_SYMBOL_0(15, bitD4); } - if (FSE_endOfDStream(&bitTail)) - return op-ostart; + if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ + return (size_t)-FSE_ERROR_corruptionDetected; + + /* tail */ + { + // bitTail = bitD1; // *much* slower : -20% !??! + FSE_DStream_t bitTail; + bitTail.ptr = bitD1.ptr; + bitTail.bitsConsumed = bitD1.bitsConsumed; + bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer + bitTail.start = start1; + for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (opprevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = dumps 1 byte */ + litLength = ZSTD_readLE24(dumps); dumps += 3; } } @@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = dumps 1 byte */ + matchLength = ZSTD_readLE24(dumps); dumps += 3; } } diff --git a/C/zstd/zstd_v02.c b/C/zstd/zstd_v02.c index 561bc412..793df602 100644 --- a/C/zstd/zstd_v02.c +++ b/C/zstd/zstd_v02.c @@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) @@ -3043,11 +3048,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) seqState->prevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + litLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ @@ -3073,11 +3078,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + matchLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ diff --git a/C/zstd/zstd_v03.c b/C/zstd/zstd_v03.c index a1bf0fa9..7a0e7c9b 100644 --- a/C/zstd/zstd_v03.c +++ b/C/zstd/zstd_v03.c @@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) @@ -2684,11 +2689,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) seqState->prevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + litLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ @@ -2714,11 +2719,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + matchLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ diff --git a/C/zstd/zstd_v04.c b/C/zstd/zstd_v04.c index 4342330e..645a6e31 100644 --- a/C/zstd/zstd_v04.c +++ b/C/zstd/zstd_v04.c @@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) @@ -2808,13 +2813,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); prevOffset = litLength ? seq->offset : seqState->prevOffset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = dumps de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } @@ -2837,13 +2841,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) /* MatchLength */ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } matchLength += MINMATCH; diff --git a/C/zstd/zstd_v05.c b/C/zstd/zstd_v05.c index caaf15f9..e347b00d 100644 --- a/C/zstd/zstd_v05.c +++ b/C/zstd/zstd_v05.c @@ -1998,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable( const void* cSrc, size_t cSrcSize, const U16* DTable) { - const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable; - const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1; - const U32 dtLog = DTable[0]; - size_t errorCode; - - /* Init */ - BITv05_DStream_t bitD1; - BITv05_DStream_t bitD2; - BITv05_DStream_t bitD3; - BITv05_DStream_t bitD4; - const size_t length1 = MEM_readLE16(istart); - const size_t length2 = MEM_readLE16(istart+2); - const size_t length3 = MEM_readLE16(istart+4); - size_t length4; - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; - /* Check */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable; + const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; - length4 = cSrcSize - (length1 + length2 + length3 + 6); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - errorCode = BITv05_initDStream(&bitD1, istart1, length1); - if (HUFv05_isError(errorCode)) return errorCode; - errorCode = BITv05_initDStream(&bitD2, istart2, length2); - if (HUFv05_isError(errorCode)) return errorCode; - errorCode = BITv05_initDStream(&bitD3, istart3, length3); - if (HUFv05_isError(errorCode)) return errorCode; - errorCode = BITv05_initDStream(&bitD4, istart4, length4); - if (HUFv05_isError(errorCode)) return errorCode; + /* Init */ + BITv05_DStream_t bitD1; + BITv05_DStream_t bitD2; + BITv05_DStream_t bitD3; + BITv05_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4); - for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) { - HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); - HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4); - HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); - HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4); + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BITv05_initDStream(&bitD1, istart1, length1); + if (HUFv05_isError(errorCode)) return errorCode; + errorCode = BITv05_initDStream(&bitD2, istart2, length2); + if (HUFv05_isError(errorCode)) return errorCode; + errorCode = BITv05_initDStream(&bitD3, istart3, length3); + if (HUFv05_isError(errorCode)) return errorCode; + errorCode = BITv05_initDStream(&bitD4, istart4, length4); + if (HUFv05_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4); + for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) { + HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); + HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4); + HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); + HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; } @@ -3150,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState) litLength = FSEv05_peakSymbol(&(seqState->stateLL)); prevOffset = litLength ? seq->offset : seqState->prevOffset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = *dumps++; if (add < 255) litLength += add; - else { - litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */ - if (litLength&1) litLength>>=1, dumps += 3; - else litLength = (U16)(litLength)>>1, dumps += 2; + else if (dumps + 2 <= de) { + litLength = MEM_readLE16(dumps); + dumps += 2; + if ((litLength & 1) && dumps < de) { + litLength += *dumps << 16; + dumps += 1; + } + litLength>>=1; } - if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } @@ -3184,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState) /* MatchLength */ matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ - if (matchLength&1) matchLength>>=1, dumps += 3; - else matchLength = (U16)(matchLength)>>1, dumps += 2; + else if (dumps + 2 <= de) { + matchLength = MEM_readLE16(dumps); + dumps += 2; + if ((matchLength & 1) && dumps < de) { + matchLength += *dumps << 16; + dumps += 1; + } + matchLength >>= 1; } - if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } matchLength += MINMATCH; diff --git a/C/zstd/zstd_v06.c b/C/zstd/zstd_v06.c index a695cbb8..f907a3a7 100644 --- a/C/zstd/zstd_v06.c +++ b/C/zstd/zstd_v06.c @@ -3242,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr, } /* FSE table descriptors */ + if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */ { U32 const LLtype = *ip >> 6; U32 const Offtype = (*ip >> 4) & 3; U32 const MLtype = (*ip >> 2) & 3; ip++; - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* Build DTables */ { size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected); @@ -3672,7 +3670,7 @@ void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS blockProperties_t blockProperties = { bt_compressed, 0 }; /* Frame Header */ - { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min); + { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize); if (ZSTDv06_isError(frameHeaderSize)) { ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize); return; diff --git a/C/zstd/zstd_v07.c b/C/zstd/zstd_v07.c index 6b948893..a83ddc9a 100644 --- a/C/zstd/zstd_v07.c +++ b/C/zstd/zstd_v07.c @@ -3470,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr, } /* FSE table descriptors */ + if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */ { U32 const LLtype = *ip >> 6; U32 const OFtype = (*ip >> 4) & 3; U32 const MLtype = (*ip >> 2) & 3; ip++; - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* Build DTables */ { size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected); @@ -3918,7 +3916,7 @@ void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS } /* Frame Header */ - { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min); + { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize); if (ZSTDv07_isError(frameHeaderSize)) { ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize); return; diff --git a/C/zstd/zstdmt_compress.c b/C/zstd/zstdmt_compress.c index 38fbb907..9e537b88 100644 --- a/C/zstd/zstdmt_compress.c +++ b/C/zstd/zstdmt_compress.c @@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); toFlush = produced - flushed; - if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { - /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. */ + if (toFlush==0) { assert(jobPtr->consumed < jobPtr->src.size); } } @@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) { - if (params.ldmParams.enableLdm) + unsigned jobLog; + if (params.ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize * based on chainLog instead. */ - return MAX(21, params.cParams.chainLog + 4); - return MAX(20, params.cParams.windowLog + 2); + jobLog = MAX(21, params.cParams.chainLog + 4); + } else { + jobLog = MAX(20, params.cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); } static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) @@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog; } - assert(0 <= ovLog && ovLog <= 30); + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); DEBUGLOG(4, "overlapLog : %i", params.overlapLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog); return (ovLog==0) ? 0 : (size_t)1 << ovLog; @@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal( FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; - if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; + if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ if (mtctx->singleBlockingThread) { @@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal( if (mtctx->targetSectionSize == 0) { mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); } + assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); + if (params.rsyncable) { /* Aim for the targetsectionSize as the average job size. */ U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); diff --git a/C/zstd/zstdmt_compress.h b/C/zstd/zstdmt_compress.h index 12e6bcb3..12a52608 100644 --- a/C/zstd/zstdmt_compress.h +++ b/C/zstd/zstdmt_compress.h @@ -50,6 +50,7 @@ #ifndef ZSTDMT_JOBSIZE_MIN # define ZSTDMT_JOBSIZE_MIN (1 MB) #endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))