From fe3a65bfbdae35a59e678da486b66fc11b80dad9 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Sat, 19 Nov 2016 11:32:02 +0100 Subject: [PATCH] update of zstd to v1.1.1 --- C/zstd/entropy_common.c | 5 +- C/zstd/error_public.h | 59 ------------ C/zstd/huf_compress.c | 9 +- C/zstd/zstd.h | 202 +++++++++++++++++++++++---------------- C/zstd/zstd_compress.c | 165 ++++++++++++++++++++++++-------- C/zstd/zstd_decompress.c | 152 +++++++++++++++++++++-------- C/zstd/zstd_internal.h | 10 ++ C/zstd/zstd_opt.h | 92 +++++++++--------- C/zstd/zstd_v01.c | 7 +- C/zstd/zstd_v02.c | 5 +- C/zstd/zstd_v03.c | 5 +- C/zstd/zstd_v04.c | 7 +- C/zstd/zstd_v05.c | 41 +++++--- C/zstd/zstd_v06.c | 24 +++-- C/zstd/zstd_v07.c | 23 +++-- 15 files changed, 502 insertions(+), 304 deletions(-) delete mode 100644 C/zstd/error_public.h diff --git a/C/zstd/entropy_common.c b/C/zstd/entropy_common.c index acd96699..18bba0e8 100644 --- a/C/zstd/entropy_common.c +++ b/C/zstd/entropy_common.c @@ -168,9 +168,11 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, { U32 weightTotal; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ @@ -198,6 +200,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ { U32 const tableLog = BIT_highbit32(weightTotal) + 1; diff --git a/C/zstd/error_public.h b/C/zstd/error_public.h deleted file mode 100644 index d46abd2c..00000000 --- a/C/zstd/error_public.h +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. - */ - -#ifndef ERROR_PUBLIC_H_MODULE -#define ERROR_PUBLIC_H_MODULE - -#if defined (__cplusplus) -extern "C" { -#endif - -/*===== dependency =====*/ -#include /* size_t */ - - -/*-**************************************** -* error codes list -******************************************/ -typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_maxCode -} ZSTD_ErrorCode; - -/*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare directly with enum list published into "error_public.h" */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); -const char* ZSTD_getErrorString(ZSTD_ErrorCode code); - - -#if defined (__cplusplus) -} -#endif - -#endif /* ERROR_PUBLIC_H_MODULE */ diff --git a/C/zstd/huf_compress.c b/C/zstd/huf_compress.c index b7d3d77a..78784aa3 100644 --- a/C/zstd/huf_compress.c +++ b/C/zstd/huf_compress.c @@ -155,13 +155,14 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si } } /* fill val */ - { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; { U32 n; for (n=0; n0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ + U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ min += nbPerRank[n]; min >>= 1; } } diff --git a/C/zstd/zstd.h b/C/zstd/zstd.h index c2f5f5d4..ea5caf14 100644 --- a/C/zstd/zstd.h +++ b/C/zstd/zstd.h @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * @@ -14,12 +14,12 @@ extern "C" { #endif -/*====== Dependency ======*/ +/* ====== Dependency ======*/ #include /* size_t */ -/*====== Export for Windows ======*/ -/*! +/* ====== Export for Windows ======*/ +/* * ZSTD_DLL_EXPORT : * Enable exporting of functions when building a Windows DLL */ @@ -30,7 +30,29 @@ extern "C" { #endif -/*======= Version =======*/ +/******************************************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios + at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and + decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22. + Levels >= 20, labelled `--ultra`, should be used with caution, as they require more memory. + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit memory management) + - unbounded multiple steps (described as Streaming compression) + The compression ratio achievable on small data can be highly improved using compression with a dictionary in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Fast dictionary API) + + Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h. + These APIs shall never be used with a dynamic library. + They are not "stable", their definition may change in the future. Only static linking is allowed. +*********************************************************************************************************/ + +/*------ Version ------*/ +ZSTDLIB_API unsigned ZSTD_versionNumber (void); /**< returns version number of ZSTD */ + #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 1 #define ZSTD_VERSION_RELEASE 1 @@ -41,10 +63,9 @@ extern "C" { #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber (void); -/* ************************************* +/*************************************** * Simple API ***************************************/ /*! ZSTD_compress() : @@ -91,29 +112,33 @@ ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `siz ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -/*-************************************* +/*************************************** * Explicit memory management ***************************************/ -/** Compression context */ +/*= Compression context +* When compressing many messages / blocks, +* it is recommended to allocate a context just once, and re-use it for each successive compression operation. +* This will make the situation much easier for the system's memory. +* Use one context per thread for parallel execution in multi-threaded environments. */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); -/** ZSTD_compressCCtx() : +/*! ZSTD_compressCCtx() : Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); -/** Decompression context */ +/*= Decompression context */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); -/** ZSTD_decompressDCtx() : +/*! ZSTD_decompressDCtx() : * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -/*-************************ +/************************** * Simple dictionary API ***************************/ /*! ZSTD_compress_usingDict() : @@ -135,14 +160,20 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, const void* dict,size_t dictSize); -/*-************************** -* Fast Dictionary API +/**************************** +* Fast dictionary API ****************************/ -/*! ZSTD_createCDict() : -* Create a digested dictionary, ready to start compression operation without startup delay. -* `dict` can be released after ZSTD_CDict creation */ typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : +* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. +* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. +* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. +* `dict` can be released after ZSTD_CDict creation */ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel); + +/*! ZSTD_freeCDict() : +* Function frees memory allocated by ZSTD_createCDict() */ ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); /*! ZSTD_compress_usingCDict() : @@ -154,11 +185,16 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const ZSTD_CDict* cdict); + +typedef struct ZSTD_DDict_s ZSTD_DDict; + /*! ZSTD_createDDict() : * Create a digested dictionary, ready to start decompression operation without startup delay. * `dict` can be released after creation */ -typedef struct ZSTD_DDict_s ZSTD_DDict; ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); + +/*! ZSTD_freeDDict() : +* Function frees memory allocated with ZSTD_createDDict() */ ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); /*! ZSTD_decompress_usingDDict() : @@ -170,7 +206,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -/*-************************** +/**************************** * Streaming ****************************/ @@ -187,16 +223,18 @@ typedef struct ZSTD_outBuffer_s { } ZSTD_outBuffer; -/*====== streaming compression ======*/ /*-*********************************************************************** -* Streaming compression - howto +* Streaming compression - HowTo * * A ZSTD_CStream object is required to track streaming operation. * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively, +* since it will play nicer with system's memory, by re-using already allocated memory. +* Use one separate ZSTD_CStream per thread for parallel execution. * -* Start by initializing ZSTD_CStream. +* Start a new compression by initializing ZSTD_CStream. * Use ZSTD_initCStream() to start a new compression operation. * Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary. * @@ -225,23 +263,22 @@ typedef struct ZSTD_outBuffer_s { * * *******************************************************************/ +/*===== Streaming compression functions ======*/ typedef struct ZSTD_CStream_s ZSTD_CStream; ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); - -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ - ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ + -/*====== decompression ======*/ /*-*************************************************************************** -* Streaming decompression howto +* Streaming decompression - HowTo * * A ZSTD_DStream object is required to track streaming operations. * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. @@ -262,28 +299,29 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); * The return value is a suggested next input size (just an hint, to help latency). * *******************************************************************************/ +/*===== Streaming decompression functions =====*/ typedef struct ZSTD_DStream_s ZSTD_DStream; ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - #ifdef ZSTD_STATIC_LINKING_ONLY -/* ==================================================================================== +/**************************************************************************************** + * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS * The definitions in this section are considered experimental. * They should never be used with a dynamic library, as they may change in the future. * They are provided for advanced usages. * Use them only in association with static linking. - * ==================================================================================== */ + * ***************************************************************************************/ -/*--- Constants ---*/ +/* --- Constants ---*/ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8 */ #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U @@ -310,8 +348,8 @@ static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ -/*--- Types ---*/ -typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /* from faster to stronger */ +/*--- Advanced types ---*/ +typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btopt2 } ZSTD_strategy; /* from faster to stronger */ typedef struct { unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ @@ -334,13 +372,13 @@ typedef struct { ZSTD_frameParameters fParams; } ZSTD_parameters; -/* custom memory allocation functions */ +/*= Custom memory allocation functions */ typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -/*-************************************* +/*************************************** * Advanced compression functions ***************************************/ /*! ZSTD_estimateCCtxSize() : @@ -393,7 +431,7 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, ZSTD_parameters params); -/*--- Advanced Decompression functions ---*/ +/*--- Advanced decompression functions ---*/ /*! ZSTD_estimateDCtxSize() : * Gives the potential amount of memory allocated to create a ZSTD_DCtx */ @@ -412,47 +450,41 @@ ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); -/* ****************************************************************** -* Advanced Streaming functions +/******************************************************************** +* Advanced streaming functions ********************************************************************/ -/*====== compression ======*/ - +/*===== Advanced Streaming compression functions =====*/ ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; saves dictionary loading */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before */ ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -/*====== decompression ======*/ - +/*===== Advanced Streaming decompression functions =====*/ typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e; - ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict will just be referenced, and must outlive decompression session */ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -/* ****************************************************************** +/********************************************************************* * Buffer-less and synchronous inner streaming functions -********************************************************************/ -/* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. * But it's also a complex one, with many restrictions (documented below). -* Prefer using normal streaming API for an easier experience */ +* Prefer using normal streaming API for an easier experience +********************************************************************* */ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); +/** + Buffer-less streaming compression (synchronous mode) -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -/* A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. ZSTD_CCtx object can be re-used multiple times within successive compression operations. @@ -481,26 +513,17 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame. */ -typedef struct { - unsigned long long frameContentSize; - unsigned windowSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameParams; +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - -/* +/*- Buffer-less streaming decompression (synchronous mode) A ZSTD_DCtx object is required to track streaming operations. @@ -557,11 +580,27 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); It also returns Frame Size as fparamsPtr->frameContentSize. */ +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameParams; -/* ************************************** -* Block functions -****************************************/ -/*! Block functions produce and decode raw zstd blocks, without frame metadata. +/*===== Buffer-less streaming decompression functions =====*/ +ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + +/** + Block functions + + Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). User will have to take in charge required information to regenerate data, such as compressed and content sizes. @@ -585,6 +624,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); */ #define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */ +/*===== Raw zstd block functions =====*/ ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); diff --git a/C/zstd/zstd_compress.c b/C/zstd/zstd_compress.c index e71873b0..e7f7d991 100644 --- a/C/zstd/zstd_compress.c +++ b/C/zstd/zstd_compress.c @@ -122,6 +122,11 @@ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface * return &(ctx->seqStore); } +static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx) +{ + return cctx->params; +} + /** ZSTD_checkParams() : ensure param values remain within authorized range. @@ -137,7 +142,7 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1; CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); } CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)(cParams.strategy) > (U32)ZSTD_btopt) return ERROR(compressionParameter_unsupported); + if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported); return 0; } @@ -160,7 +165,7 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; } } if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; - { U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt); + { U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt) | (cPar.strategy == ZSTD_btopt2); U32 const maxChainLog = cPar.windowLog+btPlus; if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */ @@ -186,7 +191,7 @@ size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams) size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<workSpaceSize < neededSpace) { ZSTD_free(zc->workSpace, zc->customMem); zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); @@ -276,7 +281,7 @@ static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc, zc->frameContentSize = frameContentSize; { int i; for (i=0; irep[i] = repStartValue[i]; } - if (params.cParams.strategy == ZSTD_btopt) { + if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) { zc->seqStore.litFreq = (U32*)ptr; zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1); @@ -377,7 +382,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) * Block entropic compression *********************************************************/ -/* See zstd_compression_format.md for detailed format description */ +/* See doc/zstd_compression_format.md for detailed format description */ size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) { @@ -2170,7 +2175,17 @@ static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { #ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize); + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + +static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); #else (void)ctx; (void)src; (void)srcSize; return; @@ -2180,7 +2195,17 @@ static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t src static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) { #ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize); + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); +#else + (void)ctx; (void)src; (void)srcSize; + return; +#endif +} + +static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize) +{ +#ifdef ZSTD_OPT_H_91842398743 + ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); #else (void)ctx; (void)src; (void)srcSize; return; @@ -2192,9 +2217,9 @@ typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t sr static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) { - static const ZSTD_blockCompressor blockCompressor[2][7] = { - { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt }, - { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict } + static const ZSTD_blockCompressor blockCompressor[2][8] = { + { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 }, + { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict } }; return blockCompressor[extDict][(U32)strat]; @@ -2247,7 +2272,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, /* preemptive overflow correction */ if (cctx->lowLimit > (1<<30)) { - U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt); + U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt) | (cctx->params.cParams.strategy == ZSTD_btopt2); U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1; U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */); U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog); /* preserve position % chainSize, ensure current-repcode doesn't underflow */ @@ -2436,6 +2461,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t case ZSTD_btlazy2: case ZSTD_btopt: + case ZSTD_btopt2: ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); break; @@ -2448,14 +2474,28 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t } +/* Dictionaries that assign zero probability to symbols that show up causes problems + when FSE encoding. Refuse dictionaries that assign zero probability to symbols + that we may encounter during compression. + NOTE: This behavior is not standard and could be improved in the future. */ +static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { + U32 s; + if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted); + for (s = 0; s <= maxSymbolValue; ++s) { + if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted); + } + return 0; +} + + /* Dictionary format : - Magic == ZSTD_DICT_MAGIC (4 bytes) - HUF_writeCTable(256) - FSE_writeNCount(off) - FSE_writeNCount(ml) - FSE_writeNCount(ll) - RepOffsets - Dictionary content + Magic == ZSTD_DICT_MAGIC (4 bytes) + HUF_writeCTable(256) + FSE_writeNCount(off) + FSE_writeNCount(ml) + FSE_writeNCount(ll) + RepOffsets + Dictionary content */ /*! ZSTD_loadDictEntropyStats() : @return : size read from dictionary @@ -2464,32 +2504,41 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); dictPtr += hufHeaderSize; } - { short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog; + { unsigned offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); + /* Every match length code must have non-zero probability */ + CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); + /* Every literal length code must have non-zero probability */ + CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted); dictPtr += litlengthHeaderSize; } @@ -2500,6 +2549,16 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); dictPtr += 12; + { U32 offcodeMax = MaxOff; + if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */ + /* Calculate minimum offset code required to represent maxOffset */ + offcodeMax = ZSTD_highbit32(maxOffset); + } + /* Every possible supported offset <= dictContentSize + 128 KB must be representable */ + CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + } + cctx->flagStaticTables = 1; return dictPtr - (const BYTE*)dict; } @@ -2725,6 +2784,10 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict) } } +static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) { + return ZSTD_getParamsFromCCtx(cdict->refContext); +} + size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize) { if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) @@ -2761,7 +2824,8 @@ typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage; struct ZSTD_CStream_s { ZSTD_CCtx* cctx; - ZSTD_CDict* cdict; + ZSTD_CDict* cdictLocal; + const ZSTD_CDict* cdict; char* inBuff; size_t inBuffSize; size_t inToCompress; @@ -2775,6 +2839,7 @@ struct ZSTD_CStream_s { ZSTD_cStreamStage stage; U32 checksum; U32 frameEnded; + ZSTD_parameters params; ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CStream within "zstd.h" */ @@ -2804,7 +2869,7 @@ size_t ZSTD_freeCStream(ZSTD_CStream* zcs) if (zcs==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = zcs->customMem; ZSTD_freeCCtx(zcs->cctx); - ZSTD_freeCDict(zcs->cdict); + ZSTD_freeCDict(zcs->cdictLocal); ZSTD_free(zcs->inBuff, cMem); ZSTD_free(zcs->outBuff, cMem); ZSTD_free(zcs, cMem); @@ -2820,7 +2885,10 @@ size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSO size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { - CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)); + if (zcs->inBuffSize==0) return ERROR(stage_wrong); /* zcs has not been init at least once */ + + if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)) + else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); zcs->inToCompress = 0; zcs->inBuffPos = 0; @@ -2852,15 +2920,28 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, if (zcs->outBuff == NULL) return ERROR(memory_allocation); } - ZSTD_freeCDict(zcs->cdict); - zcs->cdict = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem); - if (zcs->cdict == NULL) return ERROR(memory_allocation); + if (dict) { + ZSTD_freeCDict(zcs->cdictLocal); + zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem); + if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); + zcs->cdict = zcs->cdictLocal; + } else zcs->cdict = NULL; zcs->checksum = params.fParams.checksumFlag > 0; + zcs->params = params; return ZSTD_resetCStream(zcs, pledgedSrcSize); } +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict); + size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0); + zcs->cdict = cdict; + return initError; +} + size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); @@ -2875,7 +2956,7 @@ size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) { if (zcs==NULL) return 0; /* support sizeof on NULL */ - return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdict) + zcs->outBuffSize + zcs->inBuffSize; + return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize; } /*====== Compression ======*/ @@ -3066,9 +3147,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */ { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */ { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */ - { 25, 25, 23, 7, 3, 64, ZSTD_btopt }, /* level 20 */ - { 26, 26, 23, 7, 3,256, ZSTD_btopt }, /* level 21 */ - { 27, 27, 25, 9, 3,512, ZSTD_btopt }, /* level 22 */ + { 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */ + { 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */ + { 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */ }, { /* for srcSize <= 256 KB */ /* W, C, H, S, L, T, strat */ @@ -3092,9 +3173,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/ { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/ { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/ - { 18, 19, 18, 11, 3,512, ZSTD_btopt }, /* level 20.*/ - { 18, 19, 18, 12, 3,512, ZSTD_btopt }, /* level 21.*/ - { 18, 19, 18, 13, 3,512, ZSTD_btopt }, /* level 22.*/ + { 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/ + { 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/ + { 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/ }, { /* for srcSize <= 128 KB */ /* W, C, H, S, L, T, strat */ @@ -3118,9 +3199,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/ { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/ { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/ - { 17, 18, 17, 9, 3,256, ZSTD_btopt }, /* level 20.*/ - { 17, 18, 17, 10, 3,256, ZSTD_btopt }, /* level 21.*/ - { 17, 18, 17, 11, 3,512, ZSTD_btopt }, /* level 22.*/ + { 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/ + { 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/ + { 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/ }, { /* for srcSize <= 16 KB */ /* W, C, H, S, L, T, strat */ @@ -3144,9 +3225,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/ { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/ { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 20.*/ - { 14, 15, 15, 9, 3,256, ZSTD_btopt }, /* level 21.*/ - { 14, 15, 15, 10, 3,256, ZSTD_btopt }, /* level 22.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/ + { 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/ + { 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/ }, }; diff --git a/C/zstd/zstd_decompress.c b/C/zstd/zstd_decompress.c index 990a6491..4c47930c 100644 --- a/C/zstd/zstd_decompress.c +++ b/C/zstd/zstd_decompress.c @@ -34,7 +34,7 @@ * Frames requiring more memory will be rejected. */ #ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -# define ZSTD_MAXWINDOWSIZE_DEFAULT (257 << 20) /* 257 MB */ +# define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ #endif @@ -111,7 +111,7 @@ struct ZSTD_DCtx_s BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ -size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { if (dctx==NULL) return 0; return sizeof(ZSTD_DCtx); } /* support sizeof on NULL */ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) { return (dctx==NULL) ? 0 : sizeof(ZSTD_DCtx); } size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } @@ -170,20 +170,22 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { ZSTD_decompressBegin(dstDCtx); /* init */ - dstDCtx->dictEnd = srcDCtx->dictEnd; - dstDCtx->vBase = srcDCtx->vBase; - dstDCtx->base = srcDCtx->base; - dstDCtx->previousDstEnd = srcDCtx->previousDstEnd; - dstDCtx->dictID = srcDCtx->dictID; - dstDCtx->litEntropy = srcDCtx->litEntropy; - dstDCtx->fseEntropy = srcDCtx->fseEntropy; - dstDCtx->LLTptr = srcDCtx->LLTable; - dstDCtx->MLTptr = srcDCtx->MLTable; - dstDCtx->OFTptr = srcDCtx->OFTable; - dstDCtx->HUFptr = srcDCtx->hufTable; - dstDCtx->rep[0] = srcDCtx->rep[0]; - dstDCtx->rep[1] = srcDCtx->rep[1]; - dstDCtx->rep[2] = srcDCtx->rep[2]; + if (srcDCtx) { /* support refDCtx on NULL */ + dstDCtx->dictEnd = srcDCtx->dictEnd; + dstDCtx->vBase = srcDCtx->vBase; + dstDCtx->base = srcDCtx->base; + dstDCtx->previousDstEnd = srcDCtx->previousDstEnd; + dstDCtx->dictID = srcDCtx->dictID; + dstDCtx->litEntropy = srcDCtx->litEntropy; + dstDCtx->fseEntropy = srcDCtx->fseEntropy; + dstDCtx->LLTptr = srcDCtx->LLTable; + dstDCtx->MLTptr = srcDCtx->MLTable; + dstDCtx->OFTptr = srcDCtx->OFTable; + dstDCtx->HUFptr = srcDCtx->hufTable; + dstDCtx->rep[0] = srcDCtx->rep[0]; + dstDCtx->rep[1] = srcDCtx->rep[1]; + dstDCtx->rep[2] = srcDCtx->rep[2]; + } } @@ -191,7 +193,7 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) * Decompression section ***************************************************************/ -/* See compression format details in : zstd_compression_format.md */ +/* See compression format details in : doc/zstd_compression_format.md */ /** ZSTD_frameHeaderSize() : * srcSize must be >= ZSTD_frameHeaderSize_prefix. @@ -301,14 +303,16 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) /** ZSTD_decodeFrameHeader() : -* `srcSize` must be the size provided by ZSTD_frameHeaderSize(). +* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t srcSize) +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { - size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize); + size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize); + if (ZSTD_isError(result)) return result; /* invalid header */ + if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */ if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong); if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); - return result; + return 0; } @@ -710,10 +714,13 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, { int nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; return 1; } if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - else + } else { + if (ip >= iend) return ERROR(srcSize_wrong); nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } } *nbSeqPtr = nbSeq; } @@ -807,7 +814,8 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) if (ofCode <= 1) { offset += (llCode==0); if (offset) { - size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset = temp; @@ -839,6 +847,53 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState) } +FORCE_NOINLINE +size_t ZSTD_execSequenceLast7(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit_w, + const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + /* check */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */ + + /* copy literals */ + if (op < oend_w) { + ZSTD_wildcopy(op, *litPtr, oend_w - op); + *litPtr += oend_w - op; + op = oend_w; + } + while (op < oLitEnd) *op++ = *(*litPtr)++; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - base)) { + /* offset beyond prefix */ + if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); + match = dictEnd - (base-match); + if (match + sequence.matchLength <= dictEnd) { + memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = base; + } } + while (op < oMatchEnd) *op++ = *match++; + return sequenceLength; +} + + FORCE_INLINE size_t ZSTD_execSequence(BYTE* op, BYTE* const oend, seq_t sequence, @@ -853,8 +908,9 @@ size_t ZSTD_execSequence(BYTE* op, const BYTE* match = oLitEnd - sequence.offset; /* check */ - if ((oLitEnd>oend_w) | (oMatchEnd>oend)) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ + if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */ + if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit_w, base, vBase, dictEnd); /* copy Literals */ ZSTD_copy8(op, *litPtr); @@ -879,7 +935,8 @@ size_t ZSTD_execSequence(BYTE* op, sequence.matchLength -= length1; match = base; if (op > oend_w) { - memmove(op, match, sequence.matchLength); + U32 i; + for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; return sequenceLength; } } } @@ -1315,25 +1372,28 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t c } { short offcodeNCount[MaxOff+1]; - U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog; + U32 offcodeMaxValue=MaxOff, offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); CHECK_E(FSE_buildDTable(dctx->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); CHECK_E(FSE_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); CHECK_E(FSE_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted); dictPtr += litlengthHeaderSize; } @@ -1474,7 +1534,8 @@ typedef enum { zdss_init, zdss_loadHeader, /* *** Resource management *** */ struct ZSTD_DStream_s { ZSTD_DCtx* dctx; - ZSTD_DDict* ddict; + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; ZSTD_frameParams fParams; ZSTD_dStreamStage stage; char* inBuff; @@ -1524,7 +1585,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds) if (zds==NULL) return 0; /* support free on null */ { ZSTD_customMem const cMem = zds->customMem; ZSTD_freeDCtx(zds->dctx); - ZSTD_freeDDict(zds->ddict); + ZSTD_freeDDict(zds->ddictLocal); ZSTD_free(zds->inBuff, cMem); ZSTD_free(zds->outBuff, cMem); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) @@ -1546,9 +1607,12 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di { zds->stage = zdss_loadHeader; zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - ZSTD_freeDDict(zds->ddict); - zds->ddict = ZSTD_createDDict(dict, dictSize); - if (zds->ddict == NULL) return ERROR(memory_allocation); + ZSTD_freeDDict(zds->ddictLocal); + if (dict) { + zds->ddictLocal = ZSTD_createDDict(dict, dictSize); + if (zds->ddictLocal == NULL) return ERROR(memory_allocation); + } else zds->ddictLocal = NULL; + zds->ddict = zds->ddictLocal; zds->legacyVersion = 0; zds->hostageByte = 0; return ZSTD_frameHeaderSize_prefix; @@ -1559,9 +1623,15 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) return ZSTD_initDStream_usingDict(zds, NULL, 0); } +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict) /**< note : ddict will just be referenced, and must outlive decompression session */ +{ + size_t const initResult = ZSTD_initDStream(zds); + zds->ddict = ddict; + return initResult; +} + size_t ZSTD_resetDStream(ZSTD_DStream* zds) { - if (zds->ddict == NULL) return ERROR(stage_wrong); /* must be init at least once */ zds->stage = zdss_loadHeader; zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; zds->legacyVersion = 0; @@ -1584,7 +1654,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds) { if (zds==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddict) + zds->inBuffSize + zds->outBuffSize; + return sizeof(*zds) + ZSTD_sizeof_DCtx(zds->dctx) + ZSTD_sizeof_DDict(zds->ddictLocal) + zds->inBuffSize + zds->outBuffSize; } @@ -1625,15 +1695,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) { U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); if (legacyVersion) { + const void* const dict = zds->ddict ? zds->ddict->dict : NULL; + size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0; CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, - zds->ddict->dict, zds->ddict->dictSize)); + dict, dictSize)); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); } else { return hSize; /* error */ } } #else - return hSize; + return hSize; #endif if (hSize != 0) { /* need more input */ size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ @@ -1648,7 +1720,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } /* Consume header */ - ZSTD_refDCtx(zds->dctx, zds->ddict->refContext); + { const ZSTD_DCtx* refContext = zds->ddict ? zds->ddict->refContext : NULL; + ZSTD_refDCtx(zds->dctx, refContext); + } { size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); { size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); diff --git a/C/zstd/zstd_internal.h b/C/zstd/zstd_internal.h index f40e00aa..d889c840 100644 --- a/C/zstd/zstd_internal.h +++ b/C/zstd/zstd_internal.h @@ -31,6 +31,16 @@ # endif /* __STDC_VERSION__ */ #endif +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# ifdef __GNUC__ +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + /*-************************************* * Dependencies diff --git a/C/zstd/zstd_opt.h b/C/zstd/zstd_opt.h index cea67056..90d511c7 100644 --- a/C/zstd/zstd_opt.h +++ b/C/zstd/zstd_opt.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the @@ -16,6 +16,7 @@ #define ZSTD_FREQ_DIV 5 +#define ZSTD_MAX_PRICE (1<<30) /*-************************************* * Price functions for optimal parser @@ -120,12 +121,14 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY } -FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) +FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) { /* offset */ BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); + if (!ultra && offCode >= 20) price += (offCode-19)*2; + /* match Length */ { const BYTE ML_deltaCode = 36; const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; @@ -171,7 +174,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B #define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ { \ - while (last_pos < pos) { opt[last_pos+1].price = 1<<30; last_pos++; } \ + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \ opt[pos].mlen = mlen_; \ opt[pos].off = offset_; \ opt[pos].litlen = litlen_; \ @@ -375,7 +378,7 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict ( *********************************/ FORCE_INLINE void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) + const void* src, size_t srcSize, const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -401,7 +404,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, ZSTD_rescaleFreqs(seqStorePtr); ip += (ip==prefixStart); { U32 i; for (i=0; irep[i]; } - //inr = ip; /* Match Loop */ while (ip < ilimit) { @@ -424,7 +426,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, } best_off = i - (ip == anchor); do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ mlen--; @@ -449,7 +451,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, mlen = (u>0) ? matches[u-1].len+1 : best_mlen; best_mlen = matches[u].len; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ mlen++; @@ -496,7 +498,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); } - best_mlen = minMatch; + best_mlen = minMatch; { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); for (i=(opt[cur].mlen != 1); i best_mlen) best_mlen = mlen; - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); - } + do { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + } - if (mlen > best_mlen) best_mlen = mlen; - - do { if (cur + mlen > last_pos || price <= opt[cur + mlen].price) SET_PRICE(cur + mlen, mlen, i, litlen, price); mlen--; @@ -549,12 +550,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); } if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) @@ -626,7 +627,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ FORCE_INLINE void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, - const void* src, size_t srcSize) + const void* src, size_t srcSize, const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); const BYTE* const istart = (const BYTE*)src; @@ -657,7 +658,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, ctx->nextToUpdate3 = ctx->nextToUpdate; ZSTD_rescaleFreqs(seqStorePtr); ip += (ip==prefixStart); - //inr = ip; /* Match Loop */ while (ip < ilimit) { @@ -666,7 +666,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, U32 current = (U32)(ip-base); memset(opt, 0, sizeof(ZSTD_optimal_t)); last_pos = 0; - //inr = ip; opt[0].litlen = (U32)(ip - anchor); /* check repCode */ @@ -691,7 +690,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_off = i - (ip==anchor); litlen = opt[0].litlen; do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ mlen--; @@ -721,7 +720,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_mlen = matches[u].len; litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -765,8 +764,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]); } - best_mlen = 0; - + best_mlen = minMatch; { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); for (i = (mlen != 1); i litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH); - } - - best_mlen = mlen; + if (mlen > best_mlen) best_mlen = mlen; do { + if (opt[cur].mlen == 1) { + litlen = opt[cur].litlen; + if (cur > litlen) { + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + } else + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + } else { + litlen = 0; + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + } + if (cur + mlen > last_pos || price <= opt[cur + mlen].price) SET_PRICE(cur + mlen, mlen, i, litlen, price); mlen--; @@ -815,8 +813,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, goto _storeSequence; } - best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch; - /* set prices using matches at position = cur */ for (u = 0; u < match_num; u++) { mlen = (u>0) ? matches[u-1].len+1 : best_mlen; @@ -826,12 +822,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH); + price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH); + price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH); + price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); } if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) diff --git a/C/zstd/zstd_v01.c b/C/zstd/zstd_v01.c index fe9c5ccd..5c36c210 100644 --- a/C/zstd/zstd_v01.c +++ b/C/zstd/zstd_v01.c @@ -958,13 +958,16 @@ static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) U32 weightTotal; U32 maxBits; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; U32 n; U32 nextRankStart; void* ptr = DTable+1; HUF_DElt* const dt = (HUF_DElt*)ptr; + if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + iSize = ip[0]; + FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16)); /* if compilation fails here, assertion is false */ //memset(huffWeight, 0, sizeof(huffWeight)); /* should not be necessary, but some analyzer complain ... */ if (iSize >= 128) /* special header */ @@ -1005,6 +1008,7 @@ static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize) rankVal[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } + if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected; /* get last non-null symbol weight (implied, total must be 2^n) */ maxBits = FSE_highbit32(weightTotal) + 1; @@ -1533,6 +1537,7 @@ size_t ZSTDv01_decodeLiteralsBlock(void* ctx, { size_t rleSize = litbp.origSize; if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); + if (!srcSize) return ERROR(srcSize_wrong); memset(oend - rleSize, *ip, rleSize); *litStart = oend - rleSize; *litSize = rleSize; diff --git a/C/zstd/zstd_v02.c b/C/zstd/zstd_v02.c index de1592e1..24498fed 100644 --- a/C/zstd/zstd_v02.c +++ b/C/zstd/zstd_v02.c @@ -1607,10 +1607,12 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 weightTotal; U32 tableLog; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; U32 n; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) /* special header */ @@ -1652,6 +1654,7 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ tableLog = BIT_highbit32(weightTotal) + 1; diff --git a/C/zstd/zstd_v03.c b/C/zstd/zstd_v03.c index caad331d..a3bd1da2 100644 --- a/C/zstd/zstd_v03.c +++ b/C/zstd/zstd_v03.c @@ -1604,10 +1604,12 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 weightTotal; U32 tableLog; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; U32 n; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) /* special header */ @@ -1649,6 +1651,7 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ tableLog = BIT_highbit32(weightTotal) + 1; diff --git a/C/zstd/zstd_v04.c b/C/zstd/zstd_v04.c index 05e40aac..0a740bac 100644 --- a/C/zstd/zstd_v04.c +++ b/C/zstd/zstd_v04.c @@ -1896,10 +1896,12 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 weightTotal; U32 tableLog; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; U32 n; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) /* special header */ @@ -1941,6 +1943,7 @@ static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ tableLog = BIT_highbit32(weightTotal) + 1; @@ -3108,7 +3111,7 @@ static size_t ZSTD_execSequence(BYTE* op, sequence.matchLength -= length1; match = base; if (op > oend_8) { - memmove(op, match, sequence.matchLength); + while (op < oMatchEnd) *op++ = *match++; return sequenceLength; } } diff --git a/C/zstd/zstd_v05.c b/C/zstd/zstd_v05.c index 96ffceb9..201bf3c6 100644 --- a/C/zstd/zstd_v05.c +++ b/C/zstd/zstd_v05.c @@ -1873,10 +1873,12 @@ static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32 weightTotal; U32 tableLog; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; U32 n; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ @@ -1910,6 +1912,7 @@ static size_t HUFv05_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ tableLog = BITv05_highbit32(weightTotal) + 1; @@ -2032,13 +2035,14 @@ size_t HUFv05_decompress1X2_usingDTable( { BYTE* op = (BYTE*)dst; BYTE* const oend = op + dstSize; - size_t errorCode; const U32 dtLog = DTable[0]; const void* dtPtr = DTable; const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1; BITv05_DStream_t bitD; - errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize); - if (HUFv05_isError(errorCode)) return errorCode; + + if (dstSize <= cSrcSize) return ERROR(dstSize_tooSmall); + { size_t const errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize); + if (HUFv05_isError(errorCode)) return errorCode; } HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog); @@ -2942,6 +2946,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx, { size_t litSize, litCSize, singleStream=0; U32 lhSize = ((istart[0]) >> 4) & 3; + if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ switch(lhSize) { case 0: case 1: default: /* note : default is impossible, since lhSize into [0..3] */ @@ -2965,6 +2970,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx, break; } if (litSize > BLOCKSIZE) return ERROR(corruption_detected); + if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); if (HUFv05_isError(singleStream ? HUFv05_decompress1X2(dctx->litBuffer, litSize, istart+lhSize, litCSize) : @@ -2990,6 +2996,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx, lhSize=3; litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); litCSize = ((istart[1] & 3) << 8) + istart[2]; + if (litCSize + litSize > srcSize) return ERROR(corruption_detected); errorCode = HUFv05_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4); if (HUFv05_isError(errorCode)) return ERROR(corruption_detected); @@ -3046,6 +3053,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx, break; case 3: litSize = ((istart[0] & 15) << 16) + (istart[1] << 8) + istart[2]; + if (srcSize<4) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ break; } if (litSize > BLOCKSIZE) return ERROR(corruption_detected); @@ -3063,7 +3071,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx, size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb, - const void* src, size_t srcSize) + const void* src, size_t srcSize, U32 flagStaticTable) { const BYTE* const istart = (const BYTE* const)src; const BYTE* ip = istart; @@ -3079,17 +3087,22 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps /* SeqHead */ *nbSeq = *ip++; if (*nbSeq==0) return 1; - if (*nbSeq >= 128) + if (*nbSeq >= 128) { + if (ip >= iend) return ERROR(srcSize_wrong); *nbSeq = ((nbSeq[0]-128)<<8) + *ip++; + } + if (ip >= iend) return ERROR(srcSize_wrong); LLtype = *ip >> 6; Offtype = (*ip >> 4) & 3; MLtype = (*ip >> 2) & 3; if (*ip & 2) { + if (ip+3 > iend) return ERROR(srcSize_wrong); dumpsLength = ip[2]; dumpsLength += ip[1] << 8; ip += 3; } else { + if (ip+2 > iend) return ERROR(srcSize_wrong); dumpsLength = ip[1]; dumpsLength += (ip[0] & 1) << 8; ip += 2; @@ -3118,6 +3131,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps FSEv05_buildDTable_raw(DTableLL, LLbits); break; case FSEv05_ENCODING_STATIC: + if (!flagStaticTable) return ERROR(corruption_detected); break; case FSEv05_ENCODING_DYNAMIC : default : /* impossible */ @@ -3141,6 +3155,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps FSEv05_buildDTable_raw(DTableOffb, Offbits); break; case FSEv05_ENCODING_STATIC: + if (!flagStaticTable) return ERROR(corruption_detected); break; case FSEv05_ENCODING_DYNAMIC : default : /* impossible */ @@ -3164,6 +3179,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps FSEv05_buildDTable_raw(DTableML, MLbits); break; case FSEv05_ENCODING_STATIC: + if (!flagStaticTable) return ERROR(corruption_detected); break; case FSEv05_ENCODING_DYNAMIC : default : /* impossible */ @@ -3313,7 +3329,7 @@ static size_t ZSTDv05_execSequence(BYTE* op, sequence.matchLength -= length1; match = base; if (op > oend_8) { - memmove(op, match, sequence.matchLength); + while (op < oMatchEnd) *op++ = *match++; return sequenceLength; } } } @@ -3376,7 +3392,7 @@ static size_t ZSTDv05_decompressSequences( /* Build Decoding Tables */ errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength, DTableLL, DTableML, DTableOffb, - ip, seqSize); + ip, seqSize, dctx->flagStaticTables); if (ZSTDv05_isError(errorCode)) return errorCode; ip += errorCode; @@ -3665,11 +3681,11 @@ static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t d { size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize; short offcodeNCount[MaxOff+1]; - U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSEv05Log; + U32 offcodeMaxValue=MaxOff, offcodeLog; short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSEv05Log; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSEv05Log; + unsigned litlengthMaxValue = MaxLL, litlengthLog; hSize = HUFv05_readDTableX4(dctx->hufTableX4, dict, dictSize); if (HUFv05_isError(hSize)) return ERROR(dictionary_corrupted); @@ -3678,6 +3694,7 @@ static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t d offcodeHeaderSize = FSEv05_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); if (FSEv05_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSEv05Log) return ERROR(dictionary_corrupted); errorCode = FSEv05_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog); if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted); dict = (const char*)dict + offcodeHeaderSize; @@ -3685,12 +3702,14 @@ static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t d matchlengthHeaderSize = FSEv05_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); if (FSEv05_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSEv05Log) return ERROR(dictionary_corrupted); errorCode = FSEv05_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted); dict = (const char*)dict + matchlengthHeaderSize; dictSize -= matchlengthHeaderSize; litlengthHeaderSize = FSEv05_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); + if (litlengthLog > LLFSEv05Log) return ERROR(dictionary_corrupted); if (FSEv05_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); errorCode = FSEv05_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSEv05_isError(errorCode)) return ERROR(dictionary_corrupted); diff --git a/C/zstd/zstd_v06.c b/C/zstd/zstd_v06.c index 96a84d3e..b6fde3aa 100644 --- a/C/zstd/zstd_v06.c +++ b/C/zstd/zstd_v06.c @@ -1932,9 +1932,11 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta { U32 weightTotal; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ @@ -1969,6 +1971,7 @@ MEM_STATIC size_t HUFv06_readStats(BYTE* huffWeight, size_t hwSize, U32* rankSta rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ { U32 const tableLog = BITv06_highbit32(weightTotal) + 1; @@ -3183,6 +3186,7 @@ size_t ZSTDv06_decodeLiteralsBlock(ZSTDv06_DCtx* dctx, lhSize=3; litSize = ((istart[0] & 15) << 6) + (istart[1] >> 2); litCSize = ((istart[1] & 3) << 8) + istart[2]; + if (litCSize + litSize > srcSize) return ERROR(corruption_detected); { size_t const errorCode = HUFv06_decompress1X4_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->hufTableX4); if (HUFv06_isError(errorCode)) return ERROR(corruption_detected); @@ -3302,10 +3306,13 @@ size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr, { int nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; return 1; } if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - else + } else { + if (ip >= iend) return ERROR(srcSize_wrong); nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } } *nbSeqPtr = nbSeq; } @@ -3467,7 +3474,7 @@ size_t ZSTDv06_execSequence(BYTE* op, sequence.matchLength -= length1; match = base; if (op > oend_8) { - memmove(op, match, sequence.matchLength); + while (op < oMatchEnd) *op++ = *match++; return sequenceLength; } } } @@ -3827,9 +3834,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d dictSize -= hSize; { short offcodeNCount[MaxOff+1]; - U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog; + U32 offcodeMaxValue=MaxOff, offcodeLog; offcodeHeaderSize = FSEv06_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize); if (FSEv06_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); { size_t const errorCode = FSEv06_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog); if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); } dict = (const char*)dict + offcodeHeaderSize; @@ -3837,9 +3845,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d } { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; matchlengthHeaderSize = FSEv06_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize); if (FSEv06_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); { size_t const errorCode = FSEv06_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); } dict = (const char*)dict + matchlengthHeaderSize; @@ -3847,9 +3856,10 @@ static size_t ZSTDv06_loadEntropy(ZSTDv06_DCtx* dctx, const void* dict, size_t d } { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + unsigned litlengthMaxValue = MaxLL, litlengthLog; litlengthHeaderSize = FSEv06_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize); if (FSEv06_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); { size_t const errorCode = FSEv06_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSEv06_isError(errorCode)) return ERROR(dictionary_corrupted); } } diff --git a/C/zstd/zstd_v07.c b/C/zstd/zstd_v07.c index 62285238..c7693f24 100644 --- a/C/zstd/zstd_v07.c +++ b/C/zstd/zstd_v07.c @@ -1382,9 +1382,11 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, { U32 weightTotal; const BYTE* ip = (const BYTE*) src; - size_t iSize = ip[0]; + size_t iSize; size_t oSize; + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; //memset(huffWeight, 0, hwSize); /* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ @@ -1419,6 +1421,7 @@ size_t HUFv07_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, rankStats[huffWeight[n]]++; weightTotal += (1 << huffWeight[n]) >> 1; } } + if (weightTotal == 0) return ERROR(corruption_detected); /* get last non-null symbol weight (implied, total must be 2^n) */ { U32 const tableLog = BITv07_highbit32(weightTotal) + 1; @@ -3529,10 +3532,13 @@ size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr, { int nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; return 1; } if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) + if (nbSeq == 0xFF) { + if (ip+2 > iend) return ERROR(srcSize_wrong); nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - else + } else { + if (ip >= iend) return ERROR(srcSize_wrong); nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } } *nbSeqPtr = nbSeq; } @@ -3691,7 +3697,7 @@ size_t ZSTDv07_execSequence(BYTE* op, sequence.matchLength -= length1; match = base; if (op > oend_w) { - memmove(op, match, sequence.matchLength); + while (op < oMatchEnd) *op++ = *match++; return sequenceLength; } } } @@ -4102,27 +4108,30 @@ static size_t ZSTDv07_loadEntropy(ZSTDv07_DCtx* dctx, const void* const dict, si } { short offcodeNCount[MaxOff+1]; - U32 offcodeMaxValue=MaxOff, offcodeLog=OffFSELog; + U32 offcodeMaxValue=MaxOff, offcodeLog; size_t const offcodeHeaderSize = FSEv07_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); if (FSEv07_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); + if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); { size_t const errorCode = FSEv07_buildDTable(dctx->OffTable, offcodeNCount, offcodeMaxValue, offcodeLog); if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); } dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSEv07_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); if (FSEv07_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); { size_t const errorCode = FSEv07_buildDTable(dctx->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog); if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); } dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog; + unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSEv07_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); if (FSEv07_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted); + if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); { size_t const errorCode = FSEv07_buildDTable(dctx->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog); if (FSEv07_isError(errorCode)) return ERROR(dictionary_corrupted); } dictPtr += litlengthHeaderSize;