mirror of
https://github.com/Xevion/easy7zip.git
synced 2025-12-08 18:07:15 -06:00
Update zstd to version 1.4.2
This commit is contained in:
@@ -127,6 +127,13 @@
|
||||
} \
|
||||
}
|
||||
|
||||
/* vectorization */
|
||||
#if !defined(__clang__) && defined(__GNUC__)
|
||||
# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
||||
#else
|
||||
# define DONT_VECTORIZE
|
||||
#endif
|
||||
|
||||
/* disable warnings */
|
||||
#ifdef _MSC_VER /* Visual Studio */
|
||||
# include <intrin.h> /* For Visual 2005 */
|
||||
|
||||
@@ -71,7 +71,7 @@ extern "C" {
|
||||
/*------ Version ------*/
|
||||
#define ZSTD_VERSION_MAJOR 1
|
||||
#define ZSTD_VERSION_MINOR 4
|
||||
#define ZSTD_VERSION_RELEASE 0
|
||||
#define ZSTD_VERSION_RELEASE 2
|
||||
|
||||
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
|
||||
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */
|
||||
@@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v
|
||||
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
|
||||
ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */
|
||||
|
||||
/***************************************
|
||||
* Default constant
|
||||
***************************************/
|
||||
/* *************************************
|
||||
* Default constant
|
||||
***************************************/
|
||||
#ifndef ZSTD_CLEVEL_DEFAULT
|
||||
# define ZSTD_CLEVEL_DEFAULT 3
|
||||
#endif
|
||||
|
||||
/***************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
/* *************************************
|
||||
* Constants
|
||||
***************************************/
|
||||
|
||||
/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
|
||||
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */
|
||||
@@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres
|
||||
***************************************/
|
||||
/*= Compression context
|
||||
* When compressing many times,
|
||||
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
|
||||
* it is recommended to allocate a context just once,
|
||||
* and re-use it for each successive compression operation.
|
||||
* This will make workload friendlier for system's memory.
|
||||
* Use one context per thread for parallel execution in multi-threaded environments. */
|
||||
* Note : re-using context is just a speed / resource optimization.
|
||||
* It doesn't change the compression ratio, which remains identical.
|
||||
* Note 2 : In multi-threaded environments,
|
||||
* use one different context per thread for parallel execution.
|
||||
*/
|
||||
typedef struct ZSTD_CCtx_s ZSTD_CCtx;
|
||||
ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
|
||||
ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
|
||||
@@ -380,6 +385,7 @@ typedef enum {
|
||||
* ZSTD_c_forceMaxWindow
|
||||
* ZSTD_c_forceAttachDict
|
||||
* ZSTD_c_literalCompressionMode
|
||||
* ZSTD_c_targetCBlockSize
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
@@ -389,6 +395,7 @@ typedef enum {
|
||||
ZSTD_c_experimentalParam3=1000,
|
||||
ZSTD_c_experimentalParam4=1001,
|
||||
ZSTD_c_experimentalParam5=1002,
|
||||
ZSTD_c_experimentalParam6=1003,
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
@@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
|
||||
ZSTD_inBuffer* input,
|
||||
ZSTD_EndDirective endOp);
|
||||
|
||||
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
|
||||
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
|
||||
|
||||
/*******************************************************************************
|
||||
* This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and
|
||||
* ZSTD_compressStream2(). It is redundant, but is still fully supported.
|
||||
/* These buffer sizes are softly recommended.
|
||||
* They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
|
||||
* Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
|
||||
* reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
|
||||
*
|
||||
* However, note that these recommendations are from the perspective of a C caller program.
|
||||
* If the streaming interface is invoked from some other language,
|
||||
* especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
|
||||
* a major performance rule is to reduce crossing such interface to an absolute minimum.
|
||||
* It's not rare that performance ends being spent more into the interface, rather than compression itself.
|
||||
* In which cases, prefer using large buffers, as large as practical,
|
||||
* for both input and output, to reduce the nb of roundtrips.
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */
|
||||
ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
|
||||
|
||||
|
||||
/* *****************************************************************************
|
||||
* This following is a legacy streaming API.
|
||||
* It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
|
||||
* It is redundant, but remains fully supported.
|
||||
* Advanced parameters and dictionary compression can only be used through the
|
||||
* new API.
|
||||
******************************************************************************/
|
||||
|
||||
/**
|
||||
/*!
|
||||
* Equivalent to:
|
||||
*
|
||||
* ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
|
||||
@@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
|
||||
* ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
|
||||
/**
|
||||
/*!
|
||||
* Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
|
||||
* NOTE: The return value is different. ZSTD_compressStream() returns a hint for
|
||||
* the next read size (if non-zero and not an error). ZSTD_compressStream2()
|
||||
* returns the number of bytes left to flush (if non-zero and not an error).
|
||||
* returns the minimum nb of bytes left to flush (if non-zero and not an error).
|
||||
*/
|
||||
ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
|
||||
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
|
||||
ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
|
||||
/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
|
||||
/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
|
||||
ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
|
||||
|
||||
|
||||
@@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
#endif /* ZSTD_H_235446 */
|
||||
|
||||
|
||||
/****************************************************************************************
|
||||
/* **************************************************************************************
|
||||
* ADVANCED AND EXPERIMENTAL FUNCTIONS
|
||||
****************************************************************************************
|
||||
* The definitions in the following section are considered experimental.
|
||||
@@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
#define ZSTD_LDM_HASHRATELOG_MIN 0
|
||||
#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
|
||||
|
||||
/* Advanced parameter bounds */
|
||||
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
|
||||
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
|
||||
|
||||
/* internal */
|
||||
#define ZSTD_HASHLOG3_MAX 17
|
||||
|
||||
@@ -1162,7 +1189,7 @@ typedef enum {
|
||||
* however it does mean that all frame data must be present and valid. */
|
||||
ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
|
||||
|
||||
/** ZSTD_decompressBound() :
|
||||
/*! ZSTD_decompressBound() :
|
||||
* `src` should point to the start of a series of ZSTD encoded and/or skippable frames
|
||||
* `srcSize` must be the _exact_ size of this series
|
||||
* (i.e. there should be a frame boundary at `src + srcSize`)
|
||||
@@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
|
||||
*/
|
||||
#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
|
||||
|
||||
/* Tries to fit compressed block size to be around targetCBlockSize.
|
||||
* No target when targetCBlockSize == 0.
|
||||
* There is no guarantee on compressed block size (default:0) */
|
||||
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
@@ -1843,7 +1875,7 @@ typedef struct {
|
||||
unsigned checksumFlag;
|
||||
} ZSTD_frameHeader;
|
||||
|
||||
/** ZSTD_getFrameHeader() :
|
||||
/*! ZSTD_getFrameHeader() :
|
||||
* decode Frame Header, or requires larger `srcSize`.
|
||||
* @return : 0, `zfhPtr` is correctly filled,
|
||||
* >0, `srcSize` is too small, value is wanted `srcSize` amount,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -33,13 +33,13 @@ extern "C" {
|
||||
***************************************/
|
||||
#define kSearchStrength 8
|
||||
#define HASH_READ_SIZE 8
|
||||
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
|
||||
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
|
||||
It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
|
||||
It's not a big deal though : candidate will just be sorted again.
|
||||
Additionally, candidate position 1 will be lost.
|
||||
But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
|
||||
Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
||||
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
|
||||
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@@ -128,21 +128,20 @@ typedef struct {
|
||||
BYTE const* base; /* All regular indexes relative to this position */
|
||||
BYTE const* dictBase; /* extDict indexes relative to this position */
|
||||
U32 dictLimit; /* below that point, need extDict */
|
||||
U32 lowLimit; /* below that point, no more data */
|
||||
U32 lowLimit; /* below that point, no more valid data */
|
||||
} ZSTD_window_t;
|
||||
|
||||
typedef struct ZSTD_matchState_t ZSTD_matchState_t;
|
||||
struct ZSTD_matchState_t {
|
||||
ZSTD_window_t window; /* State for window round buffer management */
|
||||
U32 loadedDictEnd; /* index of end of dictionary */
|
||||
U32 loadedDictEnd; /* index of end of dictionary, within context's referential. When dict referential is copied into active context (i.e. not attached), effectively same value as dictSize, since referential starts from zero */
|
||||
U32 nextToUpdate; /* index from which to continue table update */
|
||||
U32 nextToUpdate3; /* index from which to continue table update */
|
||||
U32 hashLog3; /* dispatch table : larger == faster, more memory */
|
||||
U32* hashTable;
|
||||
U32* hashTable3;
|
||||
U32* chainTable;
|
||||
optState_t opt; /* optimal parser state */
|
||||
const ZSTD_matchState_t * dictMatchState;
|
||||
const ZSTD_matchState_t* dictMatchState;
|
||||
ZSTD_compressionParameters cParams;
|
||||
};
|
||||
|
||||
@@ -195,6 +194,9 @@ struct ZSTD_CCtx_params_s {
|
||||
int compressionLevel;
|
||||
int forceWindow; /* force back-references to respect limit of
|
||||
* 1<<wLog, even for dictionary */
|
||||
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
|
||||
* No target when targetCBlockSize == 0.
|
||||
* There is no guarantee on compressed block size */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
@@ -305,6 +307,30 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
|
||||
return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
|
||||
}
|
||||
|
||||
/* ZSTD_cParam_withinBounds:
|
||||
* @return 1 if value is within cParam bounds,
|
||||
* 0 otherwise */
|
||||
MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
|
||||
{
|
||||
ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
|
||||
if (ZSTD_isError(bounds.error)) return 0;
|
||||
if (value < bounds.lowerBound) return 0;
|
||||
if (value > bounds.upperBound) return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ZSTD_minGain() :
|
||||
* minimum compression required
|
||||
* to generate a compress block or a compressed literals section.
|
||||
* note : use same formula for both situations */
|
||||
MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
|
||||
{
|
||||
U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
|
||||
ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
|
||||
assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
|
||||
return (srcSize >> minlog) + 2;
|
||||
}
|
||||
|
||||
/*! ZSTD_storeSeq() :
|
||||
* Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
|
||||
* `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
|
||||
@@ -324,7 +350,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
|
||||
/* copy Literals */
|
||||
assert(seqStorePtr->maxNbLit <= 128 KB);
|
||||
assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
|
||||
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
|
||||
ZSTD_wildcopy(seqStorePtr->lit, literals, litLength, ZSTD_no_overlap);
|
||||
seqStorePtr->lit += litLength;
|
||||
|
||||
/* literal Length */
|
||||
@@ -564,6 +590,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
|
||||
/*-*************************************
|
||||
* Round buffer management
|
||||
***************************************/
|
||||
#if (ZSTD_WINDOWLOG_MAX_64 > 31)
|
||||
# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
|
||||
#endif
|
||||
/* Max current allowed */
|
||||
#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
|
||||
/* Maximum chunk size before overflow correction needs to be called again */
|
||||
@@ -675,31 +704,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
||||
* Updates lowLimit so that:
|
||||
* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
|
||||
*
|
||||
* This allows a simple check that index >= lowLimit to see if index is valid.
|
||||
* This must be called before a block compression call, with srcEnd as the block
|
||||
* source end.
|
||||
* It ensures index is valid as long as index >= lowLimit.
|
||||
* This must be called before a block compression call.
|
||||
*
|
||||
* If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
|
||||
* This is because dictionaries are allowed to be referenced as long as the last
|
||||
* byte of the dictionary is in the window, but once they are out of range,
|
||||
* they cannot be referenced. If loadedDictEndPtr is NULL, we use
|
||||
* loadedDictEnd == 0.
|
||||
* loadedDictEnd is only defined if a dictionary is in use for current compression.
|
||||
* As the name implies, loadedDictEnd represents the index at end of dictionary.
|
||||
* The value lies within context's referential, it can be directly compared to blockEndIdx.
|
||||
*
|
||||
* In normal dict mode, the dict is between lowLimit and dictLimit. In
|
||||
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
|
||||
* is below them. forceWindow and dictMatchState are therefore incompatible.
|
||||
* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
|
||||
* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
|
||||
* This is because dictionaries are allowed to be referenced fully
|
||||
* as long as the last byte of the dictionary is in the window.
|
||||
* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
|
||||
*
|
||||
* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
|
||||
* In dictMatchState mode, lowLimit and dictLimit are the same,
|
||||
* and the dictionary is below them.
|
||||
* forceWindow and dictMatchState are therefore incompatible.
|
||||
*/
|
||||
MEM_STATIC void
|
||||
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||
void const* srcEnd,
|
||||
U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const void* blockEnd,
|
||||
U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||
{
|
||||
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
|
||||
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist);
|
||||
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
||||
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
||||
|
||||
/* - When there is no dictionary : loadedDictEnd == 0.
|
||||
In which case, the test (blockEndIdx > maxDist) is merely to avoid
|
||||
overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
|
||||
- When there is a standard dictionary :
|
||||
Index referential is copied from the dictionary,
|
||||
which means it starts from 0.
|
||||
In which case, loadedDictEnd == dictSize,
|
||||
and it makes sense to compare `blockEndIdx > maxDist + dictSize`
|
||||
since `blockEndIdx` also starts from zero.
|
||||
- When there is an attached dictionary :
|
||||
loadedDictEnd is expressed within the referential of the context,
|
||||
so it can be directly compared against blockEndIdx.
|
||||
*/
|
||||
if (blockEndIdx > maxDist + loadedDictEnd) {
|
||||
U32 const newLowLimit = blockEndIdx - maxDist;
|
||||
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
|
||||
@@ -708,10 +755,31 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
|
||||
window->dictLimit = window->lowLimit;
|
||||
}
|
||||
if (loadedDictEndPtr)
|
||||
*loadedDictEndPtr = 0;
|
||||
if (dictMatchStatePtr)
|
||||
*dictMatchStatePtr = NULL;
|
||||
/* On reaching window size, dictionaries are invalidated */
|
||||
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
|
||||
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Similar to ZSTD_window_enforceMaxDist(),
|
||||
* but only invalidates dictionary
|
||||
* when input progresses beyond window size. */
|
||||
MEM_STATIC void
|
||||
ZSTD_checkDictValidity(ZSTD_window_t* window,
|
||||
const void* blockEnd,
|
||||
U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||
{
|
||||
U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
|
||||
U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
|
||||
|
||||
if (loadedDictEnd && (blockEndIdx > maxDist + loadedDictEnd)) {
|
||||
/* On reaching window size, dictionaries are invalidated */
|
||||
if (loadedDictEndPtr) *loadedDictEndPtr = 0;
|
||||
if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
149
C/zstd/zstd_compress_literals.c
Normal file
149
C/zstd/zstd_compress_literals.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "zstd_compress_literals.h"
|
||||
|
||||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE* const)dst;
|
||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||
|
||||
RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
|
||||
|
||||
switch(flSize)
|
||||
{
|
||||
case 1: /* 2 - 1 - 5 */
|
||||
ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
|
||||
break;
|
||||
case 2: /* 2 - 2 - 12 */
|
||||
MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
|
||||
break;
|
||||
case 3: /* 2 - 2 - 20 */
|
||||
MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
|
||||
break;
|
||||
default: /* not necessary : flSize is {1,2,3} */
|
||||
assert(0);
|
||||
}
|
||||
|
||||
memcpy(ostart + flSize, src, srcSize);
|
||||
return srcSize + flSize;
|
||||
}
|
||||
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE* const ostart = (BYTE* const)dst;
|
||||
U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
|
||||
|
||||
(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
|
||||
|
||||
switch(flSize)
|
||||
{
|
||||
case 1: /* 2 - 1 - 5 */
|
||||
ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
|
||||
break;
|
||||
case 2: /* 2 - 2 - 12 */
|
||||
MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
|
||||
break;
|
||||
case 3: /* 2 - 2 - 20 */
|
||||
MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
|
||||
break;
|
||||
default: /* not necessary : flSize is {1,2,3} */
|
||||
assert(0);
|
||||
}
|
||||
|
||||
ostart[flSize] = *(const BYTE*)src;
|
||||
return flSize+1;
|
||||
}
|
||||
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* workspace, size_t wkspSize,
|
||||
const int bmi2)
|
||||
{
|
||||
size_t const minGain = ZSTD_minGain(srcSize, strategy);
|
||||
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
U32 singleStream = srcSize < 256;
|
||||
symbolEncodingType_e hType = set_compressed;
|
||||
size_t cLitSize;
|
||||
|
||||
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
|
||||
disableLiteralCompression);
|
||||
|
||||
/* Prepare nextEntropy assuming reusing the existing table */
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
|
||||
if (disableLiteralCompression)
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
|
||||
/* small ? don't even attempt compression (speed opt) */
|
||||
# define COMPRESS_LITERALS_SIZE_MIN 63
|
||||
{ size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
|
||||
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
|
||||
RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
|
||||
{ HUF_repeat repeat = prevHuf->repeatMode;
|
||||
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
|
||||
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
|
||||
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
||||
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
|
||||
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
|
||||
workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
|
||||
if (repeat != HUF_repeat_none) {
|
||||
/* reused the existing table */
|
||||
hType = set_repeat;
|
||||
}
|
||||
}
|
||||
|
||||
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
if (cLitSize==1) {
|
||||
memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
|
||||
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
|
||||
if (hType == set_compressed) {
|
||||
/* using a newly constructed table */
|
||||
nextHuf->repeatMode = HUF_repeat_check;
|
||||
}
|
||||
|
||||
/* Build header */
|
||||
switch(lhSize)
|
||||
{
|
||||
case 3: /* 2 - 2 - 10 - 10 */
|
||||
{ U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
|
||||
MEM_writeLE24(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 4: /* 2 - 2 - 14 - 14 */
|
||||
{ U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
break;
|
||||
}
|
||||
case 5: /* 2 - 2 - 18 - 18 */
|
||||
{ U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
|
||||
MEM_writeLE32(ostart, lhc);
|
||||
ostart[4] = (BYTE)(cLitSize >> 10);
|
||||
break;
|
||||
}
|
||||
default: /* not possible : lhSize is {3,4,5} */
|
||||
assert(0);
|
||||
}
|
||||
return lhSize+cLitSize;
|
||||
}
|
||||
29
C/zstd/zstd_compress_literals.h
Normal file
29
C/zstd/zstd_compress_literals.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_COMPRESS_LITERALS_H
|
||||
#define ZSTD_COMPRESS_LITERALS_H
|
||||
|
||||
#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
|
||||
|
||||
|
||||
size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
|
||||
|
||||
size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
|
||||
ZSTD_hufCTables_t* nextHuf,
|
||||
ZSTD_strategy strategy, int disableLiteralCompression,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
void* workspace, size_t wkspSize,
|
||||
const int bmi2);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_LITERALS_H */
|
||||
415
C/zstd/zstd_compress_sequences.c
Normal file
415
C/zstd/zstd_compress_sequences.c
Normal file
@@ -0,0 +1,415 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/*-*************************************
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "zstd_compress_sequences.h"
|
||||
|
||||
/**
|
||||
* -log2(x / 256) lookup table for x in [0, 256).
|
||||
* If x == 0: Return 0
|
||||
* Else: Return floor(-log2(x / 256) * 256)
|
||||
*/
|
||||
static unsigned const kInverseProbabilityLog256[256] = {
|
||||
0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
|
||||
1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
|
||||
874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
|
||||
724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
|
||||
618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
|
||||
535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
|
||||
468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
|
||||
411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
|
||||
362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
|
||||
318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
|
||||
279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
|
||||
244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
|
||||
212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
|
||||
182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
|
||||
155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
|
||||
130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
|
||||
106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
|
||||
83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
|
||||
62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
|
||||
42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
|
||||
23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
|
||||
5, 4, 2, 1,
|
||||
};
|
||||
|
||||
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
|
||||
void const* ptr = ctable;
|
||||
U16 const* u16ptr = (U16 const*)ptr;
|
||||
U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
|
||||
return maxSymbolValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost in bytes of encoding the normalized count header.
|
||||
* Returns an error if any of the helper functions return an error.
|
||||
*/
|
||||
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
|
||||
size_t const nbSeq, unsigned const FSELog)
|
||||
{
|
||||
BYTE wksp[FSE_NCOUNTBOUND];
|
||||
S16 norm[MaxSeq + 1];
|
||||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
|
||||
return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost in bits of encoding the distribution described by count
|
||||
* using the entropy bound.
|
||||
*/
|
||||
static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
|
||||
{
|
||||
unsigned cost = 0;
|
||||
unsigned s;
|
||||
for (s = 0; s <= max; ++s) {
|
||||
unsigned norm = (unsigned)((256 * count[s]) / total);
|
||||
if (count[s] != 0 && norm == 0)
|
||||
norm = 1;
|
||||
assert(count[s] < total);
|
||||
cost += count[s] * kInverseProbabilityLog256[norm];
|
||||
}
|
||||
return cost >> 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost in bits of encoding the distribution in count using ctable.
|
||||
* Returns an error if ctable cannot represent all the symbols in count.
|
||||
*/
|
||||
static size_t ZSTD_fseBitCost(
|
||||
FSE_CTable const* ctable,
|
||||
unsigned const* count,
|
||||
unsigned const max)
|
||||
{
|
||||
unsigned const kAccuracyLog = 8;
|
||||
size_t cost = 0;
|
||||
unsigned s;
|
||||
FSE_CState_t cstate;
|
||||
FSE_initCState(&cstate, ctable);
|
||||
RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
|
||||
"Repeat FSE_CTable has maxSymbolValue %u < %u",
|
||||
ZSTD_getFSEMaxSymbolValue(ctable), max);
|
||||
for (s = 0; s <= max; ++s) {
|
||||
unsigned const tableLog = cstate.stateLog;
|
||||
unsigned const badCost = (tableLog + 1) << kAccuracyLog;
|
||||
unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
|
||||
if (count[s] == 0)
|
||||
continue;
|
||||
RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
|
||||
"Repeat FSE_CTable has Prob[%u] == 0", s);
|
||||
cost += count[s] * bitCost;
|
||||
}
|
||||
return cost >> kAccuracyLog;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the cost in bits of encoding the distribution in count using the
|
||||
* table described by norm. The max symbol support by norm is assumed >= max.
|
||||
* norm must be valid for every symbol with non-zero probability in count.
|
||||
*/
|
||||
static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
|
||||
unsigned const* count, unsigned const max)
|
||||
{
|
||||
unsigned const shift = 8 - accuracyLog;
|
||||
size_t cost = 0;
|
||||
unsigned s;
|
||||
assert(accuracyLog <= 8);
|
||||
for (s = 0; s <= max; ++s) {
|
||||
unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
|
||||
unsigned const norm256 = normAcc << shift;
|
||||
assert(norm256 > 0);
|
||||
assert(norm256 < 256);
|
||||
cost += count[s] * kInverseProbabilityLog256[norm256];
|
||||
}
|
||||
return cost >> 8;
|
||||
}
|
||||
|
||||
symbolEncodingType_e
|
||||
ZSTD_selectEncodingType(
|
||||
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
||||
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
||||
FSE_CTable const* prevCTable,
|
||||
short const* defaultNorm, U32 defaultNormLog,
|
||||
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
||||
ZSTD_strategy const strategy)
|
||||
{
|
||||
ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
|
||||
if (mostFrequent == nbSeq) {
|
||||
*repeatMode = FSE_repeat_none;
|
||||
if (isDefaultAllowed && nbSeq <= 2) {
|
||||
/* Prefer set_basic over set_rle when there are 2 or less symbols,
|
||||
* since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
|
||||
* If basic encoding isn't possible, always choose RLE.
|
||||
*/
|
||||
DEBUGLOG(5, "Selected set_basic");
|
||||
return set_basic;
|
||||
}
|
||||
DEBUGLOG(5, "Selected set_rle");
|
||||
return set_rle;
|
||||
}
|
||||
if (strategy < ZSTD_lazy) {
|
||||
if (isDefaultAllowed) {
|
||||
size_t const staticFse_nbSeq_max = 1000;
|
||||
size_t const mult = 10 - strategy;
|
||||
size_t const baseLog = 3;
|
||||
size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
|
||||
assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
|
||||
assert(mult <= 9 && mult >= 7);
|
||||
if ( (*repeatMode == FSE_repeat_valid)
|
||||
&& (nbSeq < staticFse_nbSeq_max) ) {
|
||||
DEBUGLOG(5, "Selected set_repeat");
|
||||
return set_repeat;
|
||||
}
|
||||
if ( (nbSeq < dynamicFse_nbSeq_min)
|
||||
|| (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
|
||||
DEBUGLOG(5, "Selected set_basic");
|
||||
/* The format allows default tables to be repeated, but it isn't useful.
|
||||
* When using simple heuristics to select encoding type, we don't want
|
||||
* to confuse these tables with dictionaries. When running more careful
|
||||
* analysis, we don't need to waste time checking both repeating tables
|
||||
* and default tables.
|
||||
*/
|
||||
*repeatMode = FSE_repeat_none;
|
||||
return set_basic;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
|
||||
size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
|
||||
size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
|
||||
size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
|
||||
|
||||
if (isDefaultAllowed) {
|
||||
assert(!ZSTD_isError(basicCost));
|
||||
assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
|
||||
}
|
||||
assert(!ZSTD_isError(NCountCost));
|
||||
assert(compressedCost < ERROR(maxCode));
|
||||
DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
|
||||
(unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
|
||||
if (basicCost <= repeatCost && basicCost <= compressedCost) {
|
||||
DEBUGLOG(5, "Selected set_basic");
|
||||
assert(isDefaultAllowed);
|
||||
*repeatMode = FSE_repeat_none;
|
||||
return set_basic;
|
||||
}
|
||||
if (repeatCost <= compressedCost) {
|
||||
DEBUGLOG(5, "Selected set_repeat");
|
||||
assert(!ZSTD_isError(repeatCost));
|
||||
return set_repeat;
|
||||
}
|
||||
assert(compressedCost < basicCost && compressedCost < repeatCost);
|
||||
}
|
||||
DEBUGLOG(5, "Selected set_compressed");
|
||||
*repeatMode = FSE_repeat_check;
|
||||
return set_compressed;
|
||||
}
|
||||
|
||||
size_t
|
||||
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
||||
unsigned* count, U32 max,
|
||||
const BYTE* codeTable, size_t nbSeq,
|
||||
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
||||
const FSE_CTable* prevCTable, size_t prevCTableSize,
|
||||
void* workspace, size_t workspaceSize)
|
||||
{
|
||||
BYTE* op = (BYTE*)dst;
|
||||
const BYTE* const oend = op + dstCapacity;
|
||||
DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
|
||||
|
||||
switch (type) {
|
||||
case set_rle:
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
|
||||
RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
|
||||
*op = codeTable[0];
|
||||
return 1;
|
||||
case set_repeat:
|
||||
memcpy(nextCTable, prevCTable, prevCTableSize);
|
||||
return 0;
|
||||
case set_basic:
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
|
||||
return 0;
|
||||
case set_compressed: {
|
||||
S16 norm[MaxSeq + 1];
|
||||
size_t nbSeq_1 = nbSeq;
|
||||
const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
|
||||
if (count[codeTable[nbSeq-1]] > 1) {
|
||||
count[codeTable[nbSeq-1]]--;
|
||||
nbSeq_1--;
|
||||
}
|
||||
assert(nbSeq_1 > 1);
|
||||
FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
|
||||
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
|
||||
FORWARD_IF_ERROR(NCountSize);
|
||||
FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
|
||||
return NCountSize;
|
||||
}
|
||||
}
|
||||
default: assert(0); RETURN_ERROR(GENERIC);
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
ZSTD_encodeSequences_body(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
||||
{
|
||||
BIT_CStream_t blockStream;
|
||||
FSE_CState_t stateMatchLength;
|
||||
FSE_CState_t stateOffsetBits;
|
||||
FSE_CState_t stateLitLength;
|
||||
|
||||
RETURN_ERROR_IF(
|
||||
ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
|
||||
dstSize_tooSmall, "not enough space remaining");
|
||||
DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
|
||||
(int)(blockStream.endPtr - blockStream.startPtr),
|
||||
(unsigned)dstCapacity);
|
||||
|
||||
/* first symbols */
|
||||
FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
|
||||
FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
|
||||
FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
U32 const ofBits = ofCodeTable[nbSeq-1];
|
||||
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
|
||||
BIT_flushBits(&blockStream);
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
|
||||
ofBits - extraBits);
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
|
||||
}
|
||||
BIT_flushBits(&blockStream);
|
||||
|
||||
{ size_t n;
|
||||
for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
|
||||
BYTE const llCode = llCodeTable[n];
|
||||
BYTE const ofCode = ofCodeTable[n];
|
||||
BYTE const mlCode = mlCodeTable[n];
|
||||
U32 const llBits = LL_bits[llCode];
|
||||
U32 const ofBits = ofCode;
|
||||
U32 const mlBits = ML_bits[mlCode];
|
||||
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
|
||||
(unsigned)sequences[n].litLength,
|
||||
(unsigned)sequences[n].matchLength + MINMATCH,
|
||||
(unsigned)sequences[n].offset);
|
||||
/* 32b*/ /* 64b*/
|
||||
/* (7)*/ /* (7)*/
|
||||
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
|
||||
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
|
||||
if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
|
||||
FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
|
||||
if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
BIT_addBits(&blockStream, sequences[n].litLength, llBits);
|
||||
if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
|
||||
BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
|
||||
if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
|
||||
if (longOffsets) {
|
||||
int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
|
||||
if (extraBits) {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, extraBits);
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
}
|
||||
BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
|
||||
ofBits - extraBits); /* 31 */
|
||||
} else {
|
||||
BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
|
||||
}
|
||||
BIT_flushBits(&blockStream); /* (7)*/
|
||||
DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
|
||||
} }
|
||||
|
||||
DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
|
||||
FSE_flushCState(&blockStream, &stateMatchLength);
|
||||
DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
|
||||
FSE_flushCState(&blockStream, &stateOffsetBits);
|
||||
DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
|
||||
FSE_flushCState(&blockStream, &stateLitLength);
|
||||
|
||||
{ size_t const streamSize = BIT_closeCStream(&blockStream);
|
||||
RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
|
||||
return streamSize;
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
ZSTD_encodeSequences_default(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
||||
{
|
||||
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
||||
CTable_MatchLength, mlCodeTable,
|
||||
CTable_OffsetBits, ofCodeTable,
|
||||
CTable_LitLength, llCodeTable,
|
||||
sequences, nbSeq, longOffsets);
|
||||
}
|
||||
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
ZSTD_encodeSequences_bmi2(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||
seqDef const* sequences, size_t nbSeq, int longOffsets)
|
||||
{
|
||||
return ZSTD_encodeSequences_body(dst, dstCapacity,
|
||||
CTable_MatchLength, mlCodeTable,
|
||||
CTable_OffsetBits, ofCodeTable,
|
||||
CTable_LitLength, llCodeTable,
|
||||
sequences, nbSeq, longOffsets);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
size_t ZSTD_encodeSequences(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
|
||||
#if DYNAMIC_BMI2
|
||||
if (bmi2) {
|
||||
return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
|
||||
CTable_MatchLength, mlCodeTable,
|
||||
CTable_OffsetBits, ofCodeTable,
|
||||
CTable_LitLength, llCodeTable,
|
||||
sequences, nbSeq, longOffsets);
|
||||
}
|
||||
#endif
|
||||
(void)bmi2;
|
||||
return ZSTD_encodeSequences_default(dst, dstCapacity,
|
||||
CTable_MatchLength, mlCodeTable,
|
||||
CTable_OffsetBits, ofCodeTable,
|
||||
CTable_LitLength, llCodeTable,
|
||||
sequences, nbSeq, longOffsets);
|
||||
}
|
||||
47
C/zstd/zstd_compress_sequences.h
Normal file
47
C/zstd/zstd_compress_sequences.h
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
#ifndef ZSTD_COMPRESS_SEQUENCES_H
|
||||
#define ZSTD_COMPRESS_SEQUENCES_H
|
||||
|
||||
#include "fse.h" /* FSE_repeat, FSE_CTable */
|
||||
#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
|
||||
|
||||
typedef enum {
|
||||
ZSTD_defaultDisallowed = 0,
|
||||
ZSTD_defaultAllowed = 1
|
||||
} ZSTD_defaultPolicy_e;
|
||||
|
||||
symbolEncodingType_e
|
||||
ZSTD_selectEncodingType(
|
||||
FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
|
||||
size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
|
||||
FSE_CTable const* prevCTable,
|
||||
short const* defaultNorm, U32 defaultNormLog,
|
||||
ZSTD_defaultPolicy_e const isDefaultAllowed,
|
||||
ZSTD_strategy const strategy);
|
||||
|
||||
size_t
|
||||
ZSTD_buildCTable(void* dst, size_t dstCapacity,
|
||||
FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
|
||||
unsigned* count, U32 max,
|
||||
const BYTE* codeTable, size_t nbSeq,
|
||||
const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
|
||||
const FSE_CTable* prevCTable, size_t prevCTableSize,
|
||||
void* workspace, size_t workspaceSize);
|
||||
|
||||
size_t ZSTD_encodeSequences(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
|
||||
FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
|
||||
seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
|
||||
|
||||
#endif /* ZSTD_COMPRESS_SEQUENCES_H */
|
||||
@@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
|
||||
sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
|
||||
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
|
||||
frameParameter_unsupported);
|
||||
|
||||
return skippableHeaderSize + sizeU32;
|
||||
{
|
||||
size_t const skippableSize = skippableHeaderSize + sizeU32;
|
||||
RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
|
||||
return skippableSize;
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_findDecompressedSize() :
|
||||
@@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
|
||||
|
||||
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
||||
if (ZSTD_isError(skippableSize))
|
||||
return skippableSize;
|
||||
if (srcSize < skippableSize) {
|
||||
if (ZSTD_isError(skippableSize)) {
|
||||
return ZSTD_CONTENTSIZE_ERROR;
|
||||
}
|
||||
assert(skippableSize <= srcSize);
|
||||
|
||||
src = (const BYTE *)src + skippableSize;
|
||||
srcSize -= skippableSize;
|
||||
@@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
|
||||
if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
|
||||
&& (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
|
||||
assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
|
||||
frameSizeInfo.compressedSize <= srcSize);
|
||||
return frameSizeInfo;
|
||||
} else {
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
@@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
|
||||
return frameSizeInfo.compressedSize;
|
||||
}
|
||||
|
||||
|
||||
/** ZSTD_decompressBound() :
|
||||
* compatible with legacy mode
|
||||
* `src` must point to the start of a ZSTD frame or a skippeable frame
|
||||
@@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
|
||||
unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
|
||||
if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
|
||||
return ZSTD_CONTENTSIZE_ERROR;
|
||||
assert(srcSize >= compressedSize);
|
||||
src = (const BYTE*)src + compressedSize;
|
||||
srcSize -= compressedSize;
|
||||
bound += decompressedBound;
|
||||
@@ -738,9 +742,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
|
||||
(unsigned)magicNumber, ZSTD_MAGICNUMBER);
|
||||
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
|
||||
size_t const skippableSize = readSkippableFrameSize(src, srcSize);
|
||||
if (ZSTD_isError(skippableSize))
|
||||
return skippableSize;
|
||||
RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong);
|
||||
FORWARD_IF_ERROR(skippableSize);
|
||||
assert(skippableSize <= srcSize);
|
||||
|
||||
src = (const BYTE *)src + skippableSize;
|
||||
srcSize -= skippableSize;
|
||||
@@ -906,6 +909,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
{ blockProperties_t bp;
|
||||
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
|
||||
if (ZSTD_isError(cBlockSize)) return cBlockSize;
|
||||
RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
|
||||
dctx->expected = cBlockSize;
|
||||
dctx->bType = bp.blockType;
|
||||
dctx->rleSize = bp.origSize;
|
||||
@@ -950,6 +954,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
|
||||
RETURN_ERROR(corruption_detected);
|
||||
}
|
||||
if (ZSTD_isError(rSize)) return rSize;
|
||||
RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
|
||||
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
|
||||
dctx->decodedSize += rSize;
|
||||
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
|
||||
|
||||
@@ -505,7 +505,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
*nbSeqPtr = nbSeq;
|
||||
|
||||
/* FSE table descriptors */
|
||||
RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */
|
||||
RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
|
||||
{ symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
|
||||
symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
|
||||
symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
|
||||
@@ -637,9 +637,10 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
|
||||
|
||||
/* copy Literals */
|
||||
ZSTD_copy8(op, *litPtr);
|
||||
if (sequence.litLength > 8)
|
||||
ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
|
||||
else
|
||||
ZSTD_copy8(op, *litPtr);
|
||||
op = oLitEnd;
|
||||
*litPtr = iLitEnd; /* update for next sequence */
|
||||
|
||||
@@ -686,13 +687,13 @@ size_t ZSTD_execSequence(BYTE* op,
|
||||
|
||||
if (oMatchEnd > oend-(16-MINMATCH)) {
|
||||
if (op < oend_w) {
|
||||
ZSTD_wildcopy(op, match, oend_w - op);
|
||||
ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
|
||||
match += oend_w - op;
|
||||
op = oend_w;
|
||||
}
|
||||
while (op < oMatchEnd) *op++ = *match++;
|
||||
} else {
|
||||
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
|
||||
ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */
|
||||
}
|
||||
return sequenceLength;
|
||||
}

@@ -717,9 +718,11 @@ size_t ZSTD_execSequenceLong(BYTE* op,
    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);

    /* copy Literals */
    ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
    if (sequence.litLength > 8)
        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
        ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
    else
        ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */

    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

@@ -766,13 +769,13 @@ size_t ZSTD_execSequenceLong(BYTE* op,

    if (oMatchEnd > oend-(16-MINMATCH)) {
        if (op < oend_w) {
            ZSTD_wildcopy(op, match, oend_w - op);
            ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst);
            match += oend_w - op;
            op = oend_w;
        }
        while (op < oMatchEnd) *op++ = *match++;
    } else {
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);   /* works even if matchLength < 8 */
    }
    return sequenceLength;
}

@@ -889,6 +892,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
}

FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
@@ -918,6 +922,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);

        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
            nbSeq--;
            {   seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@@ -930,6 +939,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
        RETURN_ERROR_IF(nbSeq, corruption_detected);
        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

@@ -1131,6 +1141,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,

@@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
            }
        }
    }   }
}


@@ -63,7 +62,10 @@ size_t ZSTD_compressBlock_doubleFast_generic(
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 prefixLowestIndex = ms->window.dictLimit;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32 lowestValid = ms->window.dictLimit;
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
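
/* Illustration (not from the diff): the maxDistance clamp computed above,
 * on hypothetical values. With windowLog = 20 (maxDistance = 1,048,576),
 * endIndex = 5,000,000 and lowestValid = 1,000,000:
 *   endIndex - lowestValid = 4,000,000 > 1,048,576
 *   => prefixLowestIndex = endIndex - maxDistance = 3,951,424
 * so candidates older than one full window are excluded even when the
 * window structure still remembers them. */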
@@ -95,8 +97,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
                                     dictCParams->chainLog : hBitsS;
    const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");

    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);

    /* if a dictionary is attached, it must be within window range */
    if (dictMode == ZSTD_dictMatchState) {
        assert(lowestValid + maxDistance >= endIndex);
    }

    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
@@ -138,7 +147,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
            goto _match_stored;
        }

@@ -147,7 +156,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
            goto _match_stored;
        }

@@ -170,8 +179,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
                offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; }   /* catch up */
                goto _match_found;
            }
        }
        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
@@ -186,16 +194,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(

                if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                    goto _search_next_long;
                }
            }
        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
        continue;

_search_next_long:

        {
            size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            const BYTE* matchL3 = base + matchIndexL3;
@@ -221,9 +227,7 @@ _search_next_long:
                    offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; }   /* catch up */
                    goto _match_found;
                }
            }
        }
        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@@ -242,7 +246,7 @@ _match_found:
        offset_2 = offset_1;
        offset_1 = offset;

        ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

_match_stored:
        /* match found */
@@ -250,11 +254,14 @@ _match_stored:
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
                hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;   /* here because current+2 could be > iend-8 */
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
                hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = current+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }
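
/* Illustration (not from the diff): positions written by the
 * "complementary insertion" above, for a match starting at index `current`
 * and ending at `ip`. It seeds the tables inside the span the match just
 * skipped, so later searches can still find candidates there:
 *
 *   hashLong  : current+2 (indexToInsert) and ip-2
 *   hashSmall : current+2 (indexToInsert) and ip-1
 *
 * It runs after the `ip <= ilimit` test because current+2 may otherwise
 * point past iend-8 for a match near the block end. */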

            /* check immediate repcode */
            if (dictMode == ZSTD_dictMatchState) {
@@ -278,8 +285,7 @@ _match_stored:
                        continue;
                    }
                    break;
                }
            }
        }   }

        if (dictMode == ZSTD_noDict) {
            while ( (ip <= ilimit)
@@ -294,14 +300,15 @@ _match_stored:
                ip += rLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
    }   }   }   }
        }   }   }
    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
    return (size_t)(iend - anchor);
}


@@ -360,10 +367,15 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    const U32 prefixStartIndex = ms->window.dictLimit;
    const BYTE* const base = ms->window.base;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 lowestValid = ms->window.lowLimit;
    const U32 lowLimit = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
    const U32 dictStartIndex = lowLimit;
    const U32 dictLimit = ms->window.dictLimit;
    const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const U32 dictStartIndex = ms->window.lowLimit;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
@@ -371,6 +383,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@@ -396,7 +412,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -407,7 +423,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -432,23 +448,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                }
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

        /* found a match : store it */
        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
            /* Fill Table */
            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
            hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
            /* Complementary insertion */
            /* done after iLimit test, as candidates could be > iend-8 */
            {   U32 const indexToInsert = current+2;
                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
            }

            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
@@ -475,7 +495,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
    rep[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
    return (size_t)(iend - anchor);
}



@@ -13,7 +13,8 @@


void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
                        const void* const end,
                        ZSTD_dictTableLoadMethod_e dtlm)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
@@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
    }   }   }   }
}


FORCE_INLINE_TEMPLATE
size_t ZSTD_compressBlock_fast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic(
    const BYTE* ip0 = istart;
    const BYTE* ip1;
    const BYTE* anchor = istart;
    const U32 prefixStartIndex = ms->window.dictLimit;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 validStartIndex = ms->window.dictLimit;
    const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -165,7 +170,7 @@ _match: /* Requires: ip0, match0, offcode */
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
    return (size_t)(iend - anchor);
}


@@ -222,8 +227,15 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
    const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
    const U32 dictHLog = dictCParams->hashLog;

    /* otherwise, we would get index underflow when translating a dict index
     * into a local index */
    /* if a dictionary is still attached, it necessarily means that
     * it is within window size. So we just check it. */
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
    assert(endIndex - prefixStartIndex <= maxDistance);
    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */

    /* ensure there will be no underflow
     * when translating a dict index into a local index */
    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
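
/* Illustration (not from the diff): why the assert above prevents
 * underflow. A dictionary match index is translated into the local index
 * space roughly as (names hypothetical):
 *
 *   U32 const dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase);
 *   U32 const localIndex     = dictMatchIndex + dictIndexDelta;
 *
 * If prefixStartIndex < (U32)(dictEnd - dictBase), dictIndexDelta would
 * wrap around and localIndex would be garbage; the assert rules that out. */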

    /* init */
@@ -251,7 +263,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
        } else if ( (matchIndex <= prefixStartIndex) ) {
            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
            U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -271,7 +283,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                } /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
            }
        } else if (MEM_read32(match) != MEM_read32(ip)) {
            /* it's not a match, and we're not going to check the dictionary */
@@ -286,7 +298,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
@@ -327,7 +339,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
    return iend - anchor;
    return (size_t)(iend - anchor);
}

size_t ZSTD_compressBlock_fast_dictMatchState(
@@ -366,15 +378,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
    const U32 dictStartIndex = ms->window.lowLimit;
    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 validLow = ms->window.lowLimit;
    const U32 lowLimit = (endIndex - validLow > maxDistance) ? endIndex - maxDistance : validLow;
    const U32 dictStartIndex = lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const U32 prefixStartIndex = ms->window.dictLimit;
    const U32 dictLimit = ms->window.dictLimit;
    const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=rep[0], offset_2=rep[1];

    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
    if (prefixStartIndex == dictStartIndex)
        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);

    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@@ -394,7 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH);
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
@@ -410,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
            offset = current - matchIndex;
            offset_2 = offset_1;
            offset_1 = offset;
            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }   }

        /* found a match : store it */
@@ -445,7 +466,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
    rep[1] = offset_2;

    /* Return the last literals size */
    return iend - anchor;
    return (size_t)(iend - anchor);
}



@@ -34,7 +34,6 @@
#endif
#include "xxhash.h"                /* XXH_reset, update, digest */


#if defined (__cplusplus)
extern "C" {
#endif

@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
*  Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }

#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

#define WILDCOPY_OVERLENGTH 8
#define VECLEN 16

typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst,
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;

/*! ZSTD_wildcopy() :
 *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
#define WILDCOPY_OVERLENGTH 8
MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;
    do
        COPY8(op, ip)
    while (op < oend);

    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
        do
            COPY8(op, ip)
        while (op < oend);
    }
    else {
        if ((length & 8) == 0)
            COPY8(op, ip);
        do {
            COPY16(op, ip);
        }
        while (op < oend);
    }
}

/*! ZSTD_wildcopy_16min() :
 *  same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
{
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;

    assert(length >= 8);
    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));

    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
        do
            COPY8(op, ip)
        while (op < oend);
    }
    else {
        if ((length & 8) == 0)
            COPY8(op, ip);
        do {
            COPY16(op, ip);
        }
        while (op < oend);
    }
}
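
/* Illustration (not from the diff): picking the overlap mode at the two
 * typical call sites. The helper name and the offset >= 8 precondition are
 * assumptions (smaller offsets take a scalar path elsewhere); both calls
 * may write up to WILDCOPY_OVERLENGTH bytes past dst+length. */
MEM_STATIC void ZSTD_wildcopy_usage_sketch(BYTE* op, const BYTE* litPtr,
                                           size_t litLength,
                                           size_t offset, size_t matchLength)
{
    /* literals : source is a separate buffer, never aliasing the output */
    ZSTD_wildcopy(op, litPtr, (ptrdiff_t)litLength, ZSTD_no_overlap);
    op += litLength;
    /* match : source trails destination by `offset` bytes, so the 16-byte
     * fast path is only legal once the distance reaches VECLEN */
    ZSTD_wildcopy(op, op - offset, (ptrdiff_t)matchLength, ZSTD_overlap_src_before_dst);
}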

MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */

@@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
    U32* largerPtr  = smallerPtr + 1;
    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
    U32 dummy32;   /* to be nullified at the end */
    U32 const windowLow = ms->window.lowLimit;
    U32 const windowValid = ms->window.lowLimit;
    U32 const maxDistance = 1U << cParams->windowLog;
    U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;


    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                current, dictLimit, windowLow);
@@ -239,7 +242,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,

    const BYTE* const base = ms->window.base;
    U32    const current = (U32)(ip-base);
    U32 const windowLow = ms->window.lowLimit;
    U32    const maxDistance = 1U << cParams->windowLog;
    U32    const windowValid = ms->window.lowLimit;
    U32    const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;

    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
@@ -490,8 +495,10 @@ size_t ZSTD_HcFindBestMatch_generic (
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const U32 lowLimit = ms->window.lowLimit;
    const U32 current = (U32)(ip-base);
    const U32 maxDistance = 1U << cParams->windowLog;
    const U32 lowValid = ms->window.lowLimit;
    const U32 lowLimit = (current - lowValid > maxDistance) ? current - maxDistance : lowValid;
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    U32 nbAttempts = 1U << cParams->searchLog;
    size_t ml=4-1;
@@ -653,7 +660,6 @@ size_t ZSTD_compressBlock_lazy_generic(

    /* init */
    ip += (dictAndPrefixLength == 0);
    ms->nextToUpdate3 = ms->nextToUpdate;
    if (dictMode == ZSTD_noDict) {
        U32 const maxRep = (U32)(ip - prefixLowest);
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@@ -933,7 +939,6 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
    U32 offset_1 = rep[0], offset_2 = rep[1];

    /* init */
    ms->nextToUpdate3 = ms->nextToUpdate;
    ip += (ip == prefixStart);

    /* Match Loop */

@@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
                &ldmState->window, /* cycleLog */ 0, maxDist, src);
                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
        }
        /* 2. We enforce the maximum offset allowed.

@@ -238,6 +238,10 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
            frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
            break;
    }
    if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
        frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
        frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
    }
    return frameSizeInfo;
}


@@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
 * to provide a cost which is directly comparable to a match ending at same position */
static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
{
    if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
    if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
        int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
                               + WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
                               - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
        int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
                               + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
                               - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
#if 1
        return contribution;
#else
@@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
                                     const optState_t* const optPtr,
                                     int optLevel)
{
    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
    int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
                           + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
    return contribution;
}
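
/* Illustration (not from the diff): the (int) casts above keep the price
 * arithmetic in signed space. Prices are in fractional-bit units
 * (BITCOST_MULTIPLIER per bit; 256 in upstream zstd, an assumption here),
 * and a contribution can legitimately be negative:
 *
 *   contribution = LL_bits[llCode] * 256          cost of the extra bits
 *                + WEIGHT(freq[0])                 baseline (litLength == 0)
 *                - WEIGHT(freq[llCode])            cheaper when the code is common
 *
 * Subtracting unsigned WEIGHT() values without the casts would wrap
 * around instead of going negative. */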
@@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

/* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
                                              U32* nextToUpdate3,
                                              const BYTE* const ip)
{
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
    U32 idx = ms->nextToUpdate3;
    U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
    U32 idx = *nextToUpdate3;
    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

@@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
        idx++;
    }

    *nextToUpdate3 = target;
    return hashTable3[hash3];
}
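
/* Illustration (not from the diff): the hashTable3 cursor now lives in the
 * caller rather than in the matchState. A hypothetical caller keeps a
 * local cursor per block and threads it through every lookup: */
static U32 ZSTD_hash3Lookup_sketch(ZSTD_matchState_t* ms, const BYTE* ip)
{
    U32 nextToUpdate3 = ms->nextToUpdate;   /* local, per-call cursor */
    /* real callers reuse the same cursor across all lookups in the block */
    return ZSTD_insertAndFindFirstIndexHash3(ms, &nextToUpdate3, ip);
}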

@@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1(
    }   }

    *smallerPtr = *largerPtr = 0;
    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
    assert(matchEndIdx > current + 8);
    return matchEndIdx - (current + 8);
    {   U32 positions = 0;
        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
        assert(matchEndIdx > current + 8);
        return MAX(positions, matchEndIdx - (current + 8));
    }
}
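
/* Illustration (not from the diff): effect of the rewritten return value.
 * Old: a long bestLength returned early with MIN(192, bestLength-384),
 * which could be smaller than matchEndIdx - (current + 8).
 * New: MAX(positions, matchEndIdx - (current + 8)) keeps the speed
 * shortcut as a floor but never steps below the already-scanned bound,
 * and since matchEndIdx > current + 8 is asserted, the step is always
 * >= 1 for the idx += forward loop in ZSTD_updateTree_internal below. */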

FORCE_INLINE_TEMPLATE
@@ -520,8 +525,13 @@ void ZSTD_updateTree_internal(
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

    while(idx < target)
        idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
    while(idx < target) {
        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
}

@@ -531,16 +541,18 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {

FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
                    ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                    ZSTD_matchState_t* ms,
                    U32* nextToUpdate3,
                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
                    U32 rep[ZSTD_REP_NUM],
                    const U32 rep[ZSTD_REP_NUM],
                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
                    ZSTD_match_t* matches,
                    const U32 lengthToBeat,
                    U32 const mls /* template */)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const maxDistance = 1U << cParams->windowLog;
    const BYTE* const base = ms->window.base;
    U32 const current = (U32)(ip-base);
    U32 const hashLog = cParams->hashLog;
@@ -556,8 +568,9 @@ U32 ZSTD_insertBtAndGetAllMatches (
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    U32 const btLow = btMask >= current ? 0 : current - btMask;
    U32 const windowLow = ms->window.lowLimit;
    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
    U32 const windowValid = ms->window.lowLimit;
    U32 const windowLow = ((current - windowValid) > maxDistance) ? current - maxDistance : windowValid;
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
@@ -627,7 +640,7 @@ U32 ZSTD_insertBtAndGetAllMatches (

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
@@ -653,9 +666,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                  (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = current+1;  /* skip insertion */
                    return 1;
                }
            }
        }
        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }

@@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (


FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
                        ZSTD_matchState_t* ms,
                        U32* nextToUpdate3,
                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
                        U32 rep[ZSTD_REP_NUM], U32 const ll0,
                        ZSTD_match_t* matches, U32 const lengthToBeat)
                        const U32 rep[ZSTD_REP_NUM],
                        U32 const ll0,
                        U32 const lengthToBeat)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const matchLengthSearch = cParams->minMatch;
@@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
    switch(matchLengthSearch)
    {
    case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
    default :
    case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
    case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
    case 7 :
    case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
    }
}

@@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
@@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
    ms->nextToUpdate3 = ms->nextToUpdate;
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

@@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
            U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
@@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                U32 matchNb;
                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
@@ -1094,7 +1108,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
    return iend - anchor;
    return (size_t)(iend - anchor);
}


@@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
    ms->nextToUpdate3 = ms->window.dictLimit;

    /* reinforce weight of collected statistics */
    ZSTD_upscaleStats(&ms->opt);

@@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
                                 const void* cSrc, size_t cSrcSize,
                                 const U16* DTable)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const omax = op + maxDstSize;
    BYTE* const olimit = omax-15;

    const void* ptr = DTable;
    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
    const U32 dtLog = DTable[0];
    size_t errorCode;
    U32 reloadStatus;

    /* Init */

    const U16* jumpTable = (const U16*)cSrc;
    const size_t length1 = FSE_readLE16(jumpTable);
    const size_t length2 = FSE_readLE16(jumpTable+1);
    const size_t length3 = FSE_readLE16(jumpTable+2);
    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
    const char* const start1 = (const char*)(cSrc) + 6;
    const char* const start2 = start1 + length1;
    const char* const start3 = start2 + length2;
    const char* const start4 = start3 + length3;
    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;

    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;

    errorCode = FSE_initDStream(&bitD1, start1, length1);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD2, start2, length2);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD3, start3, length3);
    if (FSE_isError(errorCode)) return errorCode;
    errorCode = FSE_initDStream(&bitD4, start4, length4);
    if (FSE_isError(errorCode)) return errorCode;

    reloadStatus=FSE_reloadDStream(&bitD2);

    /* 16 symbols per loop */
    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
    {
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
        BYTE* const ostart = (BYTE*) dst;
        BYTE* op = ostart;
        BYTE* const omax = op + maxDstSize;
        BYTE* const olimit = omax-15;

#define HUF_DECODE_SYMBOL_1(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
        const void* ptr = DTable;
        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
        const U32 dtLog = DTable[0];
        size_t errorCode;
        U32 reloadStatus;

#define HUF_DECODE_SYMBOL_2(n, Dstream) \
        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
        /* Init */

        HUF_DECODE_SYMBOL_1( 0, bitD1);
        HUF_DECODE_SYMBOL_1( 1, bitD2);
        HUF_DECODE_SYMBOL_1( 2, bitD3);
        HUF_DECODE_SYMBOL_1( 3, bitD4);
        HUF_DECODE_SYMBOL_2( 4, bitD1);
        HUF_DECODE_SYMBOL_2( 5, bitD2);
        HUF_DECODE_SYMBOL_2( 6, bitD3);
        HUF_DECODE_SYMBOL_2( 7, bitD4);
        HUF_DECODE_SYMBOL_1( 8, bitD1);
        HUF_DECODE_SYMBOL_1( 9, bitD2);
        HUF_DECODE_SYMBOL_1(10, bitD3);
        HUF_DECODE_SYMBOL_1(11, bitD4);
        HUF_DECODE_SYMBOL_0(12, bitD1);
        HUF_DECODE_SYMBOL_0(13, bitD2);
        HUF_DECODE_SYMBOL_0(14, bitD3);
        HUF_DECODE_SYMBOL_0(15, bitD4);
    }
        const U16* jumpTable = (const U16*)cSrc;
        const size_t length1 = FSE_readLE16(jumpTable);
        const size_t length2 = FSE_readLE16(jumpTable+1);
        const size_t length3 = FSE_readLE16(jumpTable+2);
        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
        const char* const start1 = (const char*)(cSrc) + 6;
        const char* const start2 = start1 + length1;
        const char* const start3 = start2 + length2;
        const char* const start4 = start3 + length3;
        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;

    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
        return (size_t)-FSE_ERROR_corruptionDetected;
        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;

    /* tail */
    {
        // bitTail = bitD1;   // *much* slower : -20% !??!
        FSE_DStream_t bitTail;
        bitTail.ptr = bitD1.ptr;
        bitTail.bitsConsumed = bitD1.bitsConsumed;
        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
        bitTail.start = start1;
        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
        errorCode = FSE_initDStream(&bitD1, start1, length1);
        if (FSE_isError(errorCode)) return errorCode;
        errorCode = FSE_initDStream(&bitD2, start2, length2);
        if (FSE_isError(errorCode)) return errorCode;
        errorCode = FSE_initDStream(&bitD3, start3, length3);
        if (FSE_isError(errorCode)) return errorCode;
        errorCode = FSE_initDStream(&bitD4, start4, length4);
        if (FSE_isError(errorCode)) return errorCode;

        reloadStatus=FSE_reloadDStream(&bitD2);

        /* 16 symbols per loop */
        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
        {
            HUF_DECODE_SYMBOL_0(0, bitTail);
#define HUF_DECODE_SYMBOL_0(n, Dstream) \
            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);

#define HUF_DECODE_SYMBOL_1(n, Dstream) \
            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)

#define HUF_DECODE_SYMBOL_2(n, Dstream) \
            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
            if (FSE_32bits()) FSE_reloadDStream(&Dstream)

            HUF_DECODE_SYMBOL_1( 0, bitD1);
            HUF_DECODE_SYMBOL_1( 1, bitD2);
            HUF_DECODE_SYMBOL_1( 2, bitD3);
            HUF_DECODE_SYMBOL_1( 3, bitD4);
            HUF_DECODE_SYMBOL_2( 4, bitD1);
            HUF_DECODE_SYMBOL_2( 5, bitD2);
            HUF_DECODE_SYMBOL_2( 6, bitD3);
            HUF_DECODE_SYMBOL_2( 7, bitD4);
            HUF_DECODE_SYMBOL_1( 8, bitD1);
            HUF_DECODE_SYMBOL_1( 9, bitD2);
            HUF_DECODE_SYMBOL_1(10, bitD3);
            HUF_DECODE_SYMBOL_1(11, bitD4);
            HUF_DECODE_SYMBOL_0(12, bitD1);
            HUF_DECODE_SYMBOL_0(13, bitD2);
            HUF_DECODE_SYMBOL_0(14, bitD3);
            HUF_DECODE_SYMBOL_0(15, bitD4);
        }

        if (FSE_endOfDStream(&bitTail))
            return op-ostart;
        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
            return (size_t)-FSE_ERROR_corruptionDetected;

        /* tail */
        {
            // bitTail = bitD1;   // *much* slower : -20% !??!
            FSE_DStream_t bitTail;
            bitTail.ptr = bitD1.ptr;
            bitTail.bitsConsumed = bitD1.bitsConsumed;
            bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
            bitTail.start = start1;
            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
            {
                HUF_DECODE_SYMBOL_0(0, bitTail);
            }

            if (FSE_endOfDStream(&bitTail))
                return op-ostart;
        }

        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */

        return (size_t)-FSE_ERROR_corruptionDetected;
    }

    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */

    return (size_t)-FSE_ERROR_corruptionDetected;
}


@@ -1355,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void)

static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }

static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }

static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }

static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@@ -1381,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr)
    }
}


static U32 ZSTD_readLE32(const void* memPtr)
static U32 ZSTD_readLE24(const void* memPtr)
{
    if (ZSTD_isLittleEndian())
        return ZSTD_read32(memPtr);
    else
    {
        const BYTE* p = (const BYTE*)memPtr;
        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
    }
    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
}
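
/* Illustration (not from the diff): ZSTD_readLE24 composes a 16-bit
 * little-endian read with the third byte. For bytes {0x01, 0x02, 0x03}
 * it yields 0x030201 = 197121, matching the old ZSTD_readLE32(p) & 0xFFFFFF
 * without touching a fourth byte that may sit past the buffer end. */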

static U32 ZSTD_readBE32(const void* memPtr)
@@ -1704,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
        seqState->prevOffset = seq->offset;
        if (litLength == MaxLL)
        {
            U32 add = dumps<de ? *dumps++ : 0;
            const U32 add = dumps<de ? *dumps++ : 0;
            if (add < 255) litLength += add;
            else
            {
                if (dumps<=(de-3))
                {
                    litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
                    litLength = ZSTD_readLE24(dumps);
                    dumps += 3;
                }
            }
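
/* Illustration (not from the diff): the length-extension scheme decoded
 * above, on hypothetical dump bytes.
 *   dumps = { 0x10 }                   -> add = 16  -> litLength = MaxLL + 16
 *   dumps = { 0xFF, 0x40, 0x42, 0x0F } -> add = 255 -> litLength is replaced
 *       by the next 3 bytes little-endian : 0x0F4240 = 1,000,000
 * The dumps<de guards simply substitute 0 when the dump area is exhausted
 * (the data is corrupt at that point anyway). */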
|
||||
@@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
|
||||
if (matchLength == MaxML)
|
||||
{
|
||||
U32 add = dumps<de ? *dumps++ : 0;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) matchLength += add;
|
||||
else
|
||||
{
|
||||
if (dumps<=(de-3))
|
||||
{
|
||||
matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
||||
matchLength = ZSTD_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
|
||||
{
|
||||
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
|
||||
{
|
||||
if (MEM_isLittleEndian())
|
||||
@@ -3043,11 +3048,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
seqState->prevOffset = seq->offset;
|
||||
if (litLength == MaxLL)
|
||||
{
|
||||
U32 add = *dumps++;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) litLength += add;
|
||||
else
|
||||
else if (dumps + 3 <= de)
|
||||
{
|
||||
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
||||
litLength = MEM_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
||||
@@ -3073,11 +3078,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
|
||||
if (matchLength == MaxML)
|
||||
{
|
||||
U32 add = *dumps++;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) matchLength += add;
|
||||
else
|
||||
else if (dumps + 3 <= de)
|
||||
{
|
||||
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
||||
matchLength = MEM_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
||||
|
||||
@@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
|
||||
{
|
||||
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
|
||||
{
|
||||
if (MEM_isLittleEndian())
|
||||
@@ -2684,11 +2689,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
seqState->prevOffset = seq->offset;
|
||||
if (litLength == MaxLL)
|
||||
{
|
||||
U32 add = *dumps++;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) litLength += add;
|
||||
else
|
||||
else if (dumps + 3 <= de)
|
||||
{
|
||||
litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
||||
litLength = MEM_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
||||
@@ -2714,11 +2719,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
|
||||
if (matchLength == MaxML)
|
||||
{
|
||||
U32 add = *dumps++;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) matchLength += add;
|
||||
else
|
||||
else if (dumps + 3 <= de)
|
||||
{
|
||||
matchLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
|
||||
matchLength = MEM_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
||||
|
||||
@@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
|
||||
{
|
||||
return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
|
||||
}
|
||||
|
||||
MEM_STATIC U32 MEM_readLE32(const void* memPtr)
|
||||
{
|
||||
if (MEM_isLittleEndian())
|
||||
@@ -2808,13 +2813,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
|
||||
prevOffset = litLength ? seq->offset : seqState->prevOffset;
|
||||
if (litLength == MaxLL) {
|
||||
U32 add = *dumps++;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) litLength += add;
|
||||
else {
|
||||
litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
|
||||
else if (dumps + 3 <= de) {
|
||||
litLength = MEM_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */
|
||||
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
||||
}
|
||||
|
||||
@@ -2837,13 +2841,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
|
||||
/* MatchLength */
|
||||
matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
|
||||
if (matchLength == MaxML) {
|
||||
U32 add = *dumps++;
|
||||
const U32 add = dumps<de ? *dumps++ : 0;
|
||||
if (add < 255) matchLength += add;
|
||||
else {
|
||||
matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
|
||||
else if (dumps + 3 <= de){
|
||||
matchLength = MEM_readLE24(dumps);
|
||||
dumps += 3;
|
||||
}
|
||||
if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */
|
||||
if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */
|
||||
}
|
||||
matchLength += MINMATCH;
|
||||
|
||||
@@ -1998,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable(
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
const U16* DTable)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
const void* const dtPtr = DTable;
|
||||
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
|
||||
const U32 dtLog = DTable[0];
|
||||
size_t errorCode;
|
||||
|
||||
/* Init */
|
||||
BITv05_DStream_t bitD1;
|
||||
BITv05_DStream_t bitD2;
|
||||
BITv05_DStream_t bitD3;
|
||||
BITv05_DStream_t bitD4;
|
||||
const size_t length1 = MEM_readLE16(istart);
|
||||
const size_t length2 = MEM_readLE16(istart+2);
|
||||
const size_t length3 = MEM_readLE16(istart+4);
|
||||
size_t length4;
|
||||
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
||||
const BYTE* const istart2 = istart1 + length1;
|
||||
const BYTE* const istart3 = istart2 + length2;
|
||||
const BYTE* const istart4 = istart3 + length3;
|
||||
const size_t segmentSize = (dstSize+3) / 4;
|
||||
BYTE* const opStart2 = ostart + segmentSize;
|
||||
BYTE* const opStart3 = opStart2 + segmentSize;
|
||||
BYTE* const opStart4 = opStart3 + segmentSize;
|
||||
BYTE* op1 = ostart;
|
||||
BYTE* op2 = opStart2;
|
||||
BYTE* op3 = opStart3;
|
||||
BYTE* op4 = opStart4;
|
||||
U32 endSignal;
|
||||
|
||||
/* Check */
|
||||
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
const void* const dtPtr = DTable;
|
||||
const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
|
||||
const U32 dtLog = DTable[0];
|
||||
size_t errorCode;
|
||||
|
||||
length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
||||
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
||||
errorCode = BITv05_initDStream(&bitD1, istart1, length1);
|
||||
if (HUFv05_isError(errorCode)) return errorCode;
|
||||
errorCode = BITv05_initDStream(&bitD2, istart2, length2);
|
||||
if (HUFv05_isError(errorCode)) return errorCode;
|
||||
errorCode = BITv05_initDStream(&bitD3, istart3, length3);
|
||||
if (HUFv05_isError(errorCode)) return errorCode;
|
||||
errorCode = BITv05_initDStream(&bitD4, istart4, length4);
|
||||
if (HUFv05_isError(errorCode)) return errorCode;
|
||||
/* Init */
|
||||
BITv05_DStream_t bitD1;
|
||||
BITv05_DStream_t bitD2;
|
||||
BITv05_DStream_t bitD3;
|
||||
BITv05_DStream_t bitD4;
|
||||
const size_t length1 = MEM_readLE16(istart);
|
||||
const size_t length2 = MEM_readLE16(istart+2);
|
||||
const size_t length3 = MEM_readLE16(istart+4);
|
||||
size_t length4;
|
||||
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
||||
const BYTE* const istart2 = istart1 + length1;
|
||||
const BYTE* const istart3 = istart2 + length2;
|
||||
const BYTE* const istart4 = istart3 + length3;
|
||||
const size_t segmentSize = (dstSize+3) / 4;
|
||||
BYTE* const opStart2 = ostart + segmentSize;
|
||||
BYTE* const opStart3 = opStart2 + segmentSize;
|
||||
BYTE* const opStart4 = opStart3 + segmentSize;
|
||||
BYTE* op1 = ostart;
|
||||
BYTE* op2 = opStart2;
|
||||
BYTE* op3 = opStart3;
|
||||
BYTE* op4 = opStart4;
|
||||
U32 endSignal;
|
||||
|
||||
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
||||
endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
|
||||
for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
|
||||
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
|
||||
HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
|
||||
HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
|
||||
HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
|
||||
HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
|
||||
HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
|
||||
HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
|
||||
HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
|
||||
HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
|
||||
HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
|
||||

    length4 = cSrcSize - (length1 + length2 + length3 + 6);
    if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
    errorCode = BITv05_initDStream(&bitD1, istart1, length1);
    if (HUFv05_isError(errorCode)) return errorCode;
    errorCode = BITv05_initDStream(&bitD2, istart2, length2);
    if (HUFv05_isError(errorCode)) return errorCode;
    errorCode = BITv05_initDStream(&bitD3, istart3, length3);
    if (HUFv05_isError(errorCode)) return errorCode;
    errorCode = BITv05_initDStream(&bitD4, istart4, length4);
    if (HUFv05_isError(errorCode)) return errorCode;
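
    /* note : the jump table stores only three stream sizes ;
     * length4 is whatever remains of cSrc. The check above relies on
     * size_t wraparound : if length1+length2+length3+6 exceeds cSrcSize,
     * the subtraction wraps to a huge value, which is then rejected. */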

    /* 16-32 symbols per loop (4-8 symbols per stream) */
    endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
    for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
        HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
        HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
        HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
        HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
        HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
        HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
        HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
        HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
    }
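
    /* note : only op4 is bounded inside the hot loop (op4 < oend-7) ;
     * op1..op3 could in principle run past their segment, which is why
     * they are re-checked against opStart2..opStart4 just below. */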

    /* check corruption */
    if (op1 > opStart2) return ERROR(corruption_detected);
    if (op2 > opStart3) return ERROR(corruption_detected);
    if (op3 > opStart4) return ERROR(corruption_detected);
    /* note : op4 supposed already verified within main loop */

    /* finish bitStreams one by one */
    HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
    HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
    HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
    HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);

    /* check */
    endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
    if (!endSignal) return ERROR(corruption_detected);

    /* decoded size */
    return dstSize;
}
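
For orientation, here is a minimal, self-contained sketch of how the 4-stream header decoded above is laid out: a 6-byte jump table holding three little-endian stream sizes, with the fourth size implied by the remaining input. This is illustrative code, not the library's; readLE16 and the sample buffer are hypothetical.

    #include <stddef.h>
    #include <stdio.h>

    static size_t readLE16(const unsigned char* p)
    {
        return (size_t)p[0] | ((size_t)p[1] << 8);   /* little-endian 16-bit read */
    }

    int main(void)
    {
        /* hypothetical input : 6-byte jump table, then 4 streams of 1 byte each */
        const unsigned char cSrc[10] = { 1,0, 1,0, 1,0, 0xAA, 0xBB, 0xCC, 0xDD };
        size_t const cSrcSize = sizeof(cSrc);
        size_t const length1 = readLE16(cSrc);
        size_t const length2 = readLE16(cSrc+2);
        size_t const length3 = readLE16(cSrc+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        if (length4 > cSrcSize) { puts("corruption_detected"); return 1; }  /* wraparound check */
        printf("stream sizes : %zu %zu %zu %zu\n", length1, length2, length3, length4);
        return 0;
    }
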
@@ -3150,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
litLength = FSEv05_peakSymbol(&(seqState->stateLL));
prevOffset = litLength ? seq->offset : seqState->prevOffset;
if (litLength == MaxLL) {
    U32 add = *dumps++;
    const U32 add = *dumps++;
    if (add < 255) litLength += add;
    else {
        litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
        if (litLength&1) litLength>>=1, dumps += 3;
        else litLength = (U16)(litLength)>>1, dumps += 2;
    }
    else if (dumps + 2 <= de) {
        litLength = MEM_readLE16(dumps);
        dumps += 2;
        if ((litLength & 1) && dumps < de) {
            litLength += *dumps << 16;
            dumps += 1;
        }
        litLength>>=1;
    }
    if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
    if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
}

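The hunk above replaces the unconditional 3-byte read (MEM_readLE32 masked to 24 bits) with a bounds-checked 2-or-3-byte form: a little-endian 16-bit value whose low bit flags one extra byte, with the flag bit shifted out afterwards. The matchLength hunk below applies the same scheme, and additionally guards the very first *dumps read. A hedged, self-contained sketch of the new decoding (decodeExtLength is a hypothetical helper, not a library function):

    #include <stdio.h>

    static unsigned decodeExtLength(const unsigned char* dumps, const unsigned char* de)
    {
        unsigned length = 0;
        if (dumps + 2 <= de) {
            length = (unsigned)dumps[0] | ((unsigned)dumps[1] << 8);  /* MEM_readLE16 */
            dumps += 2;
            if ((length & 1) && dumps < de)     /* low bit set : one extra high byte */
                length += (unsigned)*dumps << 16;
            length >>= 1;                       /* drop the flag bit */
        }
        return length;
    }

    int main(void)
    {
        const unsigned char two[]   = { 0x08, 0x00 };        /* 2-byte form : 8>>1 = 4 */
        const unsigned char three[] = { 0x09, 0x00, 0x01 };  /* 3-byte form : (9 + (1<<16))>>1 = 32772 */
        printf("%u\n", decodeExtLength(two, two + sizeof(two)));
        printf("%u\n", decodeExtLength(three, three + sizeof(three)));
        return 0;
    }
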
@@ -3184,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
/* MatchLength */
matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
if (matchLength == MaxML) {
    U32 add = *dumps++;
    const U32 add = dumps<de ? *dumps++ : 0;
    if (add < 255) matchLength += add;
    else {
        matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
        if (matchLength&1) matchLength>>=1, dumps += 3;
        else matchLength = (U16)(matchLength)>>1, dumps += 2;
    }
    else if (dumps + 2 <= de) {
        matchLength = MEM_readLE16(dumps);
        dumps += 2;
        if ((matchLength & 1) && dumps < de) {
            matchLength += *dumps << 16;
            dumps += 1;
        }
        matchLength >>= 1;
    }
    if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
    if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
}
matchLength += MINMATCH;

@@ -3242,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
}

/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong);  /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{   U32 const LLtype  = *ip >> 6;
    U32 const Offtype = (*ip >> 4) & 3;
    U32 const MLtype  = (*ip >> 2) & 3;
    ip++;

    /* check */
    if (ip > iend-3) return ERROR(srcSize_wrong);  /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */

    /* Build DTables */
    {   size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
        if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);

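The hunk above moves the input-size check before the first *ip read, and rewrites it as ip + 4 > iend instead of the old post-increment ip > iend-3. A brief sketch of why both the ordering and the arithmetic matter (safeHasBytes is a hypothetical helper; the always-safe form compares the remaining distance rather than forming an out-of-range pointer):

    #include <stdio.h>

    static int safeHasBytes(const unsigned char* ip, const unsigned char* iend, int n)
    {
        /* iend - n could move before the buffer start when fewer than n
         * bytes remain, which is undefined behaviour for pointers ;
         * the distance comparison below stays within [ip, iend]. */
        return (iend - ip) >= n;
    }

    int main(void)
    {
        unsigned char buf[2] = { 0, 0 };
        printf("%d\n", safeHasBytes(buf, buf + sizeof(buf), 4));  /* 0 : too short */
        printf("%d\n", safeHasBytes(buf, buf + sizeof(buf), 2));  /* 1 : enough */
        return 0;
    }
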
@@ -3672,7 +3670,7 @@ void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
blockProperties_t blockProperties = { bt_compressed, 0 };

/* Frame Header */
{   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
{   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
    if (ZSTDv06_isError(frameHeaderSize)) {
        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
        return;

@@ -3470,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
}

/* FSE table descriptors */
if (ip + 4 > iend) return ERROR(srcSize_wrong);  /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
{   U32 const LLtype = *ip >> 6;
    U32 const OFtype = (*ip >> 4) & 3;
    U32 const MLtype = (*ip >> 2) & 3;
    ip++;

    /* check */
    if (ip > iend-3) return ERROR(srcSize_wrong);  /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */

    /* Build DTables */
    {   size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
        if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);

@@ -3918,7 +3916,7 @@ void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS
}

/* Frame Header */
{   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
{   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
    if (ZSTDv07_isError(frameHeaderSize)) {
        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
        return;

@@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
    size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
    size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
    assert(flushed <= produced);
    assert(jobPtr->consumed <= jobPtr->src.size);
    toFlush = produced - flushed;
    if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
        /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
    /* if toFlush==0, nothing is available to flush.
     * However, jobID is expected to still be active:
     * if jobID was already completed and fully flushed,
     * ZSTDMT_flushProduced() should have already moved onto next job.
     * Therefore, some input has not yet been consumed. */
    if (toFlush==0) {
        assert(jobPtr->consumed < jobPtr->src.size);
    }
    }

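A hypothetical sketch of the invariant the new assertions document: when a job has produced nothing flushable, it must still be consuming input, otherwise ZSTDMT_flushProduced() would already have moved past it. MiniJob and miniToFlush are made-up stand-ins for the real job structure:

    #include <assert.h>
    #include <stddef.h>

    typedef struct { size_t consumed, srcSize, produced, dstFlushed; } MiniJob;

    static size_t miniToFlush(const MiniJob* job)
    {
        size_t const toFlush = job->produced - job->dstFlushed;
        if (toFlush == 0)
            assert(job->consumed < job->srcSize);  /* still working on its input */
        return toFlush;
    }

    int main(void)
    {
        MiniJob const job = { 10, 100, 5, 5 };  /* consumed, srcSize, produced, dstFlushed */
        return (int)miniToFlush(&job);
    }
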
@@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)

static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{
    if (params.ldmParams.enableLdm)
    unsigned jobLog;
    if (params.ldmParams.enableLdm) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead. */
        return MAX(21, params.cParams.chainLog + 4);
    return MAX(20, params.cParams.windowLog + 2);
        jobLog = MAX(21, params.cParams.chainLog + 4);
    } else {
        jobLog = MAX(20, params.cParams.windowLog + 2);
    }
    return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
}

static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)

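The refactor above computes the job log once and caps it, instead of returning early from each branch. A minimal sketch of the resulting behaviour (targetJobLog and jobLogMax are hypothetical stand-ins; upstream caps at ZSTDMT_JOBLOG_MAX):

    #include <stdio.h>

    #define MAX(a,b) ((a)>(b)?(a):(b))
    #define MIN(a,b) ((a)<(b)?(a):(b))

    static unsigned targetJobLog(int enableLdm, unsigned chainLog, unsigned windowLog, unsigned jobLogMax)
    {
        unsigned jobLog;
        if (enableLdm) {
            /* long-range mode : windowLog is oversized, derive from chainLog */
            jobLog = MAX(21u, chainLog + 4);
        } else {
            jobLog = MAX(20u, windowLog + 2);
        }
        return MIN(jobLog, jobLogMax);   /* the new cap */
    }

    int main(void)
    {
        printf("%u\n", targetJobLog(0, 0, 31, 30));   /* 33 -> capped to 30 */
        printf("%u\n", targetJobLog(1, 28, 0, 30));   /* 32 -> capped to 30 */
        return 0;
    }
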
@@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
        ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
              - overlapRLog;
    }
    assert(0 <= ovLog && ovLog <= 30);
    assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
    DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
    return (ovLog==0) ? 0 : (size_t)1 << ovLog;

@@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal(
FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );

if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;

mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {

@@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal(
if (mtctx->targetSectionSize == 0) {
    mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
}
assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);

if (params.rsyncable) {
    /* Aim for the targetSectionSize as the average job size. */
    U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);

@@ -50,6 +50,7 @@
#ifndef ZSTDMT_JOBSIZE_MIN
#  define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
#define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))

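The new ZSTDMT_JOBLOG_MAX pairs with the existing ZSTDMT_JOBSIZE_MAX: 1 << 29 is 512 MB and 1 << 30 is 1024 MB, so a job size derived from the log can never exceed the byte limit for the same word size. A small sketch (sizeof(void*) stands in for MEM_32bits(), which is an assumption about the target, not the library's detection logic):

    #include <stdio.h>

    int main(void)
    {
        int const is32 = sizeof(void*) == 4;            /* stand-in for MEM_32bits() */
        unsigned const jobLogMax = is32 ? 29u : 30u;
        unsigned long long const jobSizeMax = 1ULL << jobLogMax;
        printf("max job size : %llu MB\n", jobSizeMax >> 20);
        return 0;
    }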