mirror of
https://github.com/Xevion/easy7zip.git
synced 2025-12-07 16:07:05 -06:00
Update zstd to version 1.3.8
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#define MY_VER_MAJOR 18
|
||||
#define MY_VER_MINOR 05
|
||||
#define MY_VER_BUILD 0
|
||||
#define MY_VERSION_NUMBERS "18.05 ZS v1.3.7 R3"
|
||||
#define MY_VERSION_NUMBERS "18.05 ZS v1.3.8 R1"
|
||||
#define MY_VERSION MY_VERSION_NUMBERS
|
||||
|
||||
#ifdef MY_CPU_NAME
|
||||
@@ -10,7 +10,7 @@
|
||||
#define MY_VERSION_CPU MY_VERSION
|
||||
#endif
|
||||
|
||||
#define MY_DATE "2018-11-27"
|
||||
#define MY_DATE "2018-12-28"
|
||||
#undef MY_COPYRIGHT
|
||||
#undef MY_VERSION_COPYRIGHT_DATE
|
||||
#define MY_AUTHOR_NAME "Igor Pavlov, Tino Reichardt"
|
||||
|
||||
108
C/zstd/README.md
108
C/zstd/README.md
@@ -7,13 +7,32 @@ in order to make it easier to select or exclude features.
|
||||
|
||||
#### Building
|
||||
|
||||
`Makefile` script is provided, supporting all standard [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
|
||||
`Makefile` script is provided, supporting [Makefile conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html#Makefile-Conventions),
|
||||
including commands variables, staged install, directory variables and standard targets.
|
||||
- `make` : generates both static and dynamic libraries
|
||||
- `make install` : install libraries in default system directories
|
||||
- `make install` : install libraries and headers in target system directories
|
||||
|
||||
`libzstd` default scope includes compression, decompression, dictionary building,
|
||||
and decoding support for legacy formats >= v0.5.0.
|
||||
`libzstd` default scope is pretty large, including compression, decompression, dictionary builder,
|
||||
and support for decoding legacy formats >= v0.5.0.
|
||||
The scope can be reduced on demand (see paragraph _modular build_).
|
||||
|
||||
|
||||
#### Multithreading support
|
||||
|
||||
Multithreading is disabled by default when building with `make`.
|
||||
Enabling multithreading requires 2 conditions :
|
||||
- set build macro `ZSTD_MULTITHREAD` (`-DZSTD_MULTITHREAD` for `gcc`)
|
||||
- for POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
|
||||
|
||||
Both conditions are automatically applied when invoking `make lib-mt` target.
|
||||
|
||||
When linking a POSIX program with a multithreaded version of `libzstd`,
|
||||
note that it's necessary to request the `-pthread` flag during link stage.
|
||||
|
||||
Multithreading capabilities are exposed
|
||||
via the [advanced API defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/v1.3.8/lib/zstd.h#L592).
|
||||
This API is still labelled experimental,
|
||||
but is expected to become "stable" in the near future.
|
||||
|
||||
|
||||
#### API
|
||||
@@ -26,63 +45,70 @@ Zstandard's stable API is exposed within [lib/zstd.h](zstd.h).
|
||||
Optional advanced features are exposed via :
|
||||
|
||||
- `lib/common/zstd_errors.h` : translates `size_t` function results
|
||||
into an `ZSTD_ErrorCode`, for accurate error handling.
|
||||
into a `ZSTD_ErrorCode`, for accurate error handling.
|
||||
|
||||
- `ZSTD_STATIC_LINKING_ONLY` : if this macro is defined _before_ including `zstd.h`,
|
||||
it unlocks access to advanced experimental API,
|
||||
exposed in second part of `zstd.h`.
|
||||
These APIs are not "stable", their definition may change in the future.
|
||||
As a consequence, it shall ___never be used with dynamic library___ !
|
||||
it unlocks access to the experimental API,
|
||||
exposed in the second part of `zstd.h`.
|
||||
All definitions in the experimental APIs are unstable,
|
||||
they may still change in the future, or even be removed.
|
||||
As a consequence, experimental definitions shall ___never be used with dynamic library___ !
|
||||
Only static linking is allowed.
|
||||
|
||||
|
||||
#### Modular build
|
||||
|
||||
It's possible to compile only a limited set of features.
|
||||
It's possible to compile only a limited set of features within `libzstd`.
|
||||
The file structure is designed to make this selection manually achievable for any build system :
|
||||
|
||||
- Directory `lib/common` is always required, for all variants.
|
||||
|
||||
- Compression source code lies in `lib/compress`
|
||||
|
||||
- Decompression source code lies in `lib/decompress`
|
||||
|
||||
- It's possible to include only `compress` or only `decompress`, they don't depend on each other.
|
||||
|
||||
- `lib/dictBuilder` : makes it possible to generate dictionaries from a set of samples.
|
||||
The API is exposed in `lib/dictBuilder/zdict.h`.
|
||||
This module depends on both `lib/common` and `lib/compress` .
|
||||
- `lib/legacy` : source code to decompress legacy zstd formats, starting from `v0.1.0`.
|
||||
|
||||
- `lib/legacy` : makes it possible to decompress legacy zstd formats, starting from `v0.1.0`.
|
||||
This module depends on `lib/common` and `lib/decompress`.
|
||||
To enable this feature, define `ZSTD_LEGACY_SUPPORT` during compilation.
|
||||
Specifying a number limits versions supported to that version onward.
|
||||
For example, `ZSTD_LEGACY_SUPPORT=2` means : "support legacy formats >= v0.2.0".
|
||||
`ZSTD_LEGACY_SUPPORT=3` means : "support legacy formats >= v0.3.0", and so on.
|
||||
Currently, the default library setting is `ZST_LEGACY_SUPPORT=5`.
|
||||
It can be changed at build by any other value.
|
||||
Note that any number >= 8 translates into "do __not__ support legacy formats",
|
||||
since all versions of `zstd` >= v0.8 are compatible with v1+ specification.
|
||||
`ZSTD_LEGACY_SUPPORT=0` also means "do __not__ support legacy formats".
|
||||
Once enabled, this capability is transparently triggered within decompression functions.
|
||||
It's also possible to invoke directly legacy API, as exposed in `lib/legacy/zstd_legacy.h`.
|
||||
Each version also provides an additional dedicated set of advanced API.
|
||||
Conversely, `ZSTD_LEGACY_SUPPORT=0` means "do __not__ support legacy formats".
|
||||
By default, this build macro is set as `ZSTD_LEGACY_SUPPORT=5`.
|
||||
Decoding supported legacy format is a transparent capability triggered within decompression functions.
|
||||
It's also allowed to invoke legacy API directly, exposed in `lib/legacy/zstd_legacy.h`.
|
||||
Each version does also provide its own set of advanced API.
|
||||
For example, advanced API for version `v0.4` is exposed in `lib/legacy/zstd_v04.h` .
|
||||
Note : `lib/legacy` only supports _decoding_ legacy formats.
|
||||
- Similarly, you can define `ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
||||
and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will
|
||||
also disable compilation of all dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable
|
||||
dictBuilder).
|
||||
|
||||
- While invoking `make libzstd`, it's possible to define build macros
|
||||
`ZSTD_LIB_COMPRESSION, ZSTD_LIB_DECOMPRESSION`, `ZSTD_LIB_DICTBUILDER`,
|
||||
and `ZSTD_LIB_DEPRECATED` as `0` to forgo compilation of the corresponding features.
|
||||
This will also disable compilation of all dependencies
|
||||
(eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder).
|
||||
|
||||
#### Multithreading support
|
||||
- There are some additional build macros that can be used to minify the decoder.
|
||||
|
||||
Multithreading is disabled by default when building with `make`.
|
||||
Enabling multithreading requires 2 conditions :
|
||||
- set macro `ZSTD_MULTITHREAD`
|
||||
- on POSIX systems : compile with pthread (`-pthread` compilation flag for `gcc`)
|
||||
Zstandard often has more than one implementation of a piece of functionality,
|
||||
where each implementation optimizes for different scenarios. For example, the
|
||||
Huffman decoder has complementary implementations that decode the stream one
|
||||
symbol at a time or two symbols at a time. Zstd normally includes both (and
|
||||
dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1`
|
||||
or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding
|
||||
compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`
|
||||
and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of
|
||||
only one or the other of two decompression implementations. The smallest
|
||||
binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and
|
||||
`ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`.
|
||||
|
||||
Both conditions are automatically triggered by invoking `make lib-mt` target.
|
||||
Note that, when linking a POSIX program with a multithreaded version of `libzstd`,
|
||||
it's necessary to trigger `-pthread` flag during link stage.
|
||||
|
||||
Multithreading capabilities are exposed
|
||||
via [advanced API `ZSTD_compress_generic()` defined in `lib/zstd.h`](https://github.com/facebook/zstd/blob/dev/lib/zstd.h#L919).
|
||||
This API is still considered experimental,
|
||||
but is expected to become "stable" at some point in the future.
|
||||
For squeezing the last ounce of size out, you can also define
|
||||
`ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`,
|
||||
which removes the error messages that are otherwise returned by
|
||||
`ZSTD_getErrorName`.
|
||||
|
||||
|
||||
#### Windows : using MinGW+MSYS to create DLL
|
||||
@@ -113,8 +139,8 @@ Consider migrating code towards supported streaming API exposed in `zstd.h`.
|
||||
|
||||
The other files are not source code. There are :
|
||||
|
||||
- `LICENSE` : contains the BSD license text
|
||||
- `Makefile` : `make` script to build and install zstd library (static and dynamic)
|
||||
- `BUCK` : support for `buck` build system (https://buckbuild.com/)
|
||||
- `libzstd.pc.in` : for `pkg-config` (used in `make install`)
|
||||
- `Makefile` : `make` script to build and install zstd library (static and dynamic)
|
||||
- `README.md` : this file
|
||||
- `dll/` : resources directory for Windows compilation
|
||||
- `libzstd.pc.in` : script for `pkg-config` (used in `make install`)
|
||||
|
||||
@@ -389,7 +389,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
* Read (consume) next n bits from local register and update.
|
||||
* Pay attention to not read more than nbBits contained into local register.
|
||||
* @return : extracted value. */
|
||||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t const value = BIT_lookBits(bitD, nbBits);
|
||||
BIT_skipBits(bitD, nbBits);
|
||||
@@ -398,7 +398,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||
|
||||
/*! BIT_readBitsFast() :
|
||||
* unsafe version; only works only if nbBits >= 1 */
|
||||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t const value = BIT_lookBitsFast(bitD, nbBits);
|
||||
assert(nbBits >= 1);
|
||||
|
||||
@@ -15,6 +15,8 @@
|
||||
* Compiler specifics
|
||||
*********************************************************/
|
||||
/* force inlining */
|
||||
|
||||
#if !defined(ZSTD_NO_INLINE)
|
||||
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
||||
# define INLINE_KEYWORD inline
|
||||
#else
|
||||
@@ -29,6 +31,13 @@
|
||||
# define FORCE_INLINE_ATTR
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define INLINE_KEYWORD
|
||||
#define FORCE_INLINE_ATTR
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
|
||||
* parameters. They must be inlined for the compiler to elimininate the constant
|
||||
@@ -89,23 +98,21 @@
|
||||
#endif
|
||||
|
||||
/* prefetch
|
||||
* can be disabled, by declaring NO_PREFETCH macro
|
||||
* All prefetch invocations use a single default locality 2,
|
||||
* generating instruction prefetcht1,
|
||||
* which, according to Intel, means "load data into L2 cache".
|
||||
* This is a good enough "middle ground" for the time being,
|
||||
* though in theory, it would be better to specialize locality depending on data being prefetched.
|
||||
* Tests could not determine any sensible difference based on locality value. */
|
||||
* can be disabled, by declaring NO_PREFETCH build macro */
|
||||
#if defined(NO_PREFETCH)
|
||||
# define PREFETCH(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
||||
#else
|
||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
||||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
||||
# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
||||
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
||||
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
|
||||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
||||
# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
||||
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
|
||||
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
|
||||
# else
|
||||
# define PREFETCH(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
|
||||
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
|
||||
# endif
|
||||
#endif /* NO_PREFETCH */
|
||||
|
||||
@@ -116,7 +123,7 @@
|
||||
size_t const _size = (size_t)(s); \
|
||||
size_t _pos; \
|
||||
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
|
||||
PREFETCH(_ptr + _pos); \
|
||||
PREFETCH_L2(_ptr + _pos); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
|
||||
__asm__(
|
||||
"pushl %%ebx\n\t"
|
||||
"cpuid\n\t"
|
||||
"movl %%ebx, %%eax\n\r"
|
||||
"movl %%ebx, %%eax\n\t"
|
||||
"popl %%ebx"
|
||||
: "=a"(f7b), "=c"(f7c)
|
||||
: "a"(7), "c"(0)
|
||||
|
||||
@@ -57,9 +57,9 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* static assert is triggered at compile time, leaving no runtime artefact,
|
||||
* but can only work with compile-time constants.
|
||||
* This variant can only be used inside a function. */
|
||||
/* static assert is triggered at compile time, leaving no runtime artefact.
|
||||
* static assert only works with compile-time constants.
|
||||
* Also, this variant can only be used inside a function. */
|
||||
#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
|
||||
|
||||
|
||||
@@ -70,9 +70,19 @@ extern "C" {
|
||||
# define DEBUGLEVEL 0
|
||||
#endif
|
||||
|
||||
|
||||
/* DEBUGFILE can be defined externally,
|
||||
* typically through compiler command line.
|
||||
* note : currently useless.
|
||||
* Value must be stderr or stdout */
|
||||
#ifndef DEBUGFILE
|
||||
# define DEBUGFILE stderr
|
||||
#endif
|
||||
|
||||
|
||||
/* recommended values for DEBUGLEVEL :
|
||||
* 0 : no debug, all run-time functions disabled
|
||||
* 1 : no display, enables assert() only
|
||||
* 0 : release mode, no debug, all run-time checks disabled
|
||||
* 1 : enables assert() only, no display
|
||||
* 2 : reserved, for currently active debug path
|
||||
* 3 : events once per object lifetime (CCtx, CDict, etc.)
|
||||
* 4 : events once per frame
|
||||
@@ -81,7 +91,7 @@ extern "C" {
|
||||
* 7+: events at every position (*very* verbose)
|
||||
*
|
||||
* It's generally inconvenient to output traces > 5.
|
||||
* In which case, it's possible to selectively enable higher verbosity levels
|
||||
* In which case, it's possible to selectively trigger high verbosity levels
|
||||
* by modifying g_debug_level.
|
||||
*/
|
||||
|
||||
@@ -95,11 +105,12 @@ extern "C" {
|
||||
|
||||
#if (DEBUGLEVEL>=2)
|
||||
# include <stdio.h>
|
||||
extern int g_debuglevel; /* here, this variable is only declared,
|
||||
it actually lives in debug.c,
|
||||
and is shared by the whole process.
|
||||
It's typically used to enable very verbose levels
|
||||
on selective conditions (such as position in src) */
|
||||
extern int g_debuglevel; /* the variable is only declared,
|
||||
it actually lives in debug.c,
|
||||
and is shared by the whole process.
|
||||
It's not thread-safe.
|
||||
It's useful when enabling very verbose levels
|
||||
on selective conditions (such as position in src) */
|
||||
|
||||
# define RAWLOG(l, ...) { \
|
||||
if (l<=g_debuglevel) { \
|
||||
|
||||
@@ -14,6 +14,10 @@
|
||||
|
||||
const char* ERR_getErrorString(ERR_enum code)
|
||||
{
|
||||
#ifdef ZSTD_STRIP_ERROR_STRINGS
|
||||
(void)code;
|
||||
return "Error strings stripped";
|
||||
#else
|
||||
static const char* const notErrorCode = "Unspecified error code";
|
||||
switch( code )
|
||||
{
|
||||
@@ -39,10 +43,12 @@ const char* ERR_getErrorString(ERR_enum code)
|
||||
case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
|
||||
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
|
||||
case PREFIX(srcSize_wrong): return "Src size is incorrect";
|
||||
case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
|
||||
/* following error codes are not stable and may be removed or changed in a future version */
|
||||
case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
|
||||
case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
|
||||
case PREFIX(maxCode):
|
||||
default: return notErrorCode;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -512,7 +512,7 @@ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
|
||||
const U32 tableLog = MEM_read16(ptr);
|
||||
statePtr->value = (ptrdiff_t)1<<tableLog;
|
||||
statePtr->stateTable = u16ptr+2;
|
||||
statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
|
||||
statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
|
||||
statePtr->stateLog = tableLog;
|
||||
}
|
||||
|
||||
@@ -531,7 +531,7 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3
|
||||
}
|
||||
}
|
||||
|
||||
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
|
||||
MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
|
||||
{
|
||||
FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
|
||||
const U16* const stateTable = (const U16*)(statePtr->stateTable);
|
||||
|
||||
@@ -115,7 +115,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
|
||||
/* symbol start positions */
|
||||
{ U32 u;
|
||||
cumul[0] = 0;
|
||||
for (u=1; u<=maxSymbolValue+1; u++) {
|
||||
for (u=1; u <= maxSymbolValue+1; u++) {
|
||||
if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
|
||||
cumul[u] = cumul[u-1] + 1;
|
||||
tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
|
||||
@@ -658,7 +658,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
||||
BYTE* op = ostart;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
|
||||
U32 count[FSE_MAX_SYMBOL_VALUE+1];
|
||||
unsigned count[FSE_MAX_SYMBOL_VALUE+1];
|
||||
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
|
||||
FSE_CTable* CTable = (FSE_CTable*)workSpace;
|
||||
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
|
||||
@@ -672,7 +672,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
|
||||
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
|
||||
{ CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
|
||||
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
||||
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
||||
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
||||
|
||||
@@ -73,6 +73,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
return largestCount;
|
||||
}
|
||||
|
||||
typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
|
||||
|
||||
/* HIST_count_parallel_wksp() :
|
||||
* store histogram into 4 intermediate tables, recombined at the end.
|
||||
@@ -85,8 +86,8 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
static size_t HIST_count_parallel_wksp(
|
||||
unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
unsigned checkMax,
|
||||
unsigned* const workSpace)
|
||||
HIST_checkInput_e check,
|
||||
U32* const workSpace)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*)source;
|
||||
const BYTE* const iend = ip+sourceSize;
|
||||
@@ -137,7 +138,7 @@ static size_t HIST_count_parallel_wksp(
|
||||
/* finish last symbols */
|
||||
while (ip<iend) Counting1[*ip++]++;
|
||||
|
||||
if (checkMax) { /* verify stats will fit into destination table */
|
||||
if (check) { /* verify stats will fit into destination table */
|
||||
U32 s; for (s=255; s>maxSymbolValue; s--) {
|
||||
Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
|
||||
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
||||
@@ -157,14 +158,18 @@ static size_t HIST_count_parallel_wksp(
|
||||
|
||||
/* HIST_countFast_wksp() :
|
||||
* Same as HIST_countFast(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
|
||||
* `workSpace` is a writable buffer which must be 4-bytes aligned,
|
||||
* `workSpaceSize` must be >= HIST_WKSP_SIZE
|
||||
*/
|
||||
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize,
|
||||
unsigned* workSpace)
|
||||
void* workSpace, size_t workSpaceSize)
|
||||
{
|
||||
if (sourceSize < 1500) /* heuristic threshold */
|
||||
return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
|
||||
}
|
||||
|
||||
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
||||
@@ -172,24 +177,27 @@ size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
|
||||
/* HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
* `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
|
||||
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* source, size_t sourceSize, unsigned* workSpace)
|
||||
const void* source, size_t sourceSize,
|
||||
void* workSpace, size_t workSpaceSize)
|
||||
{
|
||||
if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
|
||||
if (*maxSymbolValuePtr < 255)
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
|
||||
return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
|
||||
*maxSymbolValuePtr = 255;
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
|
||||
return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
|
||||
}
|
||||
|
||||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
unsigned tmpCounters[HIST_WKSP_SIZE_U32];
|
||||
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
|
||||
return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
|
||||
}
|
||||
|
||||
@@ -41,11 +41,11 @@
|
||||
|
||||
/*! HIST_count():
|
||||
* Provides the precise count of each byte within a table 'count'.
|
||||
* 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
|
||||
* 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
|
||||
* Updates *maxSymbolValuePtr with actual largest symbol value detected.
|
||||
* @return : count of the most frequent symbol (which isn't identified).
|
||||
* or an error code, which can be tested using HIST_isError().
|
||||
* note : if return == srcSize, there is only one symbol.
|
||||
* @return : count of the most frequent symbol (which isn't identified).
|
||||
* or an error code, which can be tested using HIST_isError().
|
||||
* note : if return == srcSize, there is only one symbol.
|
||||
*/
|
||||
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize);
|
||||
@@ -56,14 +56,16 @@ unsigned HIST_isError(size_t code); /**< tells if a return value is an error co
|
||||
/* --- advanced histogram functions --- */
|
||||
|
||||
#define HIST_WKSP_SIZE_U32 1024
|
||||
#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
|
||||
/** HIST_count_wksp() :
|
||||
* Same as HIST_count(), but using an externally provided scratch buffer.
|
||||
* Benefit is this function will use very little stack space.
|
||||
* `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
|
||||
* `workSpace` is a writable buffer which must be 4-bytes aligned,
|
||||
* `workSpaceSize` must be >= HIST_WKSP_SIZE
|
||||
*/
|
||||
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned* workSpace);
|
||||
void* workSpace, size_t workSpaceSize);
|
||||
|
||||
/** HIST_countFast() :
|
||||
* same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
|
||||
@@ -74,11 +76,12 @@ size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
|
||||
/** HIST_countFast_wksp() :
|
||||
* Same as HIST_countFast(), but using an externally provided scratch buffer.
|
||||
* `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
|
||||
* `workSpace` is a writable buffer which must be 4-bytes aligned,
|
||||
* `workSpaceSize` must be >= HIST_WKSP_SIZE
|
||||
*/
|
||||
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned* workSpace);
|
||||
void* workSpace, size_t workSpaceSize);
|
||||
|
||||
/*! HIST_count_simple() :
|
||||
* Same as HIST_countFast(), this function is unsafe,
|
||||
|
||||
26
C/zstd/huf.h
26
C/zstd/huf.h
@@ -173,15 +173,19 @@ typedef U32 HUF_DTable;
|
||||
* Advanced decompression functions
|
||||
******************************************/
|
||||
size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
|
||||
size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
|
||||
size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
|
||||
size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
|
||||
/* ****************************************
|
||||
@@ -228,7 +232,7 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
|
||||
#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree,
|
||||
const U32* count, U32 maxSymbolValue, U32 maxNbBits,
|
||||
const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
|
||||
void* workSpace, size_t wkspSize);
|
||||
|
||||
/*! HUF_readStats() :
|
||||
@@ -277,14 +281,22 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
|
||||
#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
|
||||
size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
|
||||
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
|
||||
|
||||
/* ====================== */
|
||||
@@ -306,24 +318,36 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
|
||||
|
||||
size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||
size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
|
||||
size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
|
||||
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
|
||||
#endif
|
||||
|
||||
/* BMI2 variants.
|
||||
* If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
|
||||
*/
|
||||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
#endif
|
||||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
|
||||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
|
||||
|
||||
|
||||
@@ -88,13 +88,13 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
|
||||
BYTE* op = ostart;
|
||||
BYTE* const oend = ostart + dstSize;
|
||||
|
||||
U32 maxSymbolValue = HUF_TABLELOG_MAX;
|
||||
unsigned maxSymbolValue = HUF_TABLELOG_MAX;
|
||||
U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
|
||||
|
||||
FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
|
||||
BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
|
||||
|
||||
U32 count[HUF_TABLELOG_MAX+1];
|
||||
unsigned count[HUF_TABLELOG_MAX+1];
|
||||
S16 norm[HUF_TABLELOG_MAX+1];
|
||||
|
||||
/* init conditions */
|
||||
@@ -134,7 +134,7 @@ struct HUF_CElt_s {
|
||||
`CTable` : Huffman tree to save, using huf representation.
|
||||
@return : size of saved CTable */
|
||||
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
||||
const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
|
||||
const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
|
||||
{
|
||||
BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
|
||||
BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
|
||||
@@ -169,7 +169,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
|
||||
}
|
||||
|
||||
|
||||
size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize)
|
||||
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
|
||||
U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
|
||||
@@ -315,7 +315,7 @@ typedef struct {
|
||||
U32 current;
|
||||
} rankPos;
|
||||
|
||||
static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
|
||||
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
|
||||
{
|
||||
rankPos rank[32];
|
||||
U32 n;
|
||||
@@ -347,7 +347,7 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
|
||||
*/
|
||||
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
|
||||
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
nodeElt* const huffNode0 = (nodeElt*)workSpace;
|
||||
nodeElt* const huffNode = huffNode0+1;
|
||||
@@ -421,7 +421,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
|
||||
* @return : maxNbBits
|
||||
* Note : count is used before tree is written, so they can safely overlap
|
||||
*/
|
||||
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
|
||||
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
|
||||
{
|
||||
huffNodeTable nodeTable;
|
||||
return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
|
||||
@@ -610,13 +610,14 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
|
||||
return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
|
||||
}
|
||||
|
||||
typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
|
||||
|
||||
static size_t HUF_compressCTable_internal(
|
||||
BYTE* const ostart, BYTE* op, BYTE* const oend,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
|
||||
HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
|
||||
{
|
||||
size_t const cSize = singleStream ?
|
||||
size_t const cSize = (nbStreams==HUF_singleStream) ?
|
||||
HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
|
||||
HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
|
||||
if (HUF_isError(cSize)) { return cSize; }
|
||||
@@ -628,21 +629,21 @@ static size_t HUF_compressCTable_internal(
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
U32 count[HUF_SYMBOLVALUE_MAX + 1];
|
||||
unsigned count[HUF_SYMBOLVALUE_MAX + 1];
|
||||
HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
|
||||
huffNodeTable nodeTable;
|
||||
} HUF_compress_tables_t;
|
||||
|
||||
/* HUF_compress_internal() :
|
||||
* `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
|
||||
static size_t HUF_compress_internal (
|
||||
void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
unsigned singleStream,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
|
||||
const int bmi2)
|
||||
static size_t
|
||||
HUF_compress_internal (void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
unsigned maxSymbolValue, unsigned huffLog,
|
||||
HUF_nbStreams_e nbStreams,
|
||||
void* workSpace, size_t wkspSize,
|
||||
HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
|
||||
const int bmi2)
|
||||
{
|
||||
HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
|
||||
BYTE* const ostart = (BYTE*)dst;
|
||||
@@ -651,7 +652,7 @@ static size_t HUF_compress_internal (
|
||||
|
||||
/* checks & inits */
|
||||
if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
|
||||
if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
|
||||
if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
|
||||
if (!srcSize) return 0; /* Uncompressed */
|
||||
if (!dstSize) return 0; /* cannot fit anything within dst budget */
|
||||
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
|
||||
@@ -664,11 +665,11 @@ static size_t HUF_compress_internal (
|
||||
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
|
||||
return HUF_compressCTable_internal(ostart, op, oend,
|
||||
src, srcSize,
|
||||
singleStream, oldHufTable, bmi2);
|
||||
nbStreams, oldHufTable, bmi2);
|
||||
}
|
||||
|
||||
/* Scan input and build symbol stats */
|
||||
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
|
||||
{ CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
|
||||
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
|
||||
if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
|
||||
}
|
||||
@@ -683,14 +684,15 @@ static size_t HUF_compress_internal (
|
||||
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
|
||||
return HUF_compressCTable_internal(ostart, op, oend,
|
||||
src, srcSize,
|
||||
singleStream, oldHufTable, bmi2);
|
||||
nbStreams, oldHufTable, bmi2);
|
||||
}
|
||||
|
||||
/* Build Huffman Tree */
|
||||
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
|
||||
{ CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
|
||||
maxSymbolValue, huffLog,
|
||||
table->nodeTable, sizeof(table->nodeTable)) );
|
||||
{ size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
|
||||
maxSymbolValue, huffLog,
|
||||
table->nodeTable, sizeof(table->nodeTable));
|
||||
CHECK_F(maxBits);
|
||||
huffLog = (U32)maxBits;
|
||||
/* Zero unused symbols in CTable, so we can check it for validity */
|
||||
memset(table->CTable + (maxSymbolValue + 1), 0,
|
||||
@@ -706,7 +708,7 @@ static size_t HUF_compress_internal (
|
||||
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
|
||||
return HUF_compressCTable_internal(ostart, op, oend,
|
||||
src, srcSize,
|
||||
singleStream, oldHufTable, bmi2);
|
||||
nbStreams, oldHufTable, bmi2);
|
||||
} }
|
||||
|
||||
/* Use the new huffman table */
|
||||
@@ -718,7 +720,7 @@ static size_t HUF_compress_internal (
|
||||
}
|
||||
return HUF_compressCTable_internal(ostart, op, oend,
|
||||
src, srcSize,
|
||||
singleStream, table->CTable, bmi2);
|
||||
nbStreams, table->CTable, bmi2);
|
||||
}
|
||||
|
||||
|
||||
@@ -728,7 +730,7 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, 1 /*single stream*/,
|
||||
maxSymbolValue, huffLog, HUF_singleStream,
|
||||
workSpace, wkspSize,
|
||||
NULL, NULL, 0, 0 /*bmi2*/);
|
||||
}
|
||||
@@ -740,7 +742,7 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, 1 /*single stream*/,
|
||||
maxSymbolValue, huffLog, HUF_singleStream,
|
||||
workSpace, wkspSize, hufTable,
|
||||
repeat, preferRepeat, bmi2);
|
||||
}
|
||||
@@ -762,7 +764,7 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, 0 /*4 streams*/,
|
||||
maxSymbolValue, huffLog, HUF_fourStreams,
|
||||
workSpace, wkspSize,
|
||||
NULL, NULL, 0, 0 /*bmi2*/);
|
||||
}
|
||||
@@ -777,7 +779,7 @@ size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
|
||||
HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
|
||||
{
|
||||
return HUF_compress_internal(dst, dstSize, src, srcSize,
|
||||
maxSymbolValue, huffLog, 0 /* 4 streams */,
|
||||
maxSymbolValue, huffLog, HUF_fourStreams,
|
||||
workSpace, wkspSize,
|
||||
hufTable, repeat, preferRepeat, bmi2);
|
||||
}
|
||||
|
||||
@@ -43,6 +43,19 @@
|
||||
#include "huf.h"
|
||||
#include "error_private.h"
|
||||
|
||||
/* **************************************************************
|
||||
* Macros
|
||||
****************************************************************/
|
||||
|
||||
/* These two optional macros force the use one way or another of the two
|
||||
* Huffman decompression implementations. You can't force in both directions
|
||||
* at the same time.
|
||||
*/
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
|
||||
defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
|
||||
#endif
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
* Error Management
|
||||
@@ -58,6 +71,51 @@
|
||||
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
* BMI2 Variant Wrappers
|
||||
****************************************************************/
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
#define HUF_DGEN(fn) \
|
||||
\
|
||||
static size_t fn##_default( \
|
||||
void* dst, size_t dstSize, \
|
||||
const void* cSrc, size_t cSrcSize, \
|
||||
const HUF_DTable* DTable) \
|
||||
{ \
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
\
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
|
||||
void* dst, size_t dstSize, \
|
||||
const void* cSrc, size_t cSrcSize, \
|
||||
const HUF_DTable* DTable) \
|
||||
{ \
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
\
|
||||
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
||||
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
||||
{ \
|
||||
if (bmi2) { \
|
||||
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define HUF_DGEN(fn) \
|
||||
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
||||
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
||||
{ \
|
||||
(void)bmi2; \
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*-***************************/
|
||||
/* generic DTableDesc */
|
||||
/*-***************************/
|
||||
@@ -71,6 +129,8 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
||||
}
|
||||
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
|
||||
/*-***************************/
|
||||
/* single-symbol decoding */
|
||||
/*-***************************/
|
||||
@@ -307,46 +367,6 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
||||
const void *cSrc,
|
||||
size_t cSrcSize,
|
||||
const HUF_DTable *DTable);
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
#define HUF_DGEN(fn) \
|
||||
\
|
||||
static size_t fn##_default( \
|
||||
void* dst, size_t dstSize, \
|
||||
const void* cSrc, size_t cSrcSize, \
|
||||
const HUF_DTable* DTable) \
|
||||
{ \
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
\
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
|
||||
void* dst, size_t dstSize, \
|
||||
const void* cSrc, size_t cSrcSize, \
|
||||
const HUF_DTable* DTable) \
|
||||
{ \
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
\
|
||||
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
||||
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
||||
{ \
|
||||
if (bmi2) { \
|
||||
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define HUF_DGEN(fn) \
|
||||
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
||||
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
||||
{ \
|
||||
(void)bmi2; \
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
|
||||
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
|
||||
@@ -437,6 +457,10 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
||||
return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
#endif /* HUF_FORCE_DECOMPRESS_X2 */
|
||||
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X1
|
||||
|
||||
/* *************************/
|
||||
/* double-symbols decoding */
|
||||
@@ -911,6 +935,8 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
||||
return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
||||
}
|
||||
|
||||
#endif /* HUF_FORCE_DECOMPRESS_X1 */
|
||||
|
||||
|
||||
/* ***********************************/
|
||||
/* Universal decompression selectors */
|
||||
@@ -921,8 +947,18 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
||||
const HUF_DTable* DTable)
|
||||
{
|
||||
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 0);
|
||||
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 1);
|
||||
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
||||
#else
|
||||
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
||||
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
||||
@@ -930,11 +966,22 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
||||
const HUF_DTable* DTable)
|
||||
{
|
||||
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 0);
|
||||
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 1);
|
||||
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
||||
#else
|
||||
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
||||
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
|
||||
static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
|
||||
{
|
||||
@@ -956,6 +1003,7 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
|
||||
{{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
|
||||
{{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
|
||||
};
|
||||
#endif
|
||||
|
||||
/** HUF_selectDecoder() :
|
||||
* Tells which decoder is likely to decode faster,
|
||||
@@ -966,6 +1014,15 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
||||
{
|
||||
assert(dstSize > 0);
|
||||
assert(dstSize <= 128*1024);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)dstSize;
|
||||
(void)cSrcSize;
|
||||
return 0;
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)dstSize;
|
||||
(void)cSrcSize;
|
||||
return 1;
|
||||
#else
|
||||
/* decoder timing evaluation */
|
||||
{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
|
||||
U32 const D256 = (U32)(dstSize >> 8);
|
||||
@@ -973,14 +1030,18 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
||||
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
||||
DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
|
||||
return DTime1 < DTime0;
|
||||
} }
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
||||
|
||||
size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
||||
{
|
||||
#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
|
||||
#endif
|
||||
|
||||
/* validation checks */
|
||||
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
||||
@@ -989,7 +1050,17 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
|
||||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
|
||||
#else
|
||||
return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1002,8 +1073,18 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
|
||||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
|
||||
#else
|
||||
return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
|
||||
HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1025,8 +1106,19 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
|
||||
if (cSrcSize == 0) return ERROR(corruption_detected);
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
||||
#else
|
||||
return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
||||
cSrcSize, workSpace, wkspSize):
|
||||
HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1041,10 +1133,22 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
||||
cSrcSize, workSpace, wkspSize);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
||||
cSrcSize, workSpace, wkspSize);
|
||||
#else
|
||||
return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
||||
cSrcSize, workSpace, wkspSize):
|
||||
HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
|
||||
cSrcSize, workSpace, wkspSize);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1060,10 +1164,21 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
||||
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
||||
{
|
||||
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 0);
|
||||
return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 1);
|
||||
return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
||||
#else
|
||||
return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
||||
HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef HUF_FORCE_DECOMPRESS_X2
|
||||
size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
||||
{
|
||||
const BYTE* ip = (const BYTE*) cSrc;
|
||||
@@ -1075,12 +1190,23 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
|
||||
|
||||
return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
||||
}
|
||||
#endif
|
||||
|
||||
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
||||
{
|
||||
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 0);
|
||||
return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)dtd;
|
||||
assert(dtd.tableType == 1);
|
||||
return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
||||
#else
|
||||
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
||||
HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
||||
@@ -1090,7 +1216,17 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
|
||||
if (cSrcSize == 0) return ERROR(corruption_detected);
|
||||
|
||||
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
||||
#if defined(HUF_FORCE_DECOMPRESS_X1)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 0);
|
||||
return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
||||
#elif defined(HUF_FORCE_DECOMPRESS_X2)
|
||||
(void)algoNb;
|
||||
assert(algoNb == 1);
|
||||
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
||||
#else
|
||||
return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
|
||||
HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,8 +88,8 @@ static void* POOL_thread(void* opaque) {
|
||||
ctx->numThreadsBusy++;
|
||||
ctx->queueEmpty = ctx->queueHead == ctx->queueTail;
|
||||
/* Unlock the mutex, signal a pusher, and run the job */
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
|
||||
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
|
||||
|
||||
job.function(job.opaque);
|
||||
|
||||
|
||||
1564
C/zstd/zstd.h
1564
C/zstd/zstd.h
File diff suppressed because it is too large
Load Diff
@@ -30,8 +30,10 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
|
||||
/*-****************************************
|
||||
* ZSTD Error Management
|
||||
******************************************/
|
||||
#undef ZSTD_isError /* defined within zstd_internal.h */
|
||||
/*! ZSTD_isError() :
|
||||
* tells if a return value is an error code */
|
||||
* tells if a return value is an error code
|
||||
* symbol is required for external callers */
|
||||
unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
|
||||
|
||||
/*! ZSTD_getErrorName() :
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -48,12 +48,6 @@ extern "C" {
|
||||
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
|
||||
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
|
||||
|
||||
typedef enum {
|
||||
ZSTD_dictDefaultAttach = 0,
|
||||
ZSTD_dictForceAttach = 1,
|
||||
ZSTD_dictForceCopy = -1,
|
||||
} ZSTD_dictAttachPref_e;
|
||||
|
||||
typedef struct ZSTD_prefixDict_s {
|
||||
const void* dict;
|
||||
size_t dictSize;
|
||||
@@ -96,10 +90,10 @@ typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
|
||||
|
||||
typedef struct {
|
||||
/* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
|
||||
U32* litFreq; /* table of literals statistics, of size 256 */
|
||||
U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
|
||||
U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
|
||||
U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
|
||||
unsigned* litFreq; /* table of literals statistics, of size 256 */
|
||||
unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
|
||||
unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
|
||||
unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
|
||||
ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
|
||||
ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
|
||||
|
||||
@@ -139,7 +133,7 @@ struct ZSTD_matchState_t {
|
||||
U32* hashTable3;
|
||||
U32* chainTable;
|
||||
optState_t opt; /* optimal parser state */
|
||||
const ZSTD_matchState_t *dictMatchState;
|
||||
const ZSTD_matchState_t * dictMatchState;
|
||||
ZSTD_compressionParameters cParams;
|
||||
};
|
||||
|
||||
@@ -167,7 +161,7 @@ typedef struct {
|
||||
U32 hashLog; /* Log size of hashTable */
|
||||
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
|
||||
U32 minMatchLength; /* Minimum match length */
|
||||
U32 hashEveryLog; /* Log number of entries to skip */
|
||||
U32 hashRateLog; /* Log number of entries to skip */
|
||||
U32 windowLog; /* Window log for the LDM */
|
||||
} ldmParams_t;
|
||||
|
||||
@@ -196,9 +190,10 @@ struct ZSTD_CCtx_params_s {
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
|
||||
/* Multithreading: used to pass parameters to mtctx */
|
||||
unsigned nbWorkers;
|
||||
unsigned jobSize;
|
||||
unsigned overlapSizeLog;
|
||||
int nbWorkers;
|
||||
size_t jobSize;
|
||||
int overlapLog;
|
||||
int rsyncable;
|
||||
|
||||
/* Long distance matching parameters */
|
||||
ldmParams_t ldmParams;
|
||||
@@ -498,6 +493,64 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_ipow() :
|
||||
* Return base^exponent.
|
||||
*/
|
||||
static U64 ZSTD_ipow(U64 base, U64 exponent)
|
||||
{
|
||||
U64 power = 1;
|
||||
while (exponent) {
|
||||
if (exponent & 1) power *= base;
|
||||
exponent >>= 1;
|
||||
base *= base;
|
||||
}
|
||||
return power;
|
||||
}
|
||||
|
||||
#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
|
||||
|
||||
/** ZSTD_rollingHash_append() :
|
||||
* Add the buffer to the hash value.
|
||||
*/
|
||||
static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
|
||||
{
|
||||
BYTE const* istart = (BYTE const*)buf;
|
||||
size_t pos;
|
||||
for (pos = 0; pos < size; ++pos) {
|
||||
hash *= prime8bytes;
|
||||
hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
/** ZSTD_rollingHash_compute() :
|
||||
* Compute the rolling hash value of the buffer.
|
||||
*/
|
||||
MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
|
||||
{
|
||||
return ZSTD_rollingHash_append(0, buf, size);
|
||||
}
|
||||
|
||||
/** ZSTD_rollingHash_primePower() :
|
||||
* Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
|
||||
* over a window of length bytes.
|
||||
*/
|
||||
MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
|
||||
{
|
||||
return ZSTD_ipow(prime8bytes, length - 1);
|
||||
}
|
||||
|
||||
/** ZSTD_rollingHash_rotate() :
|
||||
* Rotate the rolling hash by one byte.
|
||||
*/
|
||||
MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
|
||||
{
|
||||
hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
|
||||
hash *= prime8bytes;
|
||||
hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
|
||||
return hash;
|
||||
}
|
||||
|
||||
/*-*************************************
|
||||
* Round buffer management
|
||||
***************************************/
|
||||
@@ -626,20 +679,23 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
|
||||
* dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
|
||||
* is below them. forceWindow and dictMatchState are therefore incompatible.
|
||||
*/
|
||||
MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||
void const* srcEnd, U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||
MEM_STATIC void
|
||||
ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
|
||||
void const* srcEnd,
|
||||
U32 maxDist,
|
||||
U32* loadedDictEndPtr,
|
||||
const ZSTD_matchState_t** dictMatchStatePtr)
|
||||
{
|
||||
U32 const current = (U32)((BYTE const*)srcEnd - window->base);
|
||||
U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist);
|
||||
if (current > maxDist + loadedDictEnd) {
|
||||
U32 const newLowLimit = current - maxDist;
|
||||
U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
|
||||
U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
|
||||
DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
|
||||
(unsigned)blockEndIdx, (unsigned)maxDist);
|
||||
if (blockEndIdx > maxDist + loadedDictEnd) {
|
||||
U32 const newLowLimit = blockEndIdx - maxDist;
|
||||
if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
|
||||
if (window->dictLimit < window->lowLimit) {
|
||||
DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
|
||||
window->dictLimit, window->lowLimit);
|
||||
(unsigned)window->dictLimit, (unsigned)window->lowLimit);
|
||||
window->dictLimit = window->lowLimit;
|
||||
}
|
||||
if (loadedDictEndPtr)
|
||||
@@ -690,20 +746,23 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
|
||||
|
||||
|
||||
/* debug functions */
|
||||
#if (DEBUGLEVEL>=2)
|
||||
|
||||
MEM_STATIC double ZSTD_fWeight(U32 rawStat)
|
||||
{
|
||||
U32 const fp_accuracy = 8;
|
||||
U32 const fp_multiplier = (1 << fp_accuracy);
|
||||
U32 const stat = rawStat + 1;
|
||||
U32 const hb = ZSTD_highbit32(stat);
|
||||
U32 const newStat = rawStat + 1;
|
||||
U32 const hb = ZSTD_highbit32(newStat);
|
||||
U32 const BWeight = hb * fp_multiplier;
|
||||
U32 const FWeight = (stat << fp_accuracy) >> hb;
|
||||
U32 const FWeight = (newStat << fp_accuracy) >> hb;
|
||||
U32 const weight = BWeight + FWeight;
|
||||
assert(hb + fp_accuracy < 31);
|
||||
return (double)weight / fp_multiplier;
|
||||
}
|
||||
|
||||
/* display a table content,
|
||||
* listing each element, its frequency, and its predicted bit cost */
|
||||
MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
||||
{
|
||||
unsigned u, sum;
|
||||
@@ -715,6 +774,9 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
240
C/zstd/zstd_ddict.c
Normal file
240
C/zstd/zstd_ddict.c
Normal file
@@ -0,0 +1,240 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
/* zstd_ddict.c :
|
||||
* concentrates all logic that needs to know the internals of ZSTD_DDict object */
|
||||
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <string.h> /* memcpy, memmove, memset */
|
||||
#include "cpu.h" /* bmi2 */
|
||||
#include "mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "fse.h"
|
||||
#define HUF_STATIC_LINKING_ONLY
|
||||
#include "huf.h"
|
||||
#include "zstd_decompress_internal.h"
|
||||
#include "zstd_ddict.h"
|
||||
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
|
||||
# include "zstd_legacy.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Types
|
||||
*********************************************************/
|
||||
struct ZSTD_DDict_s {
|
||||
void* dictBuffer;
|
||||
const void* dictContent;
|
||||
size_t dictSize;
|
||||
ZSTD_entropyDTables_t entropy;
|
||||
U32 dictID;
|
||||
U32 entropyPresent;
|
||||
ZSTD_customMem cMem;
|
||||
}; /* typedef'd to ZSTD_DDict within "zstd.h" */
|
||||
|
||||
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
|
||||
{
|
||||
assert(ddict != NULL);
|
||||
return ddict->dictContent;
|
||||
}
|
||||
|
||||
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
|
||||
{
|
||||
assert(ddict != NULL);
|
||||
return ddict->dictSize;
|
||||
}
|
||||
|
||||
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTD_copyDDictParameters");
|
||||
assert(dctx != NULL);
|
||||
assert(ddict != NULL);
|
||||
dctx->dictID = ddict->dictID;
|
||||
dctx->prefixStart = ddict->dictContent;
|
||||
dctx->virtualStart = ddict->dictContent;
|
||||
dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
|
||||
dctx->previousDstEnd = dctx->dictEnd;
|
||||
if (ddict->entropyPresent) {
|
||||
dctx->litEntropy = 1;
|
||||
dctx->fseEntropy = 1;
|
||||
dctx->LLTptr = ddict->entropy.LLTable;
|
||||
dctx->MLTptr = ddict->entropy.MLTable;
|
||||
dctx->OFTptr = ddict->entropy.OFTable;
|
||||
dctx->HUFptr = ddict->entropy.hufTable;
|
||||
dctx->entropy.rep[0] = ddict->entropy.rep[0];
|
||||
dctx->entropy.rep[1] = ddict->entropy.rep[1];
|
||||
dctx->entropy.rep[2] = ddict->entropy.rep[2];
|
||||
} else {
|
||||
dctx->litEntropy = 0;
|
||||
dctx->fseEntropy = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
|
||||
ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
ddict->dictID = 0;
|
||||
ddict->entropyPresent = 0;
|
||||
if (dictContentType == ZSTD_dct_rawContent) return 0;
|
||||
|
||||
if (ddict->dictSize < 8) {
|
||||
if (dictContentType == ZSTD_dct_fullDict)
|
||||
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
|
||||
return 0; /* pure content mode */
|
||||
}
|
||||
{ U32 const magic = MEM_readLE32(ddict->dictContent);
|
||||
if (magic != ZSTD_MAGIC_DICTIONARY) {
|
||||
if (dictContentType == ZSTD_dct_fullDict)
|
||||
return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
|
||||
return 0; /* pure content mode */
|
||||
}
|
||||
}
|
||||
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
|
||||
|
||||
/* load entropy tables */
|
||||
CHECK_E( ZSTD_loadDEntropy(&ddict->entropy,
|
||||
ddict->dictContent, ddict->dictSize),
|
||||
dictionary_corrupted );
|
||||
ddict->entropyPresent = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
|
||||
ddict->dictBuffer = NULL;
|
||||
ddict->dictContent = dict;
|
||||
if (!dict) dictSize = 0;
|
||||
} else {
|
||||
void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
|
||||
ddict->dictBuffer = internalBuffer;
|
||||
ddict->dictContent = internalBuffer;
|
||||
if (!internalBuffer) return ERROR(memory_allocation);
|
||||
memcpy(internalBuffer, dict, dictSize);
|
||||
}
|
||||
ddict->dictSize = dictSize;
|
||||
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
|
||||
|
||||
/* parse dictionary content */
|
||||
CHECK_F( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType,
|
||||
ZSTD_customMem customMem)
|
||||
{
|
||||
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
|
||||
|
||||
{ ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
|
||||
if (ddict == NULL) return NULL;
|
||||
ddict->cMem = customMem;
|
||||
{ size_t const initResult = ZSTD_initDDict_internal(ddict,
|
||||
dict, dictSize,
|
||||
dictLoadMethod, dictContentType);
|
||||
if (ZSTD_isError(initResult)) {
|
||||
ZSTD_freeDDict(ddict);
|
||||
return NULL;
|
||||
} }
|
||||
return ddict;
|
||||
}
|
||||
}
|
||||
|
||||
/*! ZSTD_createDDict() :
|
||||
* Create a digested dictionary, to start decompression without startup delay.
|
||||
* `dict` content is copied inside DDict.
|
||||
* Consequently, `dict` can be released after `ZSTD_DDict` creation */
|
||||
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
|
||||
{
|
||||
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
||||
return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
|
||||
}
|
||||
|
||||
/*! ZSTD_createDDict_byReference() :
|
||||
* Create a digested dictionary, to start decompression without startup delay.
|
||||
* Dictionary content is simply referenced, it will be accessed during decompression.
|
||||
* Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
|
||||
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
|
||||
{
|
||||
ZSTD_customMem const allocator = { NULL, NULL, NULL };
|
||||
return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
|
||||
}
|
||||
|
||||
|
||||
const ZSTD_DDict* ZSTD_initStaticDDict(
|
||||
void* sBuffer, size_t sBufferSize,
|
||||
const void* dict, size_t dictSize,
|
||||
ZSTD_dictLoadMethod_e dictLoadMethod,
|
||||
ZSTD_dictContentType_e dictContentType)
|
||||
{
|
||||
size_t const neededSpace = sizeof(ZSTD_DDict)
|
||||
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
|
||||
ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
|
||||
assert(sBuffer != NULL);
|
||||
assert(dict != NULL);
|
||||
if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
|
||||
if (sBufferSize < neededSpace) return NULL;
|
||||
if (dictLoadMethod == ZSTD_dlm_byCopy) {
|
||||
memcpy(ddict+1, dict, dictSize); /* local copy */
|
||||
dict = ddict+1;
|
||||
}
|
||||
if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
|
||||
dict, dictSize,
|
||||
ZSTD_dlm_byRef, dictContentType) ))
|
||||
return NULL;
|
||||
return ddict;
|
||||
}
|
||||
|
||||
|
||||
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
|
||||
{
|
||||
if (ddict==NULL) return 0; /* support free on NULL */
|
||||
{ ZSTD_customMem const cMem = ddict->cMem;
|
||||
ZSTD_free(ddict->dictBuffer, cMem);
|
||||
ZSTD_free(ddict, cMem);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*! ZSTD_estimateDDictSize() :
|
||||
* Estimate amount of memory that will be needed to create a dictionary for decompression.
|
||||
* Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
|
||||
size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
|
||||
{
|
||||
return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
|
||||
}
|
||||
|
||||
size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
|
||||
{
|
||||
if (ddict==NULL) return 0; /* support sizeof on NULL */
|
||||
return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
|
||||
}
|
||||
|
||||
/*! ZSTD_getDictID_fromDDict() :
|
||||
* Provides the dictID of the dictionary loaded into `ddict`.
|
||||
* If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
|
||||
* Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
|
||||
unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
|
||||
{
|
||||
if (ddict==NULL) return 0;
|
||||
return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
|
||||
}
|
||||
44
C/zstd/zstd_ddict.h
Normal file
44
C/zstd/zstd_ddict.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ZSTD_DDICT_H
|
||||
#define ZSTD_DDICT_H
|
||||
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd.h" /* ZSTD_DDict, and several public functions */
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Interface
|
||||
*********************************************************/
|
||||
|
||||
/* note: several prototypes are already published in `zstd.h` :
|
||||
* ZSTD_createDDict()
|
||||
* ZSTD_createDDict_byReference()
|
||||
* ZSTD_createDDict_advanced()
|
||||
* ZSTD_freeDDict()
|
||||
* ZSTD_initStaticDDict()
|
||||
* ZSTD_sizeof_DDict()
|
||||
* ZSTD_estimateDDictSize()
|
||||
* ZSTD_getDictID_fromDict()
|
||||
*/
|
||||
|
||||
const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
|
||||
size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
|
||||
|
||||
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
|
||||
|
||||
|
||||
|
||||
#endif /* ZSTD_DDICT_H */
|
||||
File diff suppressed because it is too large
Load Diff
1307
C/zstd/zstd_decompress_block.c
Normal file
1307
C/zstd/zstd_decompress_block.c
Normal file
File diff suppressed because it is too large
Load Diff
59
C/zstd/zstd_decompress_block.h
Normal file
59
C/zstd/zstd_decompress_block.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ZSTD_DEC_BLOCK_H
|
||||
#define ZSTD_DEC_BLOCK_H
|
||||
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
#include "zstd.h" /* DCtx, and some public functions */
|
||||
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
||||
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
||||
|
||||
|
||||
/* === Prototypes === */
|
||||
|
||||
/* note: prototypes already published within `zstd.h` :
|
||||
* ZSTD_decompressBlock()
|
||||
*/
|
||||
|
||||
/* note: prototypes already published within `zstd_internal.h` :
|
||||
* ZSTD_getcBlockSize()
|
||||
* ZSTD_decodeSeqHeaders()
|
||||
*/
|
||||
|
||||
|
||||
/* ZSTD_decompressBlock_internal() :
|
||||
* decompress block, starting at `src`,
|
||||
* into destination buffer `dst`.
|
||||
* @return : decompressed block size,
|
||||
* or an error code (which can be tested using ZSTD_isError())
|
||||
*/
|
||||
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize, const int frame);
|
||||
|
||||
/* ZSTD_buildFSETable() :
|
||||
* generate FSE decoding table for one symbol (ll, ml or off)
|
||||
* this function must be called with valid parameters only
|
||||
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
||||
* in which case it cannot fail.
|
||||
* Internal use only.
|
||||
*/
|
||||
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog);
|
||||
|
||||
|
||||
#endif /* ZSTD_DEC_BLOCK_H */
|
||||
168
C/zstd/zstd_decompress_internal.h
Normal file
168
C/zstd/zstd_decompress_internal.h
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This source code is licensed under both the BSD-style license (found in the
|
||||
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||
* in the COPYING file in the root directory of this source tree).
|
||||
* You may select, at your option, one of the above-listed licenses.
|
||||
*/
|
||||
|
||||
|
||||
/* zstd_decompress_internal:
|
||||
* objects and definitions shared within lib/decompress modules */
|
||||
|
||||
#ifndef ZSTD_DECOMPRESS_INTERNAL_H
|
||||
#define ZSTD_DECOMPRESS_INTERNAL_H
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include "mem.h" /* BYTE, U16, U32 */
|
||||
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
||||
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Constants
|
||||
*********************************************************/
|
||||
static const U32 LL_base[MaxLL+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 18, 20, 22, 24, 28, 32, 40,
|
||||
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
||||
0x2000, 0x4000, 0x8000, 0x10000 };
|
||||
|
||||
static const U32 OF_base[MaxOff+1] = {
|
||||
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
||||
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
||||
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
||||
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
||||
|
||||
static const U32 OF_bits[MaxOff+1] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31 };
|
||||
|
||||
static const U32 ML_base[MaxML+1] = {
|
||||
3, 4, 5, 6, 7, 8, 9, 10,
|
||||
11, 12, 13, 14, 15, 16, 17, 18,
|
||||
19, 20, 21, 22, 23, 24, 25, 26,
|
||||
27, 28, 29, 30, 31, 32, 33, 34,
|
||||
35, 37, 39, 41, 43, 47, 51, 59,
|
||||
67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
|
||||
0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Decompression types
|
||||
*********************************************************/
|
||||
typedef struct {
|
||||
U32 fastMode;
|
||||
U32 tableLog;
|
||||
} ZSTD_seqSymbol_header;
|
||||
|
||||
typedef struct {
|
||||
U16 nextState;
|
||||
BYTE nbAdditionalBits;
|
||||
BYTE nbBits;
|
||||
U32 baseValue;
|
||||
} ZSTD_seqSymbol;
|
||||
|
||||
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
||||
|
||||
typedef struct {
|
||||
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
||||
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
||||
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
||||
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
||||
U32 rep[ZSTD_REP_NUM];
|
||||
} ZSTD_entropyDTables_t;
|
||||
|
||||
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
||||
ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
|
||||
ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
|
||||
ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
|
||||
|
||||
typedef enum { zdss_init=0, zdss_loadHeader,
|
||||
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
|
||||
|
||||
struct ZSTD_DCtx_s
|
||||
{
|
||||
const ZSTD_seqSymbol* LLTptr;
|
||||
const ZSTD_seqSymbol* MLTptr;
|
||||
const ZSTD_seqSymbol* OFTptr;
|
||||
const HUF_DTable* HUFptr;
|
||||
ZSTD_entropyDTables_t entropy;
|
||||
U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
|
||||
const void* previousDstEnd; /* detect continuity */
|
||||
const void* prefixStart; /* start of current segment */
|
||||
const void* virtualStart; /* virtual start of previous segment if it was just before current one */
|
||||
const void* dictEnd; /* end of previous segment */
|
||||
size_t expected;
|
||||
ZSTD_frameHeader fParams;
|
||||
U64 decodedSize;
|
||||
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
||||
ZSTD_dStage stage;
|
||||
U32 litEntropy;
|
||||
U32 fseEntropy;
|
||||
XXH64_state_t xxhState;
|
||||
size_t headerSize;
|
||||
ZSTD_format_e format;
|
||||
const BYTE* litPtr;
|
||||
ZSTD_customMem customMem;
|
||||
size_t litSize;
|
||||
size_t rleSize;
|
||||
size_t staticSize;
|
||||
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
||||
|
||||
/* dictionary */
|
||||
ZSTD_DDict* ddictLocal;
|
||||
const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
|
||||
U32 dictID;
|
||||
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
||||
|
||||
/* streaming */
|
||||
ZSTD_dStreamStage streamStage;
|
||||
char* inBuff;
|
||||
size_t inBuffSize;
|
||||
size_t inPos;
|
||||
size_t maxWindowSize;
|
||||
char* outBuff;
|
||||
size_t outBuffSize;
|
||||
size_t outStart;
|
||||
size_t outEnd;
|
||||
size_t lhSize;
|
||||
void* legacyContext;
|
||||
U32 previousLegacyVersion;
|
||||
U32 legacyVersion;
|
||||
U32 hostageByte;
|
||||
int noForwardProgress;
|
||||
|
||||
/* workspace */
|
||||
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
||||
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
||||
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
||||
|
||||
|
||||
/*-*******************************************************
|
||||
* Shared internal functions
|
||||
*********************************************************/
|
||||
|
||||
/*! ZSTD_loadDEntropy() :
|
||||
* dict : must point at beginning of a valid zstd dictionary.
|
||||
* @return : size of entropy tables read */
|
||||
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
||||
const void* const dict, size_t const dictSize);
|
||||
|
||||
/*! ZSTD_checkContinuity() :
|
||||
* check if next `dst` follows previous position, where decompression ended.
|
||||
* If yes, do nothing (continue on current segment).
|
||||
* If not, classify previous segment as "external dictionary", and start a new segment.
|
||||
* This function cannot fail. */
|
||||
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
||||
|
||||
|
||||
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
||||
@@ -18,7 +18,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashLarge = ms->hashTable;
|
||||
U32 const hBitsL = cParams->hashLog;
|
||||
U32 const mls = cParams->searchLength;
|
||||
U32 const mls = cParams->minMatch;
|
||||
U32* const hashSmall = ms->chainTable;
|
||||
U32 const hBitsS = cParams->chainLog;
|
||||
const BYTE* const base = ms->window.base;
|
||||
@@ -309,7 +309,7 @@ size_t ZSTD_compressBlock_doubleFast(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
const U32 mls = ms->cParams.searchLength;
|
||||
const U32 mls = ms->cParams.minMatch;
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
@@ -329,7 +329,7 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
const U32 mls = ms->cParams.searchLength;
|
||||
const U32 mls = ms->cParams.minMatch;
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
@@ -483,7 +483,7 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
U32 const mls = ms->cParams.searchLength;
|
||||
U32 const mls = ms->cParams.minMatch;
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
|
||||
@@ -72,6 +72,7 @@ typedef enum {
|
||||
ZSTD_error_workSpace_tooSmall= 66,
|
||||
ZSTD_error_dstSize_tooSmall = 70,
|
||||
ZSTD_error_srcSize_wrong = 72,
|
||||
ZSTD_error_dstBuffer_null = 74,
|
||||
/* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
|
||||
ZSTD_error_frameIndex_tooLarge = 100,
|
||||
ZSTD_error_seekableIO = 102,
|
||||
|
||||
@@ -18,7 +18,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const hashTable = ms->hashTable;
|
||||
U32 const hBits = cParams->hashLog;
|
||||
U32 const mls = cParams->searchLength;
|
||||
U32 const mls = cParams->minMatch;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* ip = base + ms->nextToUpdate;
|
||||
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
|
||||
@@ -27,18 +27,18 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
|
||||
/* Always insert every fastHashFillStep position into the hash table.
|
||||
* Insert the other positions if their hash entry is empty.
|
||||
*/
|
||||
for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
|
||||
for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
|
||||
U32 const current = (U32)(ip - base);
|
||||
U32 i;
|
||||
for (i = 0; i < fastHashFillStep; ++i) {
|
||||
size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
|
||||
if (i == 0 || hashTable[hash] == 0)
|
||||
hashTable[hash] = current + i;
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
if (dtlm == ZSTD_dtlm_fast)
|
||||
break;
|
||||
}
|
||||
}
|
||||
size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
|
||||
hashTable[hash0] = current;
|
||||
if (dtlm == ZSTD_dtlm_fast) continue;
|
||||
/* Only load extra positions for ZSTD_dtlm_full */
|
||||
{ U32 p;
|
||||
for (p = 1; p < fastHashFillStep; ++p) {
|
||||
size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
|
||||
if (hashTable[hash] == 0) { /* not yet filled */
|
||||
hashTable[hash] = current + p;
|
||||
} } } }
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
@@ -235,7 +235,7 @@ size_t ZSTD_compressBlock_fast(
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->searchLength;
|
||||
U32 const mls = cParams->minMatch;
|
||||
assert(ms->dictMatchState == NULL);
|
||||
switch(mls)
|
||||
{
|
||||
@@ -256,7 +256,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->searchLength;
|
||||
U32 const mls = cParams->minMatch;
|
||||
assert(ms->dictMatchState != NULL);
|
||||
switch(mls)
|
||||
{
|
||||
@@ -375,7 +375,7 @@ size_t ZSTD_compressBlock_fast_extDict(
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
ZSTD_compressionParameters const* cParams = &ms->cParams;
|
||||
U32 const mls = cParams->searchLength;
|
||||
U32 const mls = cParams->minMatch;
|
||||
switch(mls)
|
||||
{
|
||||
default: /* includes case 3 */
|
||||
|
||||
@@ -41,6 +41,9 @@ extern "C" {
|
||||
|
||||
/* ---- static assert (debug) --- */
|
||||
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
|
||||
#define ZSTD_isError ERR_isError /* for inlining */
|
||||
#define FSE_isError ERR_isError
|
||||
#define HUF_isError ERR_isError
|
||||
|
||||
|
||||
/*-*************************************
|
||||
@@ -75,7 +78,6 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
|
||||
#define BIT0 1
|
||||
|
||||
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
|
||||
#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */
|
||||
static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
|
||||
static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
|
||||
|
||||
@@ -242,7 +244,7 @@ typedef struct {
|
||||
blockType_e blockType;
|
||||
U32 lastBlock;
|
||||
U32 origSize;
|
||||
} blockProperties_t;
|
||||
} blockProperties_t; /* declared here for decompress and fullbench */
|
||||
|
||||
/*! ZSTD_getcBlockSize() :
|
||||
* Provides the size of compressed block from block header `src` */
|
||||
@@ -250,6 +252,13 @@ typedef struct {
|
||||
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
blockProperties_t* bpPtr);
|
||||
|
||||
/*! ZSTD_decodeSeqHeaders() :
|
||||
* decode sequence header from src */
|
||||
/* Used by: decompress, fullbench (does not get its definition from here) */
|
||||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -63,12 +63,13 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
|
||||
static void
|
||||
ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
U32 current, const BYTE* inputEnd,
|
||||
U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
|
||||
U32 nbCompares, U32 btLow,
|
||||
const ZSTD_dictMode_e dictMode)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32* const bt = ms->chainTable;
|
||||
U32 const btLog = cParams->chainLog - 1;
|
||||
U32 const btMask = (1 << btLog) - 1;
|
||||
U32* const bt = ms->chainTable;
|
||||
U32 const btLog = cParams->chainLog - 1;
|
||||
U32 const btMask = (1 << btLog) - 1;
|
||||
size_t commonLengthSmaller=0, commonLengthLarger=0;
|
||||
const BYTE* const base = ms->window.base;
|
||||
const BYTE* const dictBase = ms->window.dictBase;
|
||||
@@ -80,7 +81,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
const BYTE* match;
|
||||
U32* smallerPtr = bt + 2*(current&btMask);
|
||||
U32* largerPtr = smallerPtr + 1;
|
||||
U32 matchIndex = *smallerPtr;
|
||||
U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
U32 const windowLow = ms->window.lowLimit;
|
||||
|
||||
@@ -93,6 +94,9 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
assert(matchIndex < current);
|
||||
/* note : all candidates are now supposed sorted,
|
||||
* but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
|
||||
* when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
|
||||
|
||||
if ( (dictMode != ZSTD_extDict)
|
||||
|| (matchIndex+matchLength >= dictLimit) /* both in current segment*/
|
||||
@@ -108,7 +112,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
|
||||
match = dictBase + matchIndex;
|
||||
matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
|
||||
if (matchIndex+matchLength >= dictLimit)
|
||||
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
||||
match = base + matchIndex; /* preparation for next read of match[matchLength] */
|
||||
}
|
||||
|
||||
DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
|
||||
@@ -258,7 +262,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
&& (nbCandidates > 1) ) {
|
||||
DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
|
||||
matchIndex);
|
||||
*unsortedMark = previousCandidate;
|
||||
*unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
|
||||
previousCandidate = matchIndex;
|
||||
matchIndex = *nextCandidate;
|
||||
nextCandidate = bt + 2*(matchIndex&btMask);
|
||||
@@ -266,11 +270,13 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
nbCandidates --;
|
||||
}
|
||||
|
||||
/* nullify last candidate if it's still unsorted
|
||||
* simplification, detrimental to compression ratio, beneficial for speed */
|
||||
if ( (matchIndex > unsortLimit)
|
||||
&& (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
|
||||
DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
|
||||
matchIndex);
|
||||
*nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
|
||||
*nextCandidate = *unsortedMark = 0;
|
||||
}
|
||||
|
||||
/* batch sort stacked candidates */
|
||||
@@ -285,14 +291,14 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
|
||||
}
|
||||
|
||||
/* find longest match */
|
||||
{ size_t commonLengthSmaller=0, commonLengthLarger=0;
|
||||
{ size_t commonLengthSmaller = 0, commonLengthLarger = 0;
|
||||
const BYTE* const dictBase = ms->window.dictBase;
|
||||
const U32 dictLimit = ms->window.dictLimit;
|
||||
const BYTE* const dictEnd = dictBase + dictLimit;
|
||||
const BYTE* const prefixStart = base + dictLimit;
|
||||
U32* smallerPtr = bt + 2*(current&btMask);
|
||||
U32* largerPtr = bt + 2*(current&btMask) + 1;
|
||||
U32 matchEndIdx = current+8+1;
|
||||
U32 matchEndIdx = current + 8 + 1;
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
size_t bestLength = 0;
|
||||
|
||||
@@ -386,7 +392,7 @@ ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.searchLength)
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
|
||||
@@ -402,7 +408,7 @@ static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.searchLength)
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
|
||||
@@ -418,7 +424,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.searchLength)
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
|
||||
@@ -433,7 +439,7 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
|
||||
/* *********************************
|
||||
* Hash Chain
|
||||
***********************************/
|
||||
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
|
||||
#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
|
||||
|
||||
/* Update chains up to ip (excluded)
|
||||
Assumption : always within prefix (i.e. not within extDict) */
|
||||
@@ -463,7 +469,7 @@ static U32 ZSTD_insertAndFindFirstIndex_internal(
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.searchLength);
|
||||
return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
|
||||
}
|
||||
|
||||
|
||||
@@ -497,6 +503,7 @@ size_t ZSTD_HcFindBestMatch_generic (
|
||||
size_t currentMl=0;
|
||||
if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
|
||||
const BYTE* const match = base + matchIndex;
|
||||
assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
|
||||
if (match[ml] == ip[ml]) /* potentially better */
|
||||
currentMl = ZSTD_count(ip, match, iLimit);
|
||||
} else {
|
||||
@@ -559,7 +566,7 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.searchLength)
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
|
||||
@@ -575,7 +582,7 @@ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.searchLength)
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
|
||||
@@ -591,7 +598,7 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
|
||||
const BYTE* ip, const BYTE* const iLimit,
|
||||
size_t* offsetPtr)
|
||||
{
|
||||
switch(ms->cParams.searchLength)
|
||||
switch(ms->cParams.minMatch)
|
||||
{
|
||||
default : /* includes case 3 */
|
||||
case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
|
||||
|
||||
@@ -37,8 +37,8 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
|
||||
params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
|
||||
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
|
||||
}
|
||||
if (params->hashEveryLog == 0) {
|
||||
params->hashEveryLog = params->windowLog < params->hashLog
|
||||
if (params->hashRateLog == 0) {
|
||||
params->hashRateLog = params->windowLog < params->hashLog
|
||||
? 0
|
||||
: params->windowLog - params->hashLog;
|
||||
}
|
||||
@@ -119,20 +119,20 @@ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
|
||||
*
|
||||
* Gets the small hash, checksum, and tag from the rollingHash.
|
||||
*
|
||||
* If the tag matches (1 << ldmParams.hashEveryLog)-1, then
|
||||
* If the tag matches (1 << ldmParams.hashRateLog)-1, then
|
||||
* creates an ldmEntry from the offset, and inserts it into the hash table.
|
||||
*
|
||||
* hBits is the length of the small hash, which is the most significant hBits
|
||||
* of rollingHash. The checksum is the next 32 most significant bits, followed
|
||||
* by ldmParams.hashEveryLog bits that make up the tag. */
|
||||
* by ldmParams.hashRateLog bits that make up the tag. */
|
||||
static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
||||
U64 const rollingHash,
|
||||
U32 const hBits,
|
||||
U32 const offset,
|
||||
ldmParams_t const ldmParams)
|
||||
{
|
||||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog);
|
||||
U32 const tagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
|
||||
U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
|
||||
U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
|
||||
if (tag == tagMask) {
|
||||
U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
|
||||
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
|
||||
@@ -143,56 +143,6 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
|
||||
}
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_getRollingHash() :
|
||||
* Get a 64-bit hash using the first len bytes from buf.
|
||||
*
|
||||
* Giving bytes s = s_1, s_2, ... s_k, the hash is defined to be
|
||||
* H(s) = s_1*(a^(k-1)) + s_2*(a^(k-2)) + ... + s_k*(a^0)
|
||||
*
|
||||
* where the constant a is defined to be prime8bytes.
|
||||
*
|
||||
* The implementation adds an offset to each byte, so
|
||||
* H(s) = (s_1 + HASH_CHAR_OFFSET)*(a^(k-1)) + ... */
|
||||
static U64 ZSTD_ldm_getRollingHash(const BYTE* buf, U32 len)
|
||||
{
|
||||
U64 ret = 0;
|
||||
U32 i;
|
||||
for (i = 0; i < len; i++) {
|
||||
ret *= prime8bytes;
|
||||
ret += buf[i] + LDM_HASH_CHAR_OFFSET;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_ipow() :
|
||||
* Return base^exp. */
|
||||
static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
|
||||
{
|
||||
U64 ret = 1;
|
||||
while (exp) {
|
||||
if (exp & 1) { ret *= base; }
|
||||
exp >>= 1;
|
||||
base *= base;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
|
||||
DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
|
||||
assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
|
||||
return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_updateHash() :
|
||||
* Updates hash by removing toRemove and adding toAdd. */
|
||||
static U64 ZSTD_ldm_updateHash(U64 hash, BYTE toRemove, BYTE toAdd, U64 hashPower)
|
||||
{
|
||||
hash -= ((toRemove + LDM_HASH_CHAR_OFFSET) * hashPower);
|
||||
hash *= prime8bytes;
|
||||
hash += toAdd + LDM_HASH_CHAR_OFFSET;
|
||||
return hash;
|
||||
}
|
||||
|
||||
/** ZSTD_ldm_countBackwardsMatch() :
|
||||
* Returns the number of bytes that match backwards before pIn and pMatch.
|
||||
*
|
||||
@@ -238,6 +188,7 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
|
||||
case ZSTD_btlazy2:
|
||||
case ZSTD_btopt:
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btultra2:
|
||||
break;
|
||||
default:
|
||||
assert(0); /* not possible : not a valid strategy id */
|
||||
@@ -261,9 +212,9 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
|
||||
const BYTE* cur = lastHashed + 1;
|
||||
|
||||
while (cur < iend) {
|
||||
rollingHash = ZSTD_ldm_updateHash(rollingHash, cur[-1],
|
||||
cur[ldmParams.minMatchLength-1],
|
||||
state->hashPower);
|
||||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
|
||||
cur[ldmParams.minMatchLength-1],
|
||||
state->hashPower);
|
||||
ZSTD_ldm_makeEntryAndInsertByTag(state,
|
||||
rollingHash, hBits,
|
||||
(U32)(cur - base), ldmParams);
|
||||
@@ -297,8 +248,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
U64 const hashPower = ldmState->hashPower;
|
||||
U32 const hBits = params->hashLog - params->bucketSizeLog;
|
||||
U32 const ldmBucketSize = 1U << params->bucketSizeLog;
|
||||
U32 const hashEveryLog = params->hashEveryLog;
|
||||
U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
|
||||
U32 const hashRateLog = params->hashRateLog;
|
||||
U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
|
||||
/* Prefix and extDict parameters */
|
||||
U32 const dictLimit = ldmState->window.dictLimit;
|
||||
U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
|
||||
@@ -324,16 +275,16 @@ static size_t ZSTD_ldm_generateSequences_internal(
|
||||
size_t forwardMatchLength = 0, backwardMatchLength = 0;
|
||||
ldmEntry_t* bestEntry = NULL;
|
||||
if (ip != istart) {
|
||||
rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
|
||||
lastHashed[minMatchLength],
|
||||
hashPower);
|
||||
rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
|
||||
lastHashed[minMatchLength],
|
||||
hashPower);
|
||||
} else {
|
||||
rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
|
||||
rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
|
||||
}
|
||||
lastHashed = ip;
|
||||
|
||||
/* Do not insert and do not look for a match */
|
||||
if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
|
||||
if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
|
||||
ip++;
|
||||
continue;
|
||||
}
|
||||
@@ -593,7 +544,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
||||
void const* src, size_t srcSize)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
unsigned const minMatch = cParams->searchLength;
|
||||
unsigned const minMatch = cParams->minMatch;
|
||||
ZSTD_blockCompressor const blockCompressor =
|
||||
ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
|
||||
/* Input bounds */
|
||||
|
||||
@@ -21,7 +21,7 @@ extern "C" {
|
||||
* Long distance matching
|
||||
***************************************/
|
||||
|
||||
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
|
||||
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
|
||||
|
||||
/**
|
||||
* ZSTD_ldm_generateSequences():
|
||||
@@ -86,12 +86,8 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params);
|
||||
*/
|
||||
size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
|
||||
|
||||
/** ZSTD_ldm_getTableSize() :
|
||||
* Return prime8bytes^(minMatchLength-1) */
|
||||
U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
|
||||
|
||||
/** ZSTD_ldm_adjustParameters() :
|
||||
* If the params->hashEveryLog is not set, set it to its default value based on
|
||||
* If the params->hashRateLog is not set, set it to its default value based on
|
||||
* windowLog and params->hashLog.
|
||||
*
|
||||
* Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
|
||||
#define ZSTD_MAX_PRICE (1<<30)
|
||||
|
||||
#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
|
||||
|
||||
|
||||
/*-*************************************
|
||||
* Price functions for optimal parser
|
||||
@@ -52,11 +54,15 @@ MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
|
||||
return weight;
|
||||
}
|
||||
|
||||
/* debugging function, @return price in bytes */
|
||||
#if (DEBUGLEVEL>=2)
|
||||
/* debugging function,
|
||||
* @return price in bytes as fractional value
|
||||
* for debug messages only */
|
||||
MEM_STATIC double ZSTD_fCost(U32 price)
|
||||
{
|
||||
return (double)price / (BITCOST_MULTIPLIER*8);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
||||
{
|
||||
@@ -67,29 +73,44 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
|
||||
}
|
||||
|
||||
|
||||
static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
|
||||
/* ZSTD_downscaleStat() :
|
||||
* reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
|
||||
* return the resulting sum of elements */
|
||||
static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
|
||||
{
|
||||
U32 s, sum=0;
|
||||
DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
|
||||
assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
|
||||
for (s=0; s<=lastEltIndex; s++) {
|
||||
for (s=0; s<lastEltIndex+1; s++) {
|
||||
table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
|
||||
sum += table[s];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
static void ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
const BYTE* const src, size_t const srcSize,
|
||||
int optLevel)
|
||||
/* ZSTD_rescaleFreqs() :
|
||||
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
|
||||
* take hints from dictionary if there is one
|
||||
* or init from zero, using src for literals stats, or flat 1 for match symbols
|
||||
* otherwise downscale existing stats, to be used as seed for next block.
|
||||
*/
|
||||
static void
|
||||
ZSTD_rescaleFreqs(optState_t* const optPtr,
|
||||
const BYTE* const src, size_t const srcSize,
|
||||
int const optLevel)
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
if (optPtr->litLengthSum == 0) { /* first block : init */
|
||||
if (srcSize <= 1024) /* heuristic */
|
||||
if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */
|
||||
DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
|
||||
optPtr->priceType = zop_predef;
|
||||
}
|
||||
|
||||
assert(optPtr->symbolCosts != NULL);
|
||||
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
|
||||
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
|
||||
/* huffman table presumed generated by dictionary */
|
||||
optPtr->priceType = zop_dynamic;
|
||||
|
||||
assert(optPtr->litFreq != NULL);
|
||||
@@ -208,7 +229,9 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
|
||||
|
||||
/* dynamic statistics */
|
||||
{ U32 const llCode = ZSTD_LLcode(litLength);
|
||||
return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
|
||||
return (LL_bits[llCode] * BITCOST_MULTIPLIER)
|
||||
+ optPtr->litLengthSumBasePrice
|
||||
- WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -253,7 +276,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
|
||||
FORCE_INLINE_TEMPLATE U32
|
||||
ZSTD_getMatchPrice(U32 const offset,
|
||||
U32 const matchLength,
|
||||
const optState_t* const optPtr,
|
||||
const optState_t* const optPtr,
|
||||
int const optLevel)
|
||||
{
|
||||
U32 price;
|
||||
@@ -385,7 +408,6 @@ static U32 ZSTD_insertBt1(
|
||||
U32* largerPtr = smallerPtr + 1;
|
||||
U32 dummy32; /* to be nullified at the end */
|
||||
U32 const windowLow = ms->window.lowLimit;
|
||||
U32 const matchLow = windowLow ? windowLow : 1;
|
||||
U32 matchEndIdx = current+8+1;
|
||||
size_t bestLength = 8;
|
||||
U32 nbCompares = 1U << cParams->searchLog;
|
||||
@@ -401,7 +423,8 @@ static U32 ZSTD_insertBt1(
|
||||
assert(ip <= iend-8); /* required for h calculation */
|
||||
hashTable[h] = current; /* Update Hash Table */
|
||||
|
||||
while (nbCompares-- && (matchIndex >= matchLow)) {
|
||||
assert(windowLow > 0);
|
||||
while (nbCompares-- && (matchIndex >= windowLow)) {
|
||||
U32* const nextPtr = bt + 2*(matchIndex & btMask);
|
||||
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
|
||||
assert(matchIndex < current);
|
||||
@@ -479,7 +502,7 @@ void ZSTD_updateTree_internal(
|
||||
const BYTE* const base = ms->window.base;
|
||||
U32 const target = (U32)(ip - base);
|
||||
U32 idx = ms->nextToUpdate;
|
||||
DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
||||
DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
||||
idx, target, dictMode);
|
||||
|
||||
while(idx < target)
|
||||
@@ -488,15 +511,18 @@ void ZSTD_updateTree_internal(
|
||||
}
|
||||
|
||||
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
||||
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
|
||||
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
|
||||
}
|
||||
|
||||
FORCE_INLINE_TEMPLATE
|
||||
U32 ZSTD_insertBtAndGetAllMatches (
|
||||
ZSTD_matchState_t* ms,
|
||||
const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
|
||||
U32 rep[ZSTD_REP_NUM], U32 const ll0,
|
||||
ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
|
||||
U32 rep[ZSTD_REP_NUM],
|
||||
U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
|
||||
ZSTD_match_t* matches,
|
||||
const U32 lengthToBeat,
|
||||
U32 const mls /* template */)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
||||
@@ -542,6 +568,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
|
||||
DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
|
||||
|
||||
/* check repCode */
|
||||
assert(ll0 <= 1); /* necessarily 1 or 0 */
|
||||
{ U32 const lastR = ZSTD_REP_NUM + ll0;
|
||||
U32 repCode;
|
||||
for (repCode = ll0; repCode < lastR; repCode++) {
|
||||
@@ -724,7 +751,7 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
|
||||
ZSTD_match_t* matches, U32 const lengthToBeat)
|
||||
{
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
U32 const matchLengthSearch = cParams->searchLength;
|
||||
U32 const matchLengthSearch = cParams->minMatch;
|
||||
DEBUGLOG(8, "ZSTD_BtGetAllMatches");
|
||||
if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
|
||||
ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
|
||||
@@ -774,12 +801,30 @@ static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
||||
return sol.litlen + sol.mlen;
|
||||
}
|
||||
|
||||
#if 0 /* debug */
|
||||
|
||||
static void
|
||||
listStats(const U32* table, int lastEltID)
|
||||
{
|
||||
int const nbElts = lastEltID + 1;
|
||||
int enb;
|
||||
for (enb=0; enb < nbElts; enb++) {
|
||||
(void)table;
|
||||
//RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
|
||||
RAWLOG(2, "%4i,", table[enb]);
|
||||
}
|
||||
RAWLOG(2, " \n");
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
FORCE_INLINE_TEMPLATE size_t
|
||||
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
seqStore_t* seqStore,
|
||||
U32 rep[ZSTD_REP_NUM],
|
||||
const void* src, size_t srcSize,
|
||||
const int optLevel, const ZSTD_dictMode_e dictMode)
|
||||
const void* src, size_t srcSize,
|
||||
const int optLevel,
|
||||
const ZSTD_dictMode_e dictMode)
|
||||
{
|
||||
optState_t* const optStatePtr = &ms->opt;
|
||||
const BYTE* const istart = (const BYTE*)src;
|
||||
@@ -792,14 +837,15 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||
|
||||
U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
|
||||
U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
|
||||
U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
|
||||
|
||||
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
||||
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
||||
ZSTD_optimal_t lastSequence;
|
||||
|
||||
/* init */
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
|
||||
(U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
|
||||
assert(optLevel <= 2);
|
||||
ms->nextToUpdate3 = ms->nextToUpdate;
|
||||
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
|
||||
@@ -999,7 +1045,7 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
||||
U32 const offCode = opt[storePos].off;
|
||||
U32 const advance = llen + mlen;
|
||||
DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
|
||||
anchor - istart, llen, mlen);
|
||||
anchor - istart, (unsigned)llen, (unsigned)mlen);
|
||||
|
||||
if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
|
||||
assert(storePos == storeEnd); /* must be last sequence */
|
||||
@@ -1047,11 +1093,11 @@ size_t ZSTD_compressBlock_btopt(
|
||||
|
||||
|
||||
/* used in 2-pass strategy */
|
||||
static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
|
||||
static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
|
||||
{
|
||||
U32 s, sum=0;
|
||||
assert(ZSTD_FREQ_DIV+bonus > 0);
|
||||
for (s=0; s<=lastEltIndex; s++) {
|
||||
assert(ZSTD_FREQ_DIV+bonus >= 0);
|
||||
for (s=0; s<lastEltIndex+1; s++) {
|
||||
table[s] <<= ZSTD_FREQ_DIV+bonus;
|
||||
table[s]--;
|
||||
sum += table[s];
|
||||
@@ -1063,9 +1109,43 @@ static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
|
||||
MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
|
||||
{
|
||||
optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
|
||||
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
|
||||
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
|
||||
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
|
||||
optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
|
||||
optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
|
||||
optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
|
||||
}
|
||||
|
||||
/* ZSTD_initStats_ultra():
|
||||
* make a first compression pass, just to seed stats with more accurate starting values.
|
||||
* only works on first block, with no dictionary and no ldm.
|
||||
* this function cannot error, hence its constract must be respected.
|
||||
*/
|
||||
static void
|
||||
ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
||||
seqStore_t* seqStore,
|
||||
U32 rep[ZSTD_REP_NUM],
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
||||
memcpy(tmpRep, rep, sizeof(tmpRep));
|
||||
|
||||
DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
|
||||
assert(ms->opt.litLengthSum == 0); /* first block */
|
||||
assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */
|
||||
assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */
|
||||
assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
|
||||
|
||||
ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
|
||||
|
||||
/* invalidate first scan from history */
|
||||
ZSTD_resetSeqStore(seqStore);
|
||||
ms->window.base -= srcSize;
|
||||
ms->window.dictLimit += (U32)srcSize;
|
||||
ms->window.lowLimit = ms->window.dictLimit;
|
||||
ms->nextToUpdate = ms->window.dictLimit;
|
||||
ms->nextToUpdate3 = ms->window.dictLimit;
|
||||
|
||||
/* re-inforce weight of collected statistics */
|
||||
ZSTD_upscaleStats(&ms->opt);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_btultra(
|
||||
@@ -1073,33 +1153,34 @@ size_t ZSTD_compressBlock_btultra(
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
|
||||
#if 0
|
||||
/* 2-pass strategy (disabled)
|
||||
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
|
||||
}
|
||||
|
||||
size_t ZSTD_compressBlock_btultra2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
const void* src, size_t srcSize)
|
||||
{
|
||||
U32 const current = (U32)((const BYTE*)src - ms->window.base);
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
|
||||
|
||||
/* 2-pass strategy:
|
||||
* this strategy makes a first pass over first block to collect statistics
|
||||
* and seed next round's statistics with it.
|
||||
* After 1st pass, function forgets everything, and starts a new block.
|
||||
* Consequently, this can only work if no data has been previously loaded in tables,
|
||||
* aka, no dictionary, no prefix, no ldm preprocessing.
|
||||
* The compression ratio gain is generally small (~0.5% on first block),
|
||||
* the cost is 2x cpu time on first block. */
|
||||
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
||||
if ( (ms->opt.litLengthSum==0) /* first block */
|
||||
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
||||
&& (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
|
||||
U32 tmpRep[ZSTD_REP_NUM];
|
||||
DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
|
||||
assert(ms->nextToUpdate >= ms->window.dictLimit
|
||||
&& ms->nextToUpdate <= ms->window.dictLimit + 1);
|
||||
memcpy(tmpRep, rep, sizeof(tmpRep));
|
||||
ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
|
||||
ZSTD_resetSeqStore(seqStore);
|
||||
/* invalidate first scan from history */
|
||||
ms->window.base -= srcSize;
|
||||
ms->window.dictLimit += (U32)srcSize;
|
||||
ms->window.lowLimit = ms->window.dictLimit;
|
||||
ms->nextToUpdate = ms->window.dictLimit;
|
||||
ms->nextToUpdate3 = ms->window.dictLimit;
|
||||
/* re-inforce weight of collected statistics */
|
||||
ZSTD_upscaleStats(&ms->opt);
|
||||
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
||||
&& (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */
|
||||
&& (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */
|
||||
&& (srcSize > ZSTD_PREDEF_THRESHOLD)
|
||||
) {
|
||||
ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
|
||||
}
|
||||
#endif
|
||||
|
||||
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
|
||||
}
|
||||
|
||||
@@ -1130,3 +1211,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
|
||||
{
|
||||
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
|
||||
}
|
||||
|
||||
/* note : no btultra2 variant for extDict nor dictMatchState,
|
||||
* because btultra2 is not meant to work with dictionaries
|
||||
* and is only specific for the first block (no prefix) */
|
||||
|
||||
@@ -26,6 +26,10 @@ size_t ZSTD_compressBlock_btopt(
|
||||
size_t ZSTD_compressBlock_btultra(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
size_t ZSTD_compressBlock_btultra2(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
|
||||
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
@@ -41,6 +45,10 @@ size_t ZSTD_compressBlock_btultra_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
/* note : no btultra2 variant for extDict nor dictMatchState,
|
||||
* because btultra2 is not meant to work with dictionaries
|
||||
* and is only specific for the first block (no prefix) */
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -240,17 +240,7 @@ MEM_STATIC size_t MEM_readLEST(const void* memPtr)
|
||||
/* *************************************
|
||||
* Types
|
||||
***************************************/
|
||||
#define ZSTD_WINDOWLOG_MAX 26
|
||||
#define ZSTD_WINDOWLOG_MIN 18
|
||||
#define ZSTD_WINDOWLOG_ABSOLUTEMIN 11
|
||||
#define ZSTD_CONTENTLOG_MAX (ZSTD_WINDOWLOG_MAX+1)
|
||||
#define ZSTD_CONTENTLOG_MIN 4
|
||||
#define ZSTD_HASHLOG_MAX 28
|
||||
#define ZSTD_HASHLOG_MIN 4
|
||||
#define ZSTD_SEARCHLOG_MAX (ZSTD_CONTENTLOG_MAX-1)
|
||||
#define ZSTD_SEARCHLOG_MIN 1
|
||||
#define ZSTD_SEARCHLENGTH_MAX 7
|
||||
#define ZSTD_SEARCHLENGTH_MIN 4
|
||||
|
||||
/** from faster to stronger */
|
||||
typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2 } ZSTD_strategy;
|
||||
|
||||
@@ -836,7 +836,7 @@ MEM_STATIC void BITv05_skipBits(BITv05_DStream_t* bitD, U32 nbBits)
|
||||
bitD->bitsConsumed += nbBits;
|
||||
}
|
||||
|
||||
MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t value = BITv05_lookBits(bitD, nbBits);
|
||||
BITv05_skipBits(bitD, nbBits);
|
||||
@@ -845,7 +845,7 @@ MEM_STATIC size_t BITv05_readBits(BITv05_DStream_t* bitD, U32 nbBits)
|
||||
|
||||
/*!BITv05_readBitsFast :
|
||||
* unsafe version; only works only if nbBits >= 1 */
|
||||
MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, U32 nbBits)
|
||||
MEM_STATIC size_t BITv05_readBitsFast(BITv05_DStream_t* bitD, unsigned nbBits)
|
||||
{
|
||||
size_t value = BITv05_lookBitsFast(bitD, nbBits);
|
||||
BITv05_skipBits(bitD, nbBits);
|
||||
@@ -1162,7 +1162,7 @@ MEM_STATIC unsigned FSEv05_endOfDState(const FSEv05_DState_t* DStatePtr)
|
||||
/* **************************************************************
|
||||
* Complex types
|
||||
****************************************************************/
|
||||
typedef U32 DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
|
||||
typedef unsigned DTable_max_t[FSEv05_DTABLE_SIZE_U32(FSEv05_MAX_TABLELOG)];
|
||||
|
||||
|
||||
/* **************************************************************
|
||||
@@ -2191,7 +2191,7 @@ static void HUFv05_fillDTableX4(HUFv05_DEltX4* DTable, const U32 targetLog,
|
||||
}
|
||||
}
|
||||
|
||||
size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
|
||||
size_t HUFv05_readDTableX4 (unsigned* DTable, const void* src, size_t srcSize)
|
||||
{
|
||||
BYTE weightList[HUFv05_MAX_SYMBOL_VALUE + 1];
|
||||
sortedSymbol_t sortedSymbol[HUFv05_MAX_SYMBOL_VALUE + 1];
|
||||
@@ -2205,7 +2205,7 @@ size_t HUFv05_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
|
||||
void* dtPtr = DTable;
|
||||
HUFv05_DEltX4* const dt = ((HUFv05_DEltX4*)dtPtr) + 1;
|
||||
|
||||
HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(U32)); /* if compilation fails here, assertion is false */
|
||||
HUFv05_STATIC_ASSERT(sizeof(HUFv05_DEltX4) == sizeof(unsigned)); /* if compilation fails here, assertion is false */
|
||||
if (memLog > HUFv05_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
||||
//memset(weightList, 0, sizeof(weightList)); /* is not necessary, even though some analyzer complain ... */
|
||||
|
||||
@@ -2332,7 +2332,7 @@ static inline size_t HUFv05_decodeStreamX4(BYTE* p, BITv05_DStream_t* bitDPtr, B
|
||||
size_t HUFv05_decompress1X4_usingDTable(
|
||||
void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
const U32* DTable)
|
||||
const unsigned* DTable)
|
||||
{
|
||||
const BYTE* const istart = (const BYTE*) cSrc;
|
||||
BYTE* const ostart = (BYTE*) dst;
|
||||
@@ -2375,7 +2375,7 @@ size_t HUFv05_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t
|
||||
size_t HUFv05_decompress4X4_usingDTable(
|
||||
void* dst, size_t dstSize,
|
||||
const void* cSrc, size_t cSrcSize,
|
||||
const U32* DTable)
|
||||
const unsigned* DTable)
|
||||
{
|
||||
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
||||
|
||||
@@ -2999,7 +2999,7 @@ static size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t
|
||||
const BYTE* ip = istart;
|
||||
const BYTE* const iend = istart + srcSize;
|
||||
U32 LLtype, Offtype, MLtype;
|
||||
U32 LLlog, Offlog, MLlog;
|
||||
unsigned LLlog, Offlog, MLlog;
|
||||
size_t dumpsLength;
|
||||
|
||||
/* check */
|
||||
@@ -3057,7 +3057,7 @@ static size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t
|
||||
break;
|
||||
case FSEv05_ENCODING_DYNAMIC :
|
||||
default : /* impossible */
|
||||
{ U32 max = MaxLL;
|
||||
{ unsigned max = MaxLL;
|
||||
headerSize = FSEv05_readNCount(norm, &max, &LLlog, ip, iend-ip);
|
||||
if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
|
||||
if (LLlog > LLFSEv05Log) return ERROR(corruption_detected);
|
||||
@@ -3081,7 +3081,7 @@ static size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t
|
||||
break;
|
||||
case FSEv05_ENCODING_DYNAMIC :
|
||||
default : /* impossible */
|
||||
{ U32 max = MaxOff;
|
||||
{ unsigned max = MaxOff;
|
||||
headerSize = FSEv05_readNCount(norm, &max, &Offlog, ip, iend-ip);
|
||||
if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
|
||||
if (Offlog > OffFSEv05Log) return ERROR(corruption_detected);
|
||||
@@ -3105,7 +3105,7 @@ static size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t
|
||||
break;
|
||||
case FSEv05_ENCODING_DYNAMIC :
|
||||
default : /* impossible */
|
||||
{ U32 max = MaxML;
|
||||
{ unsigned max = MaxML;
|
||||
headerSize = FSEv05_readNCount(norm, &max, &MLlog, ip, iend-ip);
|
||||
if (FSEv05_isError(headerSize)) return ERROR(GENERIC);
|
||||
if (MLlog > MLFSEv05Log) return ERROR(corruption_detected);
|
||||
@@ -3305,9 +3305,9 @@ static size_t ZSTDv05_decompressSequences(
|
||||
const BYTE* const litEnd = litPtr + dctx->litSize;
|
||||
int nbSeq=0;
|
||||
const BYTE* dumps = NULL;
|
||||
U32* DTableLL = dctx->LLTable;
|
||||
U32* DTableML = dctx->MLTable;
|
||||
U32* DTableOffb = dctx->OffTable;
|
||||
unsigned* DTableLL = dctx->LLTable;
|
||||
unsigned* DTableML = dctx->MLTable;
|
||||
unsigned* DTableOffb = dctx->OffTable;
|
||||
const BYTE* const base = (const BYTE*) (dctx->base);
|
||||
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
|
||||
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
|
||||
@@ -3633,7 +3633,7 @@ static size_t ZSTDv05_loadEntropy(ZSTDv05_DCtx* dctx, const void* dict, size_t d
|
||||
{
|
||||
size_t hSize, offcodeHeaderSize, matchlengthHeaderSize, errorCode, litlengthHeaderSize;
|
||||
short offcodeNCount[MaxOff+1];
|
||||
U32 offcodeMaxValue=MaxOff, offcodeLog;
|
||||
unsigned offcodeMaxValue=MaxOff, offcodeLog;
|
||||
short matchlengthNCount[MaxML+1];
|
||||
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
|
||||
short litlengthNCount[MaxLL+1];
|
||||
|
||||
@@ -9,21 +9,19 @@
|
||||
*/
|
||||
|
||||
|
||||
/* ====== Tuning parameters ====== */
|
||||
#define ZSTDMT_NBWORKERS_MAX 200
|
||||
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
|
||||
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
|
||||
|
||||
|
||||
/* ====== Compiler specifics ====== */
|
||||
#if defined(_MSC_VER)
|
||||
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
|
||||
#endif
|
||||
|
||||
|
||||
/* ====== Constants ====== */
|
||||
#define ZSTDMT_OVERLAPLOG_DEFAULT 0
|
||||
|
||||
|
||||
/* ====== Dependencies ====== */
|
||||
#include <string.h> /* memcpy, memset */
|
||||
#include <limits.h> /* INT_MAX */
|
||||
#include <limits.h> /* INT_MAX, UINT_MAX */
|
||||
#include "pool.h" /* threadpool */
|
||||
#include "threading.h" /* mutex */
|
||||
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
|
||||
@@ -57,9 +55,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
|
||||
static clock_t _ticksPerSecond = 0;
|
||||
if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
|
||||
|
||||
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
|
||||
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
|
||||
}
|
||||
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
|
||||
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
|
||||
} }
|
||||
|
||||
#define MUTEX_WAIT_TIME_DLEVEL 6
|
||||
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
|
||||
@@ -342,8 +340,8 @@ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
|
||||
|
||||
typedef struct {
|
||||
ZSTD_pthread_mutex_t poolMutex;
|
||||
unsigned totalCCtx;
|
||||
unsigned availCCtx;
|
||||
int totalCCtx;
|
||||
int availCCtx;
|
||||
ZSTD_customMem cMem;
|
||||
ZSTD_CCtx* cctx[1]; /* variable size */
|
||||
} ZSTDMT_CCtxPool;
|
||||
@@ -351,16 +349,16 @@ typedef struct {
|
||||
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
|
||||
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
|
||||
{
|
||||
unsigned u;
|
||||
for (u=0; u<pool->totalCCtx; u++)
|
||||
ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
|
||||
int cid;
|
||||
for (cid=0; cid<pool->totalCCtx; cid++)
|
||||
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
|
||||
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
|
||||
ZSTD_free(pool, pool->cMem);
|
||||
}
|
||||
|
||||
/* ZSTDMT_createCCtxPool() :
|
||||
* implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
|
||||
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
|
||||
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
|
||||
ZSTD_customMem cMem)
|
||||
{
|
||||
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
|
||||
@@ -381,7 +379,7 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
|
||||
}
|
||||
|
||||
static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
|
||||
unsigned nbWorkers)
|
||||
int nbWorkers)
|
||||
{
|
||||
if (srcPool==NULL) return NULL;
|
||||
if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
|
||||
@@ -469,9 +467,9 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool*
|
||||
DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
|
||||
ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams);
|
||||
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
|
||||
assert(params.ldmParams.hashEveryLog < 32);
|
||||
assert(params.ldmParams.hashRateLog < 32);
|
||||
serialState->ldmState.hashPower =
|
||||
ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
|
||||
ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
|
||||
} else {
|
||||
memset(¶ms.ldmParams, 0, sizeof(params.ldmParams));
|
||||
}
|
||||
@@ -674,7 +672,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
|
||||
if (ZSTD_isError(initError)) JOB_ERROR(initError);
|
||||
} else { /* srcStart points at reloaded section */
|
||||
U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
|
||||
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
|
||||
{ size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
|
||||
if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
|
||||
}
|
||||
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
|
||||
@@ -777,6 +775,14 @@ typedef struct {
|
||||
|
||||
static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
|
||||
|
||||
#define RSYNC_LENGTH 32
|
||||
|
||||
typedef struct {
|
||||
U64 hash;
|
||||
U64 hitMask;
|
||||
U64 primePower;
|
||||
} rsyncState_t;
|
||||
|
||||
struct ZSTDMT_CCtx_s {
|
||||
POOL_ctx* factory;
|
||||
ZSTDMT_jobDescription* jobs;
|
||||
@@ -790,6 +796,7 @@ struct ZSTDMT_CCtx_s {
|
||||
inBuff_t inBuff;
|
||||
roundBuff_t roundBuff;
|
||||
serialState_t serial;
|
||||
rsyncState_t rsync;
|
||||
unsigned singleBlockingThread;
|
||||
unsigned jobIDMask;
|
||||
unsigned doneJobID;
|
||||
@@ -859,7 +866,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
|
||||
{
|
||||
if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
|
||||
params->nbWorkers = nbWorkers;
|
||||
params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
|
||||
params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
|
||||
params->jobSize = 0;
|
||||
return nbWorkers;
|
||||
}
|
||||
@@ -969,52 +976,59 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
|
||||
}
|
||||
|
||||
/* Internal only */
|
||||
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
||||
ZSTDMT_parameter parameter, unsigned value) {
|
||||
size_t
|
||||
ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
|
||||
ZSTDMT_parameter parameter,
|
||||
int value)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
|
||||
switch(parameter)
|
||||
{
|
||||
case ZSTDMT_p_jobSize :
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
|
||||
if ( (value > 0) /* value==0 => automatic job size */
|
||||
& (value < ZSTDMT_JOBSIZE_MIN) )
|
||||
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
|
||||
if ( value != 0 /* default */
|
||||
&& value < ZSTDMT_JOBSIZE_MIN)
|
||||
value = ZSTDMT_JOBSIZE_MIN;
|
||||
if (value > ZSTDMT_JOBSIZE_MAX)
|
||||
value = ZSTDMT_JOBSIZE_MAX;
|
||||
assert(value >= 0);
|
||||
if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
|
||||
params->jobSize = value;
|
||||
return value;
|
||||
case ZSTDMT_p_overlapSectionLog :
|
||||
if (value > 9) value = 9;
|
||||
DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
|
||||
params->overlapSizeLog = (value >= 9) ? 9 : value;
|
||||
|
||||
case ZSTDMT_p_overlapLog :
|
||||
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
|
||||
if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
|
||||
if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
|
||||
params->overlapLog = value;
|
||||
return value;
|
||||
|
||||
case ZSTDMT_p_rsyncable :
|
||||
value = (value != 0);
|
||||
params->rsyncable = value;
|
||||
return value;
|
||||
|
||||
default :
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
|
||||
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
|
||||
{
|
||||
DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
|
||||
switch(parameter)
|
||||
{
|
||||
case ZSTDMT_p_jobSize :
|
||||
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
||||
case ZSTDMT_p_overlapSectionLog :
|
||||
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
||||
default :
|
||||
return ERROR(parameter_unsupported);
|
||||
}
|
||||
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
|
||||
}
|
||||
|
||||
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
|
||||
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
|
||||
{
|
||||
switch (parameter) {
|
||||
case ZSTDMT_p_jobSize:
|
||||
*value = mtctx->params.jobSize;
|
||||
assert(mtctx->params.jobSize <= INT_MAX);
|
||||
*value = (int)(mtctx->params.jobSize);
|
||||
break;
|
||||
case ZSTDMT_p_overlapSectionLog:
|
||||
*value = mtctx->params.overlapSizeLog;
|
||||
case ZSTDMT_p_overlapLog:
|
||||
*value = mtctx->params.overlapLog;
|
||||
break;
|
||||
case ZSTDMT_p_rsyncable:
|
||||
*value = mtctx->params.rsyncable;
|
||||
break;
|
||||
default:
|
||||
return ERROR(parameter_unsupported);
|
||||
@@ -1140,22 +1154,66 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
|
||||
/* ===== Multi-threaded compression ===== */
|
||||
/* ------------------------------------------ */
|
||||
|
||||
static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
|
||||
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
|
||||
{
|
||||
if (params.ldmParams.enableLdm)
|
||||
/* In Long Range Mode, the windowLog is typically oversized.
|
||||
* In which case, it's preferable to determine the jobSize
|
||||
* based on chainLog instead. */
|
||||
return MAX(21, params.cParams.chainLog + 4);
|
||||
return MAX(20, params.cParams.windowLog + 2);
|
||||
}
|
||||
|
||||
static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
|
||||
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
|
||||
{
|
||||
unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
|
||||
if (params.ldmParams.enableLdm)
|
||||
return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
|
||||
return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
|
||||
switch(strat)
|
||||
{
|
||||
case ZSTD_btultra2:
|
||||
return 9;
|
||||
case ZSTD_btultra:
|
||||
case ZSTD_btopt:
|
||||
return 8;
|
||||
case ZSTD_btlazy2:
|
||||
case ZSTD_lazy2:
|
||||
return 7;
|
||||
case ZSTD_lazy:
|
||||
case ZSTD_greedy:
|
||||
case ZSTD_dfast:
|
||||
case ZSTD_fast:
|
||||
default:;
|
||||
}
|
||||
return 6;
|
||||
}
|
||||
|
||||
static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
|
||||
static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
|
||||
{
|
||||
assert(0 <= ovlog && ovlog <= 9);
|
||||
if (ovlog == 0) return ZSTDMT_overlapLog_default(strat);
|
||||
return ovlog;
|
||||
}
|
||||
|
||||
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
|
||||
{
|
||||
int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
|
||||
int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
|
||||
assert(0 <= overlapRLog && overlapRLog <= 8);
|
||||
if (params.ldmParams.enableLdm) {
|
||||
/* In Long Range Mode, the windowLog is typically oversized.
|
||||
* In which case, it's preferable to determine the jobSize
|
||||
* based on chainLog instead.
|
||||
* Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
|
||||
ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
|
||||
- overlapRLog;
|
||||
}
|
||||
assert(0 <= ovLog && ovLog <= 30);
|
||||
DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
|
||||
DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
|
||||
return (ovLog==0) ? 0 : (size_t)1 << ovLog;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
|
||||
{
|
||||
assert(nbWorkers>0);
|
||||
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
|
||||
size_t const jobMaxSize = jobSizeTarget << 2;
|
||||
@@ -1178,7 +1236,7 @@ static size_t ZSTDMT_compress_advanced_internal(
|
||||
ZSTD_CCtx_params params)
|
||||
{
|
||||
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
|
||||
size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
|
||||
size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
|
||||
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
|
||||
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
|
||||
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
|
||||
@@ -1289,16 +1347,17 @@ static size_t ZSTDMT_compress_advanced_internal(
|
||||
}
|
||||
|
||||
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
unsigned overlapLog)
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
int overlapLog)
|
||||
{
|
||||
ZSTD_CCtx_params cctxParams = mtctx->params;
|
||||
cctxParams.cParams = params.cParams;
|
||||
cctxParams.fParams = params.fParams;
|
||||
cctxParams.overlapSizeLog = overlapLog;
|
||||
assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
|
||||
cctxParams.overlapLog = overlapLog;
|
||||
return ZSTDMT_compress_advanced_internal(mtctx,
|
||||
dst, dstCapacity,
|
||||
src, srcSize,
|
||||
@@ -1311,8 +1370,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
const void* src, size_t srcSize,
|
||||
int compressionLevel)
|
||||
{
|
||||
U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
|
||||
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
|
||||
int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
|
||||
params.fParams.contentSizeFlag = 1;
|
||||
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
|
||||
}
|
||||
@@ -1339,8 +1398,8 @@ size_t ZSTDMT_initCStream_internal(
|
||||
if (params.nbWorkers != mtctx->params.nbWorkers)
|
||||
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
|
||||
|
||||
if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
||||
if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
||||
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
|
||||
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
|
||||
|
||||
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
|
||||
if (mtctx->singleBlockingThread) {
|
||||
@@ -1375,14 +1434,24 @@ size_t ZSTDMT_initCStream_internal(
|
||||
mtctx->cdict = cdict;
|
||||
}
|
||||
|
||||
mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
|
||||
DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
|
||||
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
|
||||
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
|
||||
mtctx->targetSectionSize = params.jobSize;
|
||||
if (mtctx->targetSectionSize == 0) {
|
||||
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
|
||||
}
|
||||
if (params.rsyncable) {
|
||||
/* Aim for the targetsectionSize as the average job size. */
|
||||
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
|
||||
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
|
||||
assert(jobSizeMB >= 1);
|
||||
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
|
||||
mtctx->rsync.hash = 0;
|
||||
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
|
||||
mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
|
||||
}
|
||||
if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
|
||||
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
|
||||
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
|
||||
DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
|
||||
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
|
||||
{
|
||||
@@ -1818,6 +1887,89 @@ static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
|
||||
return 1;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
size_t toLoad; /* The number of bytes to load from the input. */
|
||||
int flush; /* Boolean declaring if we must flush because we found a synchronization point. */
|
||||
} syncPoint_t;
|
||||
|
||||
/**
|
||||
* Searches through the input for a synchronization point. If one is found, we
|
||||
* will instruct the caller to flush, and return the number of bytes to load.
|
||||
* Otherwise, we will load as many bytes as possible and instruct the caller
|
||||
* to continue as normal.
|
||||
*/
|
||||
static syncPoint_t
|
||||
findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input)
|
||||
{
|
||||
BYTE const* const istart = (BYTE const*)input.src + input.pos;
|
||||
U64 const primePower = mtctx->rsync.primePower;
|
||||
U64 const hitMask = mtctx->rsync.hitMask;
|
||||
|
||||
syncPoint_t syncPoint;
|
||||
U64 hash;
|
||||
BYTE const* prev;
|
||||
size_t pos;
|
||||
|
||||
syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
|
||||
syncPoint.flush = 0;
|
||||
if (!mtctx->params.rsyncable)
|
||||
/* Rsync is disabled. */
|
||||
return syncPoint;
|
||||
if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH)
|
||||
/* Not enough to compute the hash.
|
||||
* We will miss any synchronization points in this RSYNC_LENGTH byte
|
||||
* window. However, since it depends only in the internal buffers, if the
|
||||
* state is already synchronized, we will remain synchronized.
|
||||
* Additionally, the probability that we miss a synchronization point is
|
||||
* low: RSYNC_LENGTH / targetSectionSize.
|
||||
*/
|
||||
return syncPoint;
|
||||
/* Initialize the loop variables. */
|
||||
if (mtctx->inBuff.filled >= RSYNC_LENGTH) {
|
||||
/* We have enough bytes buffered to initialize the hash.
|
||||
* Start scanning at the beginning of the input.
|
||||
*/
|
||||
pos = 0;
|
||||
prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH;
|
||||
hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH);
|
||||
} else {
|
||||
/* We don't have enough bytes buffered to initialize the hash, but
|
||||
* we know we have at least RSYNC_LENGTH bytes total.
|
||||
* Start scanning after the first RSYNC_LENGTH bytes less the bytes
|
||||
* already buffered.
|
||||
*/
|
||||
pos = RSYNC_LENGTH - mtctx->inBuff.filled;
|
||||
prev = (BYTE const*)mtctx->inBuff.buffer.start - pos;
|
||||
hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled);
|
||||
hash = ZSTD_rollingHash_append(hash, istart, pos);
|
||||
}
|
||||
/* Starting with the hash of the previous RSYNC_LENGTH bytes, roll
|
||||
* through the input. If we hit a synchronization point, then cut the
|
||||
* job off, and tell the compressor to flush the job. Otherwise, load
|
||||
* all the bytes and continue as normal.
|
||||
* If we go too long without a synchronization point (targetSectionSize)
|
||||
* then a block will be emitted anyways, but this is okay, since if we
|
||||
* are already synchronized we will remain synchronized.
|
||||
*/
|
||||
for (; pos < syncPoint.toLoad; ++pos) {
|
||||
BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH];
|
||||
/* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */
|
||||
hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower);
|
||||
if ((hash & hitMask) == hitMask) {
|
||||
syncPoint.toLoad = pos + 1;
|
||||
syncPoint.flush = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return syncPoint;
|
||||
}
|
||||
|
||||
size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx)
|
||||
{
|
||||
size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled;
|
||||
if (hintInSize==0) hintInSize = mtctx->targetSectionSize;
|
||||
return hintInSize;
|
||||
}
|
||||
|
||||
/** ZSTDMT_compressStream_generic() :
|
||||
* internal use only - exposed to be invoked from zstd_compress.c
|
||||
@@ -1844,7 +1996,8 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
}
|
||||
|
||||
/* single-pass shortcut (note : synchronous-mode) */
|
||||
if ( (mtctx->nextJobID == 0) /* just started */
|
||||
if ( (!mtctx->params.rsyncable) /* rsyncable mode is disabled */
|
||||
&& (mtctx->nextJobID == 0) /* just started */
|
||||
&& (mtctx->inBuff.filled == 0) /* nothing buffered */
|
||||
&& (!mtctx->jobReady) /* no job already created */
|
||||
&& (endOp == ZSTD_e_end) /* end order */
|
||||
@@ -1876,14 +2029,17 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
||||
DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
|
||||
}
|
||||
if (mtctx->inBuff.buffer.start != NULL) {
|
||||
size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
|
||||
syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input);
|
||||
if (syncPoint.flush && endOp == ZSTD_e_continue) {
|
||||
endOp = ZSTD_e_flush;
|
||||
}
|
||||
assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
|
||||
DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
|
||||
(U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
||||
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
|
||||
input->pos += toLoad;
|
||||
mtctx->inBuff.filled += toLoad;
|
||||
forwardInputProgress = toLoad>0;
|
||||
(U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
|
||||
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad);
|
||||
input->pos += syncPoint.toLoad;
|
||||
mtctx->inBuff.filled += syncPoint.toLoad;
|
||||
forwardInputProgress = syncPoint.toLoad>0;
|
||||
}
|
||||
if ((input->pos < input->size) && (endOp == ZSTD_e_end))
|
||||
endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
|
||||
|
||||
@@ -28,6 +28,16 @@
|
||||
#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
|
||||
|
||||
|
||||
/* === Constants === */
|
||||
#ifndef ZSTDMT_NBWORKERS_MAX
|
||||
# define ZSTDMT_NBWORKERS_MAX 200
|
||||
#endif
|
||||
#ifndef ZSTDMT_JOBSIZE_MIN
|
||||
# define ZSTDMT_JOBSIZE_MIN (1 MB)
|
||||
#endif
|
||||
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
|
||||
|
||||
|
||||
/* === Memory management === */
|
||||
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
||||
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
||||
@@ -52,6 +62,7 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
||||
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
|
||||
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
|
||||
@@ -60,16 +71,12 @@ ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);
|
||||
|
||||
/* === Advanced functions and parameters === */
|
||||
|
||||
#ifndef ZSTDMT_JOBSIZE_MIN
|
||||
# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
|
||||
#endif
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
||||
void* dst, size_t dstCapacity,
|
||||
const void* src, size_t srcSize,
|
||||
const ZSTD_CDict* cdict,
|
||||
ZSTD_parameters params,
|
||||
unsigned overlapLog);
|
||||
int overlapLog);
|
||||
|
||||
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
||||
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
|
||||
@@ -84,8 +91,9 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
||||
/* ZSTDMT_parameter :
|
||||
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
||||
typedef enum {
|
||||
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
||||
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
||||
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
||||
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
|
||||
} ZSTDMT_parameter;
|
||||
|
||||
/* ZSTDMT_setMTCtxParameter() :
|
||||
@@ -93,12 +101,12 @@ typedef enum {
|
||||
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
|
||||
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
|
||||
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/* ZSTDMT_getMTCtxParameter() :
|
||||
* Query the ZSTDMT_CCtx for a parameter value.
|
||||
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
|
||||
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
|
||||
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
|
||||
|
||||
|
||||
/*! ZSTDMT_compressStream_generic() :
|
||||
@@ -129,7 +137,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
|
||||
|
||||
/*! ZSTDMT_CCtxParam_setMTCtxParameter()
|
||||
* like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
|
||||
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
|
||||
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
|
||||
|
||||
/*! ZSTDMT_CCtxParam_setNbWorkers()
|
||||
* Set nbWorkers, and clamp it.
|
||||
|
||||
Reference in New Issue
Block a user