From ab1004725347997f31f596b3682f36bd2b112fac Mon Sep 17 00:00:00 2001 From: conor42 Date: Mon, 5 Nov 2018 21:22:10 +1000 Subject: [PATCH] Add Fast LZMA2 codec --- .gitignore | 8 + C/fast-lzma2/atomic.h | 82 + C/fast-lzma2/compiler.h | 86 ++ C/fast-lzma2/count.h | 115 ++ C/fast-lzma2/data_block.h | 20 + C/fast-lzma2/fast-lzma2.h | 455 ++++++ C/fast-lzma2/fastpos_table.h | 262 ++++ C/fast-lzma2/fl2_common.c | 61 + C/fast-lzma2/fl2_compress.c | 1028 +++++++++++++ C/fast-lzma2/fl2_compress_internal.h | 100 ++ C/fast-lzma2/fl2_error_private.c | 35 + C/fast-lzma2/fl2_error_private.h | 75 + C/fast-lzma2/fl2_errors.h | 59 + C/fast-lzma2/fl2_internal.h | 113 ++ C/fast-lzma2/fl2pool.c | 201 +++ C/fast-lzma2/fl2pool.h | 60 + C/fast-lzma2/fl2threading.c | 75 + C/fast-lzma2/fl2threading.h | 120 ++ C/fast-lzma2/lzma2_enc.c | 2047 +++++++++++++++++++++++++ C/fast-lzma2/lzma2_enc.h | 64 + C/fast-lzma2/mem.h | 362 +++++ C/fast-lzma2/platform.h | 153 ++ C/fast-lzma2/radix_bitpack.c | 61 + C/fast-lzma2/radix_engine.h | 1075 +++++++++++++ C/fast-lzma2/radix_internal.h | 143 ++ C/fast-lzma2/radix_mf.c | 672 ++++++++ C/fast-lzma2/radix_mf.h | 60 + C/fast-lzma2/radix_struct.c | 62 + C/fast-lzma2/range_enc.c | 101 ++ C/fast-lzma2/range_enc.h | 157 ++ C/fast-lzma2/util.h | 765 +++++++++ CPP/7zip/7zip.mak | 8 + CPP/7zip/Bundles/Alone/makefile | 12 + CPP/7zip/Bundles/Format7z/makefile | 14 + CPP/7zip/Bundles/Format7zF/makefile | 12 + CPP/7zip/Bundles/Format7zFO/makefile | 12 + CPP/7zip/Bundles/Format7zUSB/makefile | 12 + CPP/7zip/Compress/Lzma2Encoder.cpp | 165 ++ CPP/7zip/Compress/Lzma2Encoder.h | 28 + CPP/7zip/Compress/Lzma2Register.cpp | 20 +- CPP/7zip/UI/GUI/CompressDialog.cpp | 108 +- 41 files changed, 9060 insertions(+), 8 deletions(-) create mode 100644 .gitignore create mode 100644 C/fast-lzma2/atomic.h create mode 100644 C/fast-lzma2/compiler.h create mode 100644 C/fast-lzma2/count.h create mode 100644 C/fast-lzma2/data_block.h create mode 100644 C/fast-lzma2/fast-lzma2.h create mode 100644 C/fast-lzma2/fastpos_table.h create mode 100644 C/fast-lzma2/fl2_common.c create mode 100644 C/fast-lzma2/fl2_compress.c create mode 100644 C/fast-lzma2/fl2_compress_internal.h create mode 100644 C/fast-lzma2/fl2_error_private.c create mode 100644 C/fast-lzma2/fl2_error_private.h create mode 100644 C/fast-lzma2/fl2_errors.h create mode 100644 C/fast-lzma2/fl2_internal.h create mode 100644 C/fast-lzma2/fl2pool.c create mode 100644 C/fast-lzma2/fl2pool.h create mode 100644 C/fast-lzma2/fl2threading.c create mode 100644 C/fast-lzma2/fl2threading.h create mode 100644 C/fast-lzma2/lzma2_enc.c create mode 100644 C/fast-lzma2/lzma2_enc.h create mode 100644 C/fast-lzma2/mem.h create mode 100644 C/fast-lzma2/platform.h create mode 100644 C/fast-lzma2/radix_bitpack.c create mode 100644 C/fast-lzma2/radix_engine.h create mode 100644 C/fast-lzma2/radix_internal.h create mode 100644 C/fast-lzma2/radix_mf.c create mode 100644 C/fast-lzma2/radix_mf.h create mode 100644 C/fast-lzma2/radix_struct.c create mode 100644 C/fast-lzma2/range_enc.c create mode 100644 C/fast-lzma2/range_enc.h create mode 100644 C/fast-lzma2/util.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..1a87183d --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ + +*.obj +*.exe +*.res +*.dll +*.pch +*.exp +*.lib diff --git a/C/fast-lzma2/atomic.h b/C/fast-lzma2/atomic.h new file mode 100644 index 00000000..aa4d9778 --- /dev/null +++ b/C/fast-lzma2/atomic.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018 Conor McCarthy + * All rights reserved. 
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ *
+ */
+
+#ifndef FL2_ATOMIC_H
+#define FL2_ATOMIC_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* atomic add */
+
+#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)
+
+#ifdef WINVER
+#undef WINVER
+#endif
+#define WINVER 0x0600
+
+#ifdef _WIN32_WINNT
+#undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+
+typedef LONG volatile FL2_atomic;
+#define ATOMIC_INITIAL_VALUE -1
+#define FL2_atomic_increment(n) InterlockedIncrement(&n)
+#define FL2_atomic_add(n, a) InterlockedAdd(&n, a)
+#define FL2_nonAtomic_increment(n) (++n)
+
+#elif !defined(FL2_SINGLETHREAD) && defined(__GNUC__)
+
+typedef long FL2_atomic;
+#define ATOMIC_INITIAL_VALUE 0
+#define FL2_atomic_increment(n) __sync_fetch_and_add(&n, 1)
+#define FL2_atomic_add(n, a) __sync_fetch_and_add(&n, a)
+#define FL2_nonAtomic_increment(n) (n++)
+
+#elif !defined(FL2_SINGLETHREAD) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) /* C11 */
+
+#include <stdatomic.h>
+
+typedef _Atomic long FL2_atomic;
+#define ATOMIC_INITIAL_VALUE 0
+#define FL2_atomic_increment(n) atomic_fetch_add(&n, 1)
+#define FL2_atomic_add(n, a) atomic_fetch_add(&n, a)
+#define FL2_nonAtomic_increment(n) (n++)
+
+#else /* No atomics */
+
+# ifndef FL2_SINGLETHREAD
+# error No atomic operations available. Change compiler config or define FL2_SINGLETHREAD for the entire build.
+# endif
+
+typedef long FL2_atomic;
+#define ATOMIC_INITIAL_VALUE 0
+#define FL2_atomic_increment(n) (n++)
+#define FL2_atomic_add(n, a) (n += (a))
+#define FL2_nonAtomic_increment(n) (n++)
+
+#endif /* FL2_SINGLETHREAD */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FL2_ATOMIC_H */
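A note on the backends above: InterlockedIncrement() returns the incremented value, while the GCC and C11 fetch-and-add forms return the value prior to the add, which is evidently why ATOMIC_INITIAL_VALUE is -1 for the Win32 backend and 0 for the others; every configuration hands out 0 as the first claimed index. A minimal usage sketch (hypothetical worker function, not part of the patch):

    #include <stddef.h>
    #include "atomic.h"

    static FL2_atomic next_unit = ATOMIC_INITIAL_VALUE;

    /* Each worker claims the next work unit by bumping the shared counter.
     * Under all three multithreaded backends the first claim yields 0. */
    static void worker(size_t unit_count)
    {
        for (;;) {
            long const unit = FL2_atomic_increment(next_unit);
            if ((size_t)unit >= unit_count)
                break;
            /* ... process work unit 'unit' ... */
        }
    }
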
diff --git a/C/fast-lzma2/compiler.h b/C/fast-lzma2/compiler.h
new file mode 100644
index 00000000..dc3bfff3
--- /dev/null
+++ b/C/fast-lzma2/compiler.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+/* force inlining */
+#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# define INLINE_KEYWORD inline
+#else
+# define INLINE_KEYWORD
+#endif
+
+#if defined(__GNUC__)
+# define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+# define FORCE_INLINE_ATTR __forceinline
+#else
+# define FORCE_INLINE_ATTR
+#endif
+
+/**
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/**
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compiler's
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+# define HINT_INLINE static INLINE_KEYWORD
+#else
+# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* force no inlining */
+#ifdef _MSC_VER
+# define FORCE_NOINLINE static __declspec(noinline)
+#else
+# ifdef __GNUC__
+# define FORCE_NOINLINE static __attribute__((__noinline__))
+# else
+# define FORCE_NOINLINE static
+# endif
+#endif
+
+/* prefetch */
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
+#elif defined(__GNUC__)
+# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
+#else
+# define PREFETCH(ptr) /* disabled */
+#endif
+
+/* disable warnings */
+#ifdef _MSC_VER /* Visual Studio */
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#endif
+
+#endif /* ZSTD_COMPILER_H */
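The FORCE_INLINE_TEMPLATE idiom is easiest to see with a small example: a worker takes a constant flag, and force-inlined wrappers pin that constant so each instantiation compiles with the branch folded away. A sketch along those lines (illustrative names, not part of the patch):

    #include <stddef.h>
    #include "compiler.h"

    FORCE_INLINE_TEMPLATE
    unsigned sum_impl(const unsigned* v, size_t n, int use_prefetch)
    {
        unsigned total = 0;
        for (size_t i = 0; i < n; ++i) {
            if (use_prefetch && i + 16 < n) {   /* constant-folded per instantiation */
                const unsigned* const pref = v + i + 16;
                PREFETCH(pref);
            }
            total += v[i];
        }
        return total;
    }

    /* Each wrapper becomes a specialized function with no runtime flag test. */
    static unsigned sum_plain(const unsigned* v, size_t n)    { return sum_impl(v, n, 0); }
    static unsigned sum_prefetch(const unsigned* v, size_t n) { return sum_impl(v, n, 1); }
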
diff --git a/C/fast-lzma2/count.h b/C/fast-lzma2/count.h
new file mode 100644
index 00000000..77f796a3
--- /dev/null
+++ b/C/fast-lzma2/count.h
@@ -0,0 +1,115 @@
+#ifndef ZSTD_COUNT_H_
+#define ZSTD_COUNT_H_
+
+#include "mem.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+* Match length counter
+***************************************/
+static unsigned ZSTD_NbCommonBytes(register size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanForward64(&r, (U64)val);
+            return (unsigned)(r >> 3);
+# elif defined(__GNUC__) && (__GNUC__ >= 4)
+            return (__builtin_ctzll((U64)val) >> 3);
+# else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+        }
+        else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r = 0;
+            _BitScanForward(&r, (U32)val);
+            return (unsigned)(r >> 3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+# else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+        }
+    }
+    else { /* Big Endian CPU */
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanReverse64(&r, val);
+            return (unsigned)(r >> 3);
+# elif defined(__GNUC__) && (__GNUC__ >= 4)
+            return (__builtin_clzll(val) >> 3);
+# else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t) * 4; /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val >> n32)) { r = 4; }
+            else { r = 0; val >>= n32; }
+            if (!(val >> 16)) { r += 2; val >>= 8; }
+            else { val >>= 24; }
+            r += (!val);
+            return r;
+# endif
+        }
+        else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r = 0;
+            _BitScanReverse(&r, (unsigned long)val);
+            return (unsigned)(r >> 3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+# else
+            unsigned r;
+            if (!(val >> 16)) { r = 2; val >>= 8; }
+            else { r = 0; val >>= 24; }
+            r += (!val);
+            return r;
+# endif
+        }
+    }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);
+
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn += sizeof(size_t); pMatch += sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn += sizeof(size_t); pMatch += sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+        }
+    }
+    if (MEM_64bits() && (pIn < (pInLimit - 3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn += 4; pMatch += 4; }
+    if ((pIn < (pInLimit - 1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn += 2; pMatch += 2; }
+    if ((pIn < pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_COUNT_H_ */
diff --git a/C/fast-lzma2/fast-lzma2.h b/C/fast-lzma2/fast-lzma2.h
new file mode 100644
--- /dev/null
+++ b/C/fast-lzma2/fast-lzma2.h
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2018, Conor McCarthy
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ *
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef FAST_LZMA2_H
+#define FAST_LZMA2_H
+
+#include <stddef.h> /* size_t */
+
+
+/* ===== FL2LIB_API : control library symbols visibility ===== */
+#ifndef FL2LIB_VISIBILITY
+# if defined(__GNUC__) && (__GNUC__ >= 4)
+# define FL2LIB_VISIBILITY __attribute__ ((visibility ("default")))
+# else
+# define FL2LIB_VISIBILITY
+# endif
+#endif
+#if defined(FL2_DLL_EXPORT) && (FL2_DLL_EXPORT==1)
+# define FL2LIB_API __declspec(dllexport) FL2LIB_VISIBILITY
+#elif defined(FL2_DLL_IMPORT) && (FL2_DLL_IMPORT==1)
+# define FL2LIB_API __declspec(dllimport) FL2LIB_VISIBILITY /* It isn't required but allows the compiler to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+# define FL2LIB_API FL2LIB_VISIBILITY
+#endif
+
+/* ====== Calling convention ======*/
+
+#if !defined _WIN32 || defined __x86_64__ || defined _M_X64 || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8)
+# define FL2LIB_CALL
+#elif defined(__GNUC__)
+# define FL2LIB_CALL __attribute__((cdecl))
+#elif defined(_MSC_VER)
+# define FL2LIB_CALL __cdecl
+#else
+# define FL2LIB_CALL
+#endif
+
+/*******************************************************************************************************
+Introduction
+
+*********************************************************************************************************/
+
+/*------ Version ------*/
+#define FL2_VERSION_MAJOR 0
+#define FL2_VERSION_MINOR 9
+#define FL2_VERSION_RELEASE 2
+
+#define FL2_VERSION_NUMBER (FL2_VERSION_MAJOR *100*100 + FL2_VERSION_MINOR *100 + FL2_VERSION_RELEASE)
+FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void); /**< useful to check dll version */
+
+#define FL2_LIB_VERSION FL2_VERSION_MAJOR.FL2_VERSION_MINOR.FL2_VERSION_RELEASE
+#define FL2_QUOTE(str) #str
+#define FL2_EXPAND_AND_QUOTE(str) FL2_QUOTE(str)
+#define FL2_VERSION_STRING FL2_EXPAND_AND_QUOTE(FL2_LIB_VERSION)
+FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void);
+
+
+/***************************************
+* Simple API
+***************************************/
+
+#define FL2_MAXTHREADS 200
+
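FL2_VERSION_NUMBER packs the version as major*10000 + minor*100 + release, so 0.9.2 yields 902; this makes a run-time check against a mismatched DLL straightforward. A small sketch (hypothetical helper, not part of the patch):

    #include <stdio.h>
    #include "fast-lzma2.h"

    /* Returns nonzero when the loaded library's major version matches the
     * header this translation unit was compiled against. */
    static int fl2_version_ok(void)
    {
        unsigned const v = FL2_versionNumber();   /* e.g. 902 for v0.9.2 */
        if (v / 10000 != FL2_VERSION_MAJOR) {
            fprintf(stderr, "fast-lzma2: built for %s, loaded %u\n",
                    FL2_VERSION_STRING, v);
            return 0;
        }
        return 1;
    }
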
+/*! FL2_compress() :
+ * Compresses `src` content as a single LZMA2 compressed stream into already allocated `dst`.
+ * Call FL2_compressMt() to use > 1 thread. Specify nbThreads = 0 to use all cores.
+ * @return : compressed size written into `dst` (<= `dstCapacity`),
+ *           or an error code if it fails (which can be tested using FL2_isError()). */
+FL2LIB_API size_t FL2LIB_CALL FL2_compress(void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    int compressionLevel);
+
+FL2LIB_API size_t FL2LIB_CALL FL2_compressMt(void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    int compressionLevel,
+    unsigned nbThreads);
+
+/*! FL2_decompress() :
+ * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ * `dstCapacity` is an upper bound of originalSize to regenerate.
+ * If the caller cannot determine a maximum upper bound, it's better to use streaming mode to decompress data.
+ * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *           or an errorCode if it fails (which can be tested using FL2_isError()). */
+FL2LIB_API size_t FL2LIB_CALL FL2_decompress(void* dst, size_t dstCapacity,
+    const void* src, size_t compressedSize);
+
+/*! FL2_findDecompressedSize()
+ * `src` should point to the start of a LZMA2 encoded stream.
+ * `srcSize` must be at least as large as the LZMA2 stream including end marker.
+ * @return : - decompressed size of the stream in `src`, if known
+ *           - FL2_CONTENTSIZE_ERROR if an error occurred (e.g. corruption, srcSize too small)
+ *  note 1 : a 0 return value means the frame is valid but "empty".
+ *  note 2 : decompressed size can be very large (64-bits value),
+ *           potentially larger than what the local system can handle as a single memory segment.
+ *           In which case, it's necessary to use streaming mode to decompress data.
+ *  note 3 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *           Always ensure the return value fits within the application's authorized limits.
+ *           Each application can set its own limits. */
+#define FL2_CONTENTSIZE_ERROR (size_t)-1
+FL2LIB_API size_t FL2LIB_CALL FL2_findDecompressedSize(const void *src, size_t srcSize);
+
+
+/*====== Helper functions ======*/
+#define FL2_COMPRESSBOUND(srcSize) ((srcSize) + (((srcSize) + 0xFFF) / 0x1000) * 3 + 6) /* this formula calculates the maximum size of data stored in uncompressed chunks */
+FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
+FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
+FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code); /*!< provides readable string from an error code */
+FL2LIB_API int FL2LIB_CALL FL2_maxCLevel(void); /*!< maximum compression level available */
+FL2LIB_API int FL2LIB_CALL FL2_maxHighCLevel(void); /*!< maximum compression level available in high mode */
+
+/***************************************
+* Explicit memory management
+***************************************/
+/*= Compression context
+ * When compressing many times,
+ * it is recommended to allocate a context just once, and re-use it for each successive compression operation.
+ * This will make the workload friendlier for the system's memory.
+ * The context may not use the number of threads requested if the library is compiled for single-threaded
+ * compression or nbThreads > FL2_MAXTHREADS. Call FL2_CCtx_nbThreads to obtain the actual number.
+ */
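To make the simple API concrete: a one-shot round trip sized with FL2_compressBound() and checked with FL2_isError(). This is an illustrative helper, not part of the patch, and error handling is abbreviated:

    #include <stdlib.h>
    #include <string.h>
    #include "fast-lzma2.h"

    static int fl2_round_trip(const void* src, size_t srcSize)
    {
        size_t const bound = FL2_compressBound(srcSize);
        void* const comp = malloc(bound);
        void* const regen = malloc(srcSize);
        int ok = 0;
        if (comp != NULL && regen != NULL) {
            /* 9 is the documented default level (FL2_CLEVEL_DEFAULT) */
            size_t const cSize = FL2_compress(comp, bound, src, srcSize, 9);
            if (!FL2_isError(cSize)) {
                size_t const dSize = FL2_decompress(regen, srcSize, comp, cSize);
                ok = !FL2_isError(dSize) && dSize == srcSize
                    && memcmp(src, regen, srcSize) == 0;
            }
        }
        free(comp);
        free(regen);
        return ok;
    }
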
+typedef struct FL2_CCtx_s FL2_CCtx;
+FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtx(void);
+FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtxMt(unsigned nbThreads);
+FL2LIB_API void FL2LIB_CALL FL2_freeCCtx(FL2_CCtx* cctx);
+
+FL2LIB_API unsigned FL2LIB_CALL FL2_CCtx_nbThreads(const FL2_CCtx* ctx);
+
+/*! FL2_compressCCtx() :
+ * Same as FL2_compress(), requires an allocated FL2_CCtx (see FL2_createCCtx()). */
+FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* ctx,
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    int compressionLevel);
+
+/************************************************
+* Caller-managed data buffer and overlap section
+************************************************/
+
+typedef struct {
+    unsigned char *data;
+    size_t start;   /* start = 0 (first block) or overlap */
+    size_t end;     /* never < overlap */
+    size_t bufSize; /* allocation size */
+} FL2_blockBuffer;
+
+typedef int (FL2LIB_CALL *FL2_progressFn)(size_t done, void* opaque);
+
+/* Get the size of the overlap section. */
+FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* ctx);
+
+/* Copy the overlap section to the start to prepare for more data */
+FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(FL2_CCtx* ctx, FL2_blockBuffer *block);
+/* Copy the overlap to a different buffer. This allows a dual-buffer configuration where
+ * data is read into one block while the other is compressed. */
+FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(FL2_CCtx* ctx, FL2_blockBuffer *block, unsigned char *dst);
+
+FL2LIB_API void FL2LIB_CALL FL2_beginFrame(FL2_CCtx* const cctx);
+
+/*! FL2_compressCCtxBlock() :
+ * Same as FL2_compressCCtx except the caller is responsible for supplying an overlap section.
+ * The FL2_p_overlapFraction parameter will not be used.
+ * srcStart + srcSize should equal the dictionary size except on the last call.
+ * Can be called multiple times. FL2_endFrame() must be called when finished.
+ * For compatibility with this library the caller must write a property byte at
+ * the beginning of the output. Obtain it by calling FL2_dictSizeProp() before
+ * compressing the first block or after the last. No hash will be written, but
+ * the caller can calculate it using the interface in xxhash.h, write it at the end,
+ * and set bit 7 in the property byte. */
+FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock(FL2_CCtx* ctx,
+    void* dst, size_t dstCapacity,
+    const FL2_blockBuffer *block,
+    FL2_progressFn progress, void* opaque);
+
+/*! FL2_endFrame() :
+ * Write the end marker to terminate the LZMA2 stream.
+ * Must be called after compressing with FL2_compressCCtxBlock() */
+FL2LIB_API size_t FL2LIB_CALL FL2_endFrame(FL2_CCtx* ctx,
+    void* dst, size_t dstCapacity);
+
+typedef int (FL2LIB_CALL *FL2_writerFn)(const void* src, size_t srcSize, void* opaque);
+
+/*! FL2_compressCCtxBlock_toFn() :
+ * Same as FL2_compressCCtx except the caller is responsible for supplying an
+ * overlap section, and compressed data is written to a callback function.
+ * The FL2_p_overlapFraction parameter will not be used.
+ * Can be called multiple times. FL2_endFrame_toFn() must be called when finished. */
+FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock_toFn(FL2_CCtx* ctx,
+    FL2_writerFn writeFn, void* opaque,
+    const FL2_blockBuffer *block,
+    FL2_progressFn progress);
+
+/*! FL2_endFrame_toFn() :
+ * Write the end marker to a callback function to terminate the LZMA2 stream.
+ * Must be called after compressing with FL2_compressCCtxBlock_toFn() */ +FL2LIB_API size_t FL2LIB_CALL FL2_endFrame_toFn(FL2_CCtx* ctx, + FL2_writerFn writeFn, void* opaque); + +/*! FL2_dictSizeProp() : + * Get the dictionary size property. + * Intended for use with the FL2_p_omitProperties parameter for creating a + * 7-zip compatible LZMA2 stream. */ +FL2LIB_API unsigned char FL2LIB_CALL FL2_dictSizeProp(FL2_CCtx* ctx); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make the workload friendlier for the system's memory. + * Use one context per thread for parallel execution. */ +typedef struct CLzma2Dec_s FL2_DCtx; +FL2LIB_API FL2_DCtx* FL2LIB_CALL FL2_createDCtx(void); +FL2LIB_API size_t FL2LIB_CALL FL2_freeDCtx(FL2_DCtx* dctx); + +/*! FL2_decompressDCtx() : + * Same as FL2_decompress(), requires an allocated FL2_DCtx (see FL2_createDCtx()) */ +FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/**************************** +* Streaming +****************************/ + +typedef struct FL2_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} FL2_inBuffer; + +typedef struct FL2_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} FL2_outBuffer; + + + +/*-*********************************************************************** + * Streaming compression - HowTo + * + * A FL2_CStream object is required to track streaming operation. + * Use FL2_createCStream() and FL2_freeCStream() to create/release resources. + * FL2_CStream objects can be reused multiple times on consecutive compression operations. + * It is recommended to re-use FL2_CStream in situations where many streaming operations will be achieved consecutively, + * since it will play nicer with system's memory, by re-using already allocated memory. + * + * Start a new compression by initializing FL2_CStream. + * Use FL2_initCStream() to start a new compression operation. + * + * Use FL2_compressStream() repetitively to consume input stream. + * The function will automatically update both `pos` fields. + * It will always consume the entire input unless an error occurs, + * unlike the decompression function. + * @return : a size hint - remaining capacity to fill before compression occurs, + * or an error code, which can be tested using FL2_isError(). + * Note : it's just a hint, any other value will work fine. + * + * At any moment, it's possible, but not recommended, to flush whatever data remains + * within internal buffer using FL2_flushStream(). + * `output->pos` will be updated. + * Note 1 : this will reduce compression ratio because the algorithm is block-based. + * Note 2 : some content might still be left within internal buffers if `output->size` is too small. + * @return : nb of bytes still present within internal buffers (0 if they're empty) + * or an error code, which can be tested using FL2_isError(). + * + * FL2_endStream() instructs to finish a frame. + * It will perform a flush and write the LZMA2 termination byte (required). 
+ * FL2_endStream() may not be able to flush full data if `output->size` is too small. + * In which case, call again FL2_endStream() to complete the flush. + * @return : 0 if stream fully completed and flushed, + * or >0 to indicate the nb of bytes still present within the internal buffers, + * or an error code, which can be tested using FL2_isError(). + * + * *******************************************************************/ + +typedef struct FL2_CStream_s FL2_CStream; + +/*===== FL2_CStream management functions =====*/ +FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStream(void); +FL2LIB_API size_t FL2LIB_CALL FL2_freeCStream(FL2_CStream* fcs); + +/*===== Streaming compression functions =====*/ +FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel); +FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer* output, FL2_inBuffer* input); +FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer* output); +FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer* output); + + +/*-*************************************************************************** + * Streaming decompression - HowTo + * + * A FL2_DStream object is required to track streaming operations. + * Use FL2_createDStream() and FL2_freeDStream() to create/release resources. + * FL2_DStream objects can be re-used multiple times. + * + * Use FL2_initDStream() to start a new decompression operation. + * @return : recommended first input size + * + * Use FL2_decompressStream() repetitively to consume your input. + * The function will update both `pos` fields. + * If `input.pos < input.size`, some input has not been consumed. + * It's up to the caller to present again remaining data. + * More data must be loaded if `input.pos + LZMA_REQUIRED_INPUT_MAX >= input.size` + * If `output.pos < output.size`, decoder has flushed everything it could. + * @return : 0 when a frame is completely decoded and fully flushed, + * an error code, which can be tested using FL2_isError(), + * 1, which means there is still some decoding to do to complete current frame. + * *******************************************************************************/ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct FL2_DStream_s FL2_DStream; + +/*===== FL2_DStream management functions =====*/ +FL2LIB_API FL2_DStream* FL2LIB_CALL FL2_createDStream(void); +FL2LIB_API size_t FL2LIB_CALL FL2_freeDStream(FL2_DStream* fds); + +/*===== Streaming decompression functions =====*/ +FL2LIB_API size_t FL2LIB_CALL FL2_initDStream(FL2_DStream* fds); +FL2LIB_API size_t FL2LIB_CALL FL2_decompressStream(FL2_DStream* fds, FL2_outBuffer* output, FL2_inBuffer* input); + +/*-*************************************************************************** + * Compression parameters - HowTo + * + * Any function that takes a 'compressionLevel' parameter will replace any + * parameters affected by compression level that are already set. + * Call FL2_CCtx_setParameter with FL2_p_compressionLevel to set the level, + * then call FL2_CCtx_setParameter again with any other settings to change. + * Specify compressionLevel=0 when calling a compression function. + * *******************************************************************************/ + +#define FL2_DICTLOG_MAX_32 27 +#define FL2_DICTLOG_MAX_64 30 +#define FL2_DICTLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? 
FL2_DICTLOG_MAX_32 : FL2_DICTLOG_MAX_64)) +#define FL2_DICTLOG_MIN 20 +#define FL2_CHAINLOG_MAX 14 +#define FL2_CHAINLOG_MIN 4 +#define FL2_SEARCHLOG_MAX (FL2_CHAINLOG_MAX-1) +#define FL2_SEARCHLOG_MIN 0 +#define FL2_FASTLENGTH_MIN 6 /* only used by optimizer */ +#define FL2_FASTLENGTH_MAX 273 /* only used by optimizer */ +#define FL2_BLOCK_OVERLAP_MIN 0 +#define FL2_BLOCK_OVERLAP_MAX 14 +#define FL2_BLOCK_LOG_MIN 12 +#define FL2_BLOCK_LOG_MAX 32 +#define FL2_SEARCH_DEPTH_MIN 6 +#define FL2_SEARCH_DEPTH_MAX 254 +#define FL2_BUFFER_SIZE_LOG_MIN 6 +#define FL2_BUFFER_SIZE_LOG_MAX 12 +#define FL2_LC_MIN 0 +#define FL2_LC_MAX 4 +#define FL2_LP_MIN 0 +#define FL2_LP_MAX 4 +#define FL2_PB_MIN 0 +#define FL2_PB_MAX 4 + +typedef enum { + /* compression parameters */ + FL2_p_compressionLevel, /* Update all compression parameters according to pre-defined cLevel table + * Default level is FL2_CLEVEL_DEFAULT==9. + * Setting FL2_p_highCompression to 1 switches to an alternate cLevel table. + * Special: value 0 means "do not change cLevel". */ + FL2_p_highCompression, /* Maximize compression ratio for a given dictionary size. + * Has 9 levels instead of 12, with dictionaryLog 20 - 28. */ + FL2_p_7zLevel, /* For use by the 7-zip fork employing this library. 1 - 9 */ + FL2_p_dictionaryLog, /* Maximum allowed back-reference distance, expressed as power of 2. + * Must be clamped between FL2_DICTLOG_MIN and FL2_DICTLOG_MAX. + * Special: value 0 means "do not change dictionaryLog". */ + FL2_p_overlapFraction, /* The radix match finder is block-based, so some overlap is retained from + * each block to improve compression of the next. This value is expressed + * as n / 16 of the block size (dictionary size). Larger values are slower. + * Values above 2 mostly yield only a small improvement in compression. */ + FL2_p_blockSize, + FL2_p_bufferLog, /* Buffering speeds up the matchfinder. Buffer size is + * 2 ^ (dictionaryLog - bufferLog). Lower number = slower, better compression, + * higher memory usage. */ + FL2_p_chainLog, /* Size of the full-search table, as a power of 2. + * Resulting table size is (1 << (chainLog+2)). + * Larger tables result in better and slower compression. + * This parameter is useless when using "fast" strategy. + * Special: value 0 means "do not change chainLog". */ + FL2_p_searchLog, /* Number of search attempts, as a power of 2, made by the HC3 match finder + * used only in hybrid mode. + * More attempts result in slightly better and slower compression. + * This parameter is not used by the "fast" and "optimize" strategies. + * Special: value 0 means "do not change searchLog". */ + FL2_p_literalCtxBits, /* lc value for LZMA2 encoder */ + FL2_p_literalPosBits, /* lp value for LZMA2 encoder */ + FL2_p_posBits, /* pb value for LZMA2 encoder */ + FL2_p_searchDepth, /* Match finder will resolve string matches up to this length. If a longer + * match exists further back in the input, it will not be found. */ + FL2_p_fastLength, /* Only useful for strategies >= opt. + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger and slower. + * Special: value 0 means "do not change fastLength". */ + FL2_p_divideAndConquer, /* Split long chains of 2-byte matches into shorter chains with a small overlap + * during further processing. Allows buffering of all chains at length 2. + * Faster, less compression. Generally a good tradeoff. Enabled by default. */ + FL2_p_strategy, /* 1 = fast; 2 = optimize, 3 = ultra (hybrid mode). 
+ * The higher the value of the selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "do not change strategy". */ +#ifndef NO_XXHASH + FL2_p_doXXHash, /* Calculate a 32-bit xxhash value from the input data and store it + * after the stream terminator. The value will be checked on decompression. + * 0 = do not calculate; 1 = calculate (default) */ +#endif + FL2_p_omitProperties, /* Omit the property byte at the start of the stream. For use within 7-zip */ + /* or other containers which store the property byte elsewhere. */ + /* Cannot be decoded by this library. */ +#ifdef RMF_REFERENCE + FL2_p_useReferenceMF /* Use the reference matchfinder for development purposes. SLOW. */ +#endif +} FL2_cParameter; + + +/*! FL2_CCtx_setParameter() : + * Set one compression parameter, selected by enum FL2_cParameter. + * @result : informational value (typically, the one being set, possibly corrected), + * or an error code (which can be tested with FL2_isError()). */ +FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, unsigned value); +FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, unsigned value); + +/*************************************** +* Context memory usage +***************************************/ + +/*! FL2_estimate*() : +* These functions estimate memory usage of a CCtx before its creation or before any operation has begun. +* FL2_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one. +* To use FL2_estimateCCtxSize_usingCCtx, set the compression level and any other settings for the context, +* then call the function. Some allocation occurs when the context is created, but the large memory buffers +* used for string matching are allocated only when compression begins. */ + +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize(int compressionLevel, unsigned nbThreads); /*!< memory usage determined by level */ +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_usingCCtx(const FL2_CCtx* cctx); /*!< memory usage determined by settings */ +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads); +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCCtx(const FL2_CStream* fcs); + +#endif /* FAST_LZMA2_H */ + +#if defined (__cplusplus) +} +#endif diff --git a/C/fast-lzma2/fastpos_table.h b/C/fast-lzma2/fastpos_table.h new file mode 100644 index 00000000..fabe2852 --- /dev/null +++ b/C/fast-lzma2/fastpos_table.h @@ -0,0 +1,262 @@ +/* This file has been automatically generated by fastpos_tablegen.c. 
*/ +/* Copied from the XZ project */ + + +static const BYTE distance_table[1 << kFastDistBits] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 
22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23 +}; diff --git a/C/fast-lzma2/fl2_common.c b/C/fast-lzma2/fl2_common.c new file mode 100644 index 00000000..85780c56 --- /dev/null +++ b/C/fast-lzma2/fl2_common.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * Modified for FL2 by Conor McCarthy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdlib.h> /* malloc, calloc, free */
+#include <string.h> /* memset */
+#include "fast-lzma2.h"
+#include "fl2_error_private.h"
+#include "fl2_internal.h"
+
+
+/*-****************************************
+* Version
+******************************************/
+FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void) { return FL2_VERSION_NUMBER; }
+
+FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void) { return FL2_VERSION_STRING; }
+
+
+FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize)
+{
+    return FL2_COMPRESSBOUND(srcSize);
+}
+
+/*-****************************************
+* FL2 Error Management
+******************************************/
+/*! FL2_isError() :
+ * tells if a return value is an error code */
+FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code) { return ERR_isError(code); }
+
+/*! FL2_getErrorName() :
+ * provides error code string from function result (useful for debugging) */
+FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+/*! FL2_getErrorCode() :
+ * convert a `size_t` function result into a proper FL2_ErrorCode enum */
+FL2LIB_API FL2_ErrorCode FL2LIB_CALL FL2_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+
+/*! FL2_getErrorString() :
+ * provides error code string from enum */
+FL2LIB_API const char* FL2LIB_CALL FL2_getErrorString(FL2_ErrorCode code) { return ERR_getFL2ErrorString(code); }
+
+/*! g_debuglog_enable :
+ * turn on/off debug traces (global switch) */
+#if defined(FL2_DEBUG) && (FL2_DEBUG >= 2)
+int g_debuglog_enable = 1;
+#endif
+
diff --git a/C/fast-lzma2/fl2_compress.c b/C/fast-lzma2/fl2_compress.c
new file mode 100644
index 00000000..7785364b
--- /dev/null
+++ b/C/fast-lzma2/fl2_compress.c
@@ -0,0 +1,1028 @@
+/*
+* Copyright (c) 2018, Conor McCarthy
+* All rights reserved.
+* Parts based on zstd_compress.c copyright Yann Collet
+*
+* This source code is licensed under both the BSD-style license (found in the
+* LICENSE file in the root directory of this source tree) and the GPLv2 (found
+* in the COPYING file in the root directory of this source tree).
+* You may select, at your option, one of the above-listed licenses.
+*/ + +#include +#include "fast-lzma2.h" +#include "fl2_internal.h" +#include "platform.h" +#include "mem.h" +#include "util.h" +#include "fl2_compress_internal.h" +#include "fl2threading.h" +#include "fl2pool.h" +#include "radix_mf.h" +#include "lzma2_enc.h" + +#define MIN_BYTES_PER_THREAD 0x10000 + +#define ALIGNMENT_MASK (~(size_t)15) + +/*-===== Pre-defined compression levels =====-*/ + +#define FL2_CLEVEL_DEFAULT 9 +#define FL2_MAX_CLEVEL 12 +#define FL2_MAX_7Z_CLEVEL 9 +#define FL2_MAX_HIGH_CLEVEL 9 + +FL2LIB_API int FL2LIB_CALL FL2_maxCLevel(void) +{ + return FL2_MAX_CLEVEL; +} + +FL2LIB_API int FL2LIB_CALL FL2_maxHighCLevel(void) +{ + return FL2_MAX_HIGH_CLEVEL; +} + +static const FL2_compressionParameters FL2_defaultCParameters[FL2_MAX_CLEVEL + 1] = { + { 0,0,0,0,0,0,0 }, + { 20, 1, 7, 0, 6, 32, 1, 8, FL2_fast }, /* 1 */ + { 20, 2, 7, 0, 12, 32, 1, 8, FL2_fast }, /* 2 */ + { 21, 2, 7, 0, 14, 32, 1, 8, FL2_fast }, /* 3 */ + { 20, 2, 7, 0, 12, 32, 1, 8, FL2_opt }, /* 4 */ + { 21, 2, 7, 0, 14, 40, 1, 8, FL2_opt }, /* 5 */ + { 22, 2, 7, 0, 26, 40, 1, 8, FL2_opt }, /* 6 */ + { 23, 2, 8, 0, 42, 48, 1, 8, FL2_opt }, /* 7 */ + { 24, 2, 9, 0, 42, 48, 1, 8, FL2_ultra }, /* 8 */ + { 25, 2, 10, 0, 50, 64, 1, 8, FL2_ultra }, /* 9 */ + { 26, 2, 11, 1, 60, 64, 1, 9, FL2_ultra }, /* 10 */ + { 27, 2, 12, 2, 126, 96, 1, 10, FL2_ultra }, /* 11 */ + { 28, 2, 14, 3, 254, 160, 1, 10, FL2_ultra } /* 12 */ +}; + +static const FL2_compressionParameters FL2_7zCParameters[FL2_MAX_7Z_CLEVEL + 1] = { + { 0,0,0,0,0,0,0 }, + { 20, 1, 7, 0, 6, 32, 1, 8, FL2_fast }, /* 1 */ + { 20, 2, 7, 0, 12, 32, 1, 8, FL2_fast }, /* 2 */ + { 21, 2, 7, 0, 16, 32, 1, 8, FL2_fast }, /* 3 */ + { 20, 2, 7, 0, 16, 32, 1, 8, FL2_opt }, /* 4 */ + { 24, 2, 9, 0, 40, 48, 1, 8, FL2_ultra }, /* 5 */ + { 25, 2, 10, 0, 48, 64, 1, 8, FL2_ultra }, /* 6 */ + { 26, 2, 11, 1, 60, 96, 1, 9, FL2_ultra }, /* 7 */ + { 27, 2, 12, 2, 128, 128, 1, 10, FL2_ultra }, /* 8 */ + { 27, 3, 14, 3, 252, 160, 0, 10, FL2_ultra } /* 9 */ +}; + +static const FL2_compressionParameters FL2_highCParameters[FL2_MAX_HIGH_CLEVEL + 1] = { + { 0,0,0,0,0,0,0 }, + { 20, 3, 9, 1, 60, 128, 0, 8, FL2_ultra }, /* 1 */ + { 21, 3, 10, 1, 60, 128, 0, 8, FL2_ultra }, /* 2 */ + { 22, 3, 11, 2, 60, 128, 0, 8, FL2_ultra }, /* 3 */ + { 23, 3, 12, 2, 60, 128, 0, 8, FL2_ultra }, /* 4 */ + { 24, 3, 13, 3, 60, 128, 0, 8, FL2_ultra }, /* 5 */ + { 25, 3, 14, 3, 60, 160, 0, 8, FL2_ultra }, /* 6 */ + { 26, 3, 14, 4, 60, 160, 0, 8, FL2_ultra }, /* 7 */ + { 27, 3, 14, 4, 128, 160, 0, 8, FL2_ultra }, /* 8 */ + { 28, 3, 14, 5, 128, 160, 0, 9, FL2_ultra } /* 9 */ +}; + +void FL2_fillParameters(FL2_CCtx* const cctx, const FL2_compressionParameters* const params) +{ + FL2_lzma2Parameters* const cParams = &cctx->params.cParams; + RMF_parameters* const rParams = &cctx->params.rParams; + cParams->lc = 3; + cParams->lp = 0; + cParams->pb = 2; + cParams->fast_length = params->fastLength; + cParams->match_cycles = 1U << params->searchLog; + cParams->strategy = params->strategy; + cParams->second_dict_bits = params->chainLog; + cParams->random_filter = 0; + rParams->dictionary_log = MIN(params->dictionaryLog, FL2_DICTLOG_MAX); /* allow for reduced dict in 32-bit version */ + rParams->match_buffer_log = params->bufferLog; + rParams->overlap_fraction = params->overlapFraction; + rParams->block_size_log = rParams->dictionary_log + 2; + rParams->divide_and_conquer = params->divideAndConquer; + rParams->depth = params->searchDepth; +} + +FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtx(void) +{ + return 
FL2_createCCtxMt(1); +} + +FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtxMt(unsigned nbThreads) +{ + FL2_CCtx* cctx; + +#ifndef FL2_SINGLETHREAD + if (!nbThreads) { + nbThreads = UTIL_countPhysicalCores(); + nbThreads += !nbThreads; + } + if (nbThreads > FL2_MAXTHREADS) { + nbThreads = FL2_MAXTHREADS; + } +#else + nbThreads = 1; +#endif + + DEBUGLOG(3, "FL2_createCCtxMt : %u threads", nbThreads); + + cctx = malloc(sizeof(FL2_CCtx) + (nbThreads - 1) * sizeof(FL2_job)); + if (cctx == NULL) + return NULL; + + cctx->jobCount = nbThreads; + for (unsigned u = 0; u < nbThreads; ++u) { + cctx->jobs[u].enc = NULL; + } + + cctx->params.highCompression = 0; + FL2_CCtx_setParameter(cctx, FL2_p_compressionLevel, FL2_CLEVEL_DEFAULT); +#ifndef NO_XXHASH + cctx->params.doXXH = 1; +#endif + cctx->params.omitProp = 0; + +#ifdef RMF_REFERENCE + cctx->params.rParams.use_ref_mf = 0; +#endif + + cctx->matchTable = NULL; + +#ifndef FL2_SINGLETHREAD + cctx->factory = FL2POOL_create(nbThreads - 1); + if (nbThreads > 1 && cctx->factory == NULL) { + FL2_freeCCtx(cctx); + return NULL; + } +#endif + + for (unsigned u = 0; u < nbThreads; ++u) { + cctx->jobs[u].enc = FL2_lzma2Create(); + if (cctx->jobs[u].enc == NULL) { + FL2_freeCCtx(cctx); + return NULL; + } + cctx->jobs[u].cctx = cctx; + } + cctx->dictMax = 0; + cctx->block_total = 0; + + return cctx; +} + +FL2LIB_API void FL2LIB_CALL FL2_freeCCtx(FL2_CCtx* cctx) +{ + if (cctx == NULL) + return; + + DEBUGLOG(3, "FL2_freeCCtx : %u threads", cctx->jobCount); + + for (unsigned u = 0; u < cctx->jobCount; ++u) { + FL2_lzma2Free(cctx->jobs[u].enc); + } + +#ifndef FL2_SINGLETHREAD + FL2POOL_free(cctx->factory); +#endif + + RMF_freeMatchTable(cctx->matchTable); + free(cctx); +} + +FL2LIB_API unsigned FL2LIB_CALL FL2_CCtx_nbThreads(const FL2_CCtx* cctx) +{ + return cctx->jobCount; +} + +/* FL2_buildRadixTable() : FL2POOL_function type */ +static void FL2_buildRadixTable(void* const jobDescription, size_t n) +{ + const FL2_job* const job = (FL2_job*)jobDescription; + FL2_CCtx* const cctx = job->cctx; + + RMF_buildTable(cctx->matchTable, n, 1, cctx->curBlock, NULL, NULL, 0, 0); +} + +/* FL2_compressRadixChunk() : FL2POOL_function type */ +static void FL2_compressRadixChunk(void* const jobDescription, size_t n) +{ + const FL2_job* const job = (FL2_job*)jobDescription; + FL2_CCtx* const cctx = job->cctx; + + cctx->jobs[n].cSize = FL2_lzma2Encode(cctx->jobs[n].enc, cctx->matchTable, job->block, &cctx->params.cParams, NULL, NULL, 0, 0); +} + +static int FL2_initEncoders(FL2_CCtx* const cctx) +{ + for(unsigned u = 0; u < cctx->jobCount; ++u) { + if (FL2_lzma2HashAlloc(cctx->jobs[u].enc, &cctx->params.cParams) != 0) + return 1; + } + return 0; +} + +static size_t FL2_compressCurBlock(FL2_CCtx* const cctx, FL2_progressFn progress, void* opaque) +{ + size_t const encodeSize = (cctx->curBlock.end - cctx->curBlock.start); + size_t init_done; + U32 rmf_weight = ZSTD_highbit32((U32)cctx->curBlock.end); + U32 depth_weight = 2 + (cctx->params.rParams.depth >= 12) + (cctx->params.rParams.depth >= 28); + U32 enc_weight; + int err = 0; +#ifndef FL2_SINGLETHREAD + size_t mfThreads = cctx->curBlock.end / RMF_MIN_BYTES_PER_THREAD; + size_t nbThreads = MIN(cctx->jobCount, encodeSize / MIN_BYTES_PER_THREAD); + nbThreads += !nbThreads; +#else + size_t mfThreads = 1; + size_t nbThreads = 1; +#endif + + if (rmf_weight >= 20) { + rmf_weight = depth_weight * (rmf_weight - 10) + (rmf_weight - 19) * 12; + if (cctx->params.cParams.strategy == 0) + enc_weight = 20; + else if (cctx->params.cParams.strategy 
== 1) + enc_weight = 50; + else + enc_weight = 60 + cctx->params.cParams.second_dict_bits + ZSTD_highbit32(cctx->params.cParams.fast_length) * 3U; + rmf_weight = (rmf_weight << 4) / (rmf_weight + enc_weight); + enc_weight = 16 - rmf_weight; + } + else { + rmf_weight = 8; + enc_weight = 8; + } + + DEBUGLOG(5, "FL2_compressCurBlock : %u threads, %u start, %u bytes", (U32)nbThreads, (U32)cctx->curBlock.start, (U32)encodeSize); + + /* Free unsuitable match table before reallocating anything else */ + if (cctx->matchTable && !RMF_compatibleParameters(cctx->matchTable, &cctx->params.rParams, cctx->curBlock.end)) { + RMF_freeMatchTable(cctx->matchTable); + cctx->matchTable = NULL; + } + + if(FL2_initEncoders(cctx) != 0) /* Create hash objects together, leaving the (large) match table last */ + return FL2_ERROR(memory_allocation); + + if (!cctx->matchTable) { + cctx->matchTable = RMF_createMatchTable(&cctx->params.rParams, cctx->curBlock.end, cctx->jobCount); + if (cctx->matchTable == NULL) + return FL2_ERROR(memory_allocation); + } + else { + DEBUGLOG(5, "Have compatible match table"); + RMF_applyParameters(cctx->matchTable, &cctx->params.rParams, cctx->curBlock.end); + } + + { size_t sliceStart = cctx->curBlock.start; + size_t sliceSize = encodeSize / nbThreads; + cctx->jobs[0].block.data = cctx->curBlock.data; + cctx->jobs[0].block.start = sliceStart; + cctx->jobs[0].block.end = sliceStart + sliceSize; + + for (size_t u = 1; u < nbThreads; ++u) { + sliceStart += sliceSize; + cctx->jobs[u].block.data = cctx->curBlock.data; + cctx->jobs[u].block.start = sliceStart; + cctx->jobs[u].block.end = sliceStart + sliceSize; + } + cctx->jobs[nbThreads - 1].block.end = cctx->curBlock.end; + } + + /* update largest dict size used */ + cctx->dictMax = MAX(cctx->dictMax, cctx->curBlock.end); + + /* initialize to length 2 */ + init_done = RMF_initTable(cctx->matchTable, cctx->curBlock.data, cctx->curBlock.start, cctx->curBlock.end); + +#ifndef FL2_SINGLETHREAD + mfThreads = MIN(RMF_threadCount(cctx->matchTable), mfThreads); + for (size_t u = 1; u < mfThreads; ++u) { + FL2POOL_add(cctx->factory, FL2_buildRadixTable, &cctx->jobs[u], u); + } +#endif + + err = RMF_buildTable(cctx->matchTable, 0, mfThreads > 1, cctx->curBlock, progress, opaque, rmf_weight, init_done); + +#ifndef FL2_SINGLETHREAD + + FL2POOL_waitAll(cctx->factory); + + if (err) + return FL2_ERROR(canceled); + +#ifdef RMF_CHECK_INTEGRITY + err = RMF_integrityCheck(cctx->matchTable, cctx->curBlock.data, cctx->curBlock.start, cctx->curBlock.end, cctx->params.rParams.depth); + if (err) + return FL2_ERROR(internal); +#endif + + for (size_t u = 1; u < nbThreads; ++u) { + FL2POOL_add(cctx->factory, FL2_compressRadixChunk, &cctx->jobs[u], u); + } + + cctx->jobs[0].cSize = FL2_lzma2Encode(cctx->jobs[0].enc, cctx->matchTable, cctx->jobs[0].block, &cctx->params.cParams, progress, opaque, (rmf_weight * encodeSize) >> 4, enc_weight * (U32)nbThreads); + FL2POOL_waitAll(cctx->factory); + +#else /* FL2_SINGLETHREAD */ + + if (err) + return FL2_ERROR(canceled); + +#ifdef RMF_CHECK_INTEGRITY + err = RMF_integrityCheck(cctx->matchTable, cctx->curBlock.data, cctx->curBlock.start, cctx->curBlock.end, cctx->params.rParams.depth); + if (err) + return FL2_ERROR(internal); +#endif + cctx->jobs[0].cSize = FL2_lzma2Encode(cctx->jobs[0].enc, cctx->matchTable, cctx->jobs[0].block, &cctx->params.cParams, progress, opaque, (rmf_weight * encodeSize) >> 4, enc_weight); + +#endif + + return nbThreads; +} + +FL2LIB_API void FL2LIB_CALL FL2_beginFrame(FL2_CCtx* const cctx) +{ + 
cctx->dictMax = 0; + cctx->block_total = 0; +} + +static size_t FL2_compressBlock(FL2_CCtx* const cctx, + const void* const src, size_t srcStart, size_t const srcEnd, + void* const dst, size_t dstCapacity, + FL2_writerFn const writeFn, void* const opaque, + FL2_progressFn progress) +{ + BYTE* dstBuf = dst; + size_t outSize = 0; + size_t const dictionary_size = (size_t)1 << cctx->params.rParams.dictionary_log; + size_t const block_overlap = OVERLAP_FROM_DICT_LOG(cctx->params.rParams.dictionary_log, cctx->params.rParams.overlap_fraction); + + if (srcStart >= srcEnd) + return 0; + cctx->curBlock.data = src; + cctx->curBlock.start = srcStart; + + while (srcStart < srcEnd) { + size_t nbThreads; + + cctx->curBlock.end = cctx->curBlock.start + MIN(srcEnd - srcStart, dictionary_size - cctx->curBlock.start); + + nbThreads = FL2_compressCurBlock(cctx, progress, opaque); + if (FL2_isError(nbThreads)) + return nbThreads; + + for (size_t u = 0; u < nbThreads; ++u) { + const BYTE* const outBuf = RMF_getTableAsOutputBuffer(cctx->matchTable, cctx->jobs[u].block.start); + + if (FL2_isError(cctx->jobs[u].cSize)) + return cctx->jobs[u].cSize; + + DEBUGLOG(5, "Write thread %u : %u bytes", (U32)u, (U32)cctx->jobs[u].cSize); + + if (writeFn == NULL && dstCapacity < cctx->jobs[u].cSize) { + return FL2_ERROR(dstSize_tooSmall); + } + if (writeFn != NULL) { + if(writeFn(outBuf, cctx->jobs[u].cSize, opaque)) + return FL2_ERROR(write_failed); + outSize += cctx->jobs[u].cSize; + } + else { + memcpy(dstBuf, outBuf, cctx->jobs[u].cSize); + dstBuf += cctx->jobs[u].cSize; + dstCapacity -= cctx->jobs[u].cSize; + } + } + srcStart += cctx->curBlock.end - cctx->curBlock.start; + cctx->block_total += cctx->curBlock.end - cctx->curBlock.start; + if (cctx->params.rParams.block_size_log && cctx->block_total + MIN(cctx->curBlock.end - block_overlap, srcEnd - srcStart) > ((U64)1 << cctx->params.rParams.block_size_log)) { + /* periodically reset the dictionary for mt decompression */ + cctx->curBlock.start = 0; + cctx->block_total = 0; + } + else { + cctx->curBlock.start = block_overlap; + } + cctx->curBlock.data += cctx->curBlock.end - cctx->curBlock.start; + } + return (writeFn != NULL) ? 
outSize : dstBuf - (const BYTE*)dst; +} + +static BYTE FL2_getProp(FL2_CCtx* cctx, size_t dictionary_size) +{ + return FL2_getDictSizeProp(dictionary_size) +#ifndef NO_XXHASH + | (BYTE)((cctx->params.doXXH != 0) << FL2_PROP_HASH_BIT) +#endif + ; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + BYTE* dstBuf = dst; + BYTE* const end = dstBuf + dstCapacity; + size_t cSize = 0; + + if (compressionLevel > 0) + FL2_CCtx_setParameter(cctx, FL2_p_compressionLevel, compressionLevel); + + DEBUGLOG(4, "FL2_compressCCtx : level %u, %u src => %u avail", cctx->params.compressionLevel, (U32)srcSize, (U32)dstCapacity); + + if (dstCapacity < 2U - cctx->params.omitProp) /* empty LZMA2 stream is byte sequence {0, 0} */ + return FL2_ERROR(dstSize_tooSmall); + + FL2_beginFrame(cctx); + + dstBuf += !cctx->params.omitProp; + cSize = FL2_compressBlock(cctx, src, 0, srcSize, dstBuf, end - dstBuf, NULL, NULL, NULL); + if(!cctx->params.omitProp) + dstBuf[-1] = FL2_getProp(cctx, cctx->dictMax); + + if (FL2_isError(cSize)) + return cSize; + + dstBuf += cSize; + if(dstBuf >= end) + return FL2_ERROR(dstSize_tooSmall); + *dstBuf++ = LZMA2_END_MARKER; + +#ifndef NO_XXHASH + if (cctx->params.doXXH && !cctx->params.omitProp) { + XXH32_canonical_t canonical; + DEBUGLOG(5, "Writing hash"); + if(end - dstBuf < XXHASH_SIZEOF) + return FL2_ERROR(dstSize_tooSmall); + XXH32_canonicalFromHash(&canonical, XXH32(src, srcSize, 0)); + memcpy(dstBuf, &canonical, XXHASH_SIZEOF); + dstBuf += XXHASH_SIZEOF; + } +#endif + return dstBuf - (BYTE*)dst; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* cctx) +{ + return OVERLAP_FROM_DICT_LOG(cctx->params.rParams.dictionary_log, cctx->params.rParams.overlap_fraction); +} + +FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(FL2_CCtx* cctx, FL2_blockBuffer *block) +{ + FL2_shiftBlock_switch(cctx, block, NULL); +} + +FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(FL2_CCtx* cctx, FL2_blockBuffer *block, unsigned char *dst) +{ + size_t const block_overlap = OVERLAP_FROM_DICT_LOG(cctx->params.rParams.dictionary_log, cctx->params.rParams.overlap_fraction); + + if (block_overlap == 0) { + block->start = 0; + block->end = 0; + } + else if (block->end > block_overlap) { + size_t const from = (block->end - block_overlap) & ALIGNMENT_MASK; + size_t overlap = block->end - from; + + cctx->block_total += block->end - block->start; + if (cctx->params.rParams.block_size_log && cctx->block_total + from > ((U64)1 << cctx->params.rParams.block_size_log)) { + /* periodically reset the dictionary for mt decompression */ + overlap = 0; + cctx->block_total = 0; + } + else if (overlap <= from || dst != NULL) { + DEBUGLOG(5, "Copy overlap data : %u bytes", (U32)overlap); + memcpy(dst ? 
dst : block->data, block->data + from, overlap); + } + else if (from != 0) { + DEBUGLOG(5, "Move overlap data : %u bytes", (U32)overlap); + memmove(block->data, block->data + from, overlap); + } + block->start = overlap; + block->end = overlap; + } + else { + block->start = block->end; + } +} + +FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock(FL2_CCtx* cctx, + void* dst, size_t dstCapacity, + const FL2_blockBuffer *block, + FL2_progressFn progress, void* opaque) +{ + return FL2_compressBlock(cctx, block->data, block->start, block->end, dst, dstCapacity, NULL, opaque, progress); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_endFrame(FL2_CCtx* ctx, + void* dst, size_t dstCapacity) +{ + if (!dstCapacity) + return FL2_ERROR(dstSize_tooSmall); + *(BYTE*)dst = LZMA2_END_MARKER; + return 1; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock_toFn(FL2_CCtx* cctx, + FL2_writerFn writeFn, void* opaque, + const FL2_blockBuffer *block, + FL2_progressFn progress) +{ + return FL2_compressBlock(cctx, block->data, block->start, block->end, NULL, 0, writeFn, opaque, progress); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_endFrame_toFn(FL2_CCtx* ctx, + FL2_writerFn writeFn, void* opaque) +{ + BYTE c = LZMA2_END_MARKER; + if(writeFn(&c, 1, opaque)) + return FL2_ERROR(write_failed); + return 1; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_compressMt(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel, + unsigned nbThreads) +{ + size_t cSize; + FL2_CCtx* const cctx = FL2_createCCtxMt(nbThreads); + if (cctx == NULL) + return FL2_ERROR(memory_allocation); + + cSize = FL2_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + + FL2_freeCCtx(cctx); + return cSize; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + return FL2_compressMt(dst, dstCapacity, src, srcSize, compressionLevel, 1); +} + +FL2LIB_API BYTE FL2LIB_CALL FL2_dictSizeProp(FL2_CCtx* cctx) +{ + return FL2_getDictSizeProp(cctx->dictMax ? 
cctx->dictMax : (size_t)1 << cctx->params.rParams.dictionary_log);
+}
+
+#define CLAMPCHECK(val,min,max) {                 \
+    if (((val)<(min)) | ((val)>(max))) {          \
+        return FL2_ERROR(parameter_outOfBound);   \
+}   }
+
+FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, unsigned value)
+{
+    switch (param)
+    {
+    case FL2_p_compressionLevel:
+        if (value > 0) { /* 0 : does not change current level */
+            if (cctx->params.highCompression) {
+                if ((int)value > FL2_MAX_HIGH_CLEVEL) value = FL2_MAX_HIGH_CLEVEL;
+                FL2_fillParameters(cctx, &FL2_highCParameters[value]);
+            }
+            else {
+                if ((int)value > FL2_MAX_CLEVEL) value = FL2_MAX_CLEVEL;
+                FL2_fillParameters(cctx, &FL2_defaultCParameters[value]);
+            }
+            cctx->params.compressionLevel = value;
+        }
+        return cctx->params.compressionLevel;
+
+    case FL2_p_highCompression:
+        if ((int)value >= 0) { /* < 0 : does not change highCompression */
+            cctx->params.highCompression = value != 0;
+            FL2_CCtx_setParameter(cctx, FL2_p_compressionLevel, cctx->params.compressionLevel);
+        }
+        return cctx->params.highCompression;
+
+    case FL2_p_7zLevel:
+        if (value > 0) { /* 0 : does not change current level */
+            if ((int)value > FL2_MAX_7Z_CLEVEL) value = FL2_MAX_7Z_CLEVEL;
+            FL2_fillParameters(cctx, &FL2_7zCParameters[value]);
+            cctx->params.compressionLevel = value;
+        }
+        return cctx->params.compressionLevel;
+
+    case FL2_p_dictionaryLog:
+        if (value) { /* 0 : does not change current dictionaryLog */
+            CLAMPCHECK(value, FL2_DICTLOG_MIN, FL2_DICTLOG_MAX);
+            cctx->params.rParams.dictionary_log = value;
+        }
+        return cctx->params.rParams.dictionary_log;
+
+    case FL2_p_overlapFraction:
+        if ((int)value >= 0) { /* < 0 : does not change current overlapFraction */
+            CLAMPCHECK(value, FL2_BLOCK_OVERLAP_MIN, FL2_BLOCK_OVERLAP_MAX);
+            cctx->params.rParams.overlap_fraction = value;
+        }
+        return cctx->params.rParams.overlap_fraction;
+
+    case FL2_p_blockSize:
+        if ((int)value >= 0) { /* < 0 : does not change current blockSize */
+            CLAMPCHECK(value, FL2_BLOCK_LOG_MIN, FL2_BLOCK_LOG_MAX);
+            cctx->params.rParams.block_size_log = value;
+        }
+        return cctx->params.rParams.block_size_log;
+
+    case FL2_p_bufferLog:
+        if (value) { /* 0 : does not change current bufferLog */
+            CLAMPCHECK(value, FL2_BUFFER_SIZE_LOG_MIN, FL2_BUFFER_SIZE_LOG_MAX);
+            cctx->params.rParams.match_buffer_log = value;
+        }
+        return cctx->params.rParams.match_buffer_log;
+
+    case FL2_p_chainLog:
+        if (value) { /* 0 : does not change current chainLog */
+            CLAMPCHECK(value, FL2_CHAINLOG_MIN, FL2_CHAINLOG_MAX);
+            cctx->params.cParams.second_dict_bits = value;
+        }
+        return cctx->params.cParams.second_dict_bits;
+
+    case FL2_p_searchLog:
+        if ((int)value >= 0) { /* < 0 : does not change current searchLog */
+            CLAMPCHECK(value, FL2_SEARCHLOG_MIN, FL2_SEARCHLOG_MAX);
+            cctx->params.cParams.match_cycles = 1U << value;
+        }
+        return ZSTD_highbit32(cctx->params.cParams.match_cycles); /* derive searchLog from match_cycles */
+
+    case FL2_p_literalCtxBits:
+        if ((int)value >= 0) { /* < 0 : does not change current lc */
+            CLAMPCHECK(value, FL2_LC_MIN, FL2_LC_MAX);
+            cctx->params.cParams.lc = value;
+        }
+        return cctx->params.cParams.lc;
+
+    case FL2_p_literalPosBits:
+        if ((int)value >= 0) { /* < 0 : does not change current lp */
+            CLAMPCHECK(value, FL2_LP_MIN, FL2_LP_MAX);
+            cctx->params.cParams.lp = value;
+        }
+        return cctx->params.cParams.lp;
+
+    case FL2_p_posBits:
+        if ((int)value >= 0) { /* < 0 : does not change current pb */
+            CLAMPCHECK(value, FL2_PB_MIN, FL2_PB_MAX);
+            cctx->params.cParams.pb = value;
+        }
+        return cctx->params.cParams.pb;
+
+    case FL2_p_searchDepth:
+        if (value) { /*
0 : does not change current depth */ + CLAMPCHECK(value, FL2_SEARCH_DEPTH_MIN, FL2_SEARCH_DEPTH_MAX); + cctx->params.rParams.depth = value; + } + return cctx->params.rParams.depth; + + case FL2_p_fastLength: + if (value) { /* 0 : does not change current fast_length */ + CLAMPCHECK(value, FL2_FASTLENGTH_MIN, FL2_FASTLENGTH_MAX); + cctx->params.cParams.fast_length = value; + } + return cctx->params.cParams.fast_length; + + case FL2_p_divideAndConquer: + if ((int)value >= 0) { /* < 0 : does not change current divide_and_conquer */ + cctx->params.rParams.divide_and_conquer = value; + } + return cctx->params.rParams.divide_and_conquer; + + case FL2_p_strategy: + if ((int)value >= 0) { /* < 0 : does not change current strategy */ + CLAMPCHECK(value, (unsigned)FL2_fast, (unsigned)FL2_ultra); + cctx->params.cParams.strategy = (FL2_strategy)value; + } + return (size_t)cctx->params.cParams.strategy; + +#ifndef NO_XXHASH + case FL2_p_doXXHash: + if ((int)value >= 0) { /* < 0 : does not change doXXHash */ + cctx->params.doXXH = value != 0; + } + return cctx->params.doXXH; +#endif + + case FL2_p_omitProperties: + if ((int)value >= 0) { /* < 0 : does not change omitProp */ + cctx->params.omitProp = value != 0; + } + return cctx->params.omitProp; +#ifdef RMF_REFERENCE + case FL2_p_useReferenceMF: + if ((int)value >= 0) { /* < 0 : does not change useRefMF */ + cctx->params.rParams.use_ref_mf = value != 0; + } + return cctx->params.rParams.use_ref_mf; +#endif + default: return FL2_ERROR(parameter_unsupported); + } +} + +FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStream(void) +{ + FL2_CCtx* const cctx = FL2_createCCtx(); + FL2_CStream* const fcs = malloc(sizeof(FL2_CStream)); + + DEBUGLOG(3, "FL2_createCStream"); + + if (cctx == NULL || fcs == NULL) { + free(cctx); + free(fcs); + return NULL; + } + fcs->cctx = cctx; + fcs->inBuff.bufSize = 0; + fcs->inBuff.data = NULL; + fcs->inBuff.start = 0; + fcs->inBuff.end = 0; +#ifndef NO_XXHASH + fcs->xxh = NULL; +#endif + fcs->out_thread = 0; + fcs->thread_count = 0; + fcs->out_pos = 0; + fcs->hash_pos = 0; + fcs->end_marked = 0; + fcs->wrote_prop = 0; + return fcs; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_freeCStream(FL2_CStream* fcs) +{ + if (fcs == NULL) + return 0; + + DEBUGLOG(3, "FL2_freeCStream"); + + free(fcs->inBuff.data); +#ifndef NO_XXHASH + XXH32_freeState(fcs->xxh); +#endif + FL2_freeCCtx(fcs->cctx); + free(fcs); + return 0; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel) +{ + DEBUGLOG(4, "FL2_initCStream level %d", compressionLevel); + + fcs->inBuff.start = 0; + fcs->inBuff.end = 0; + fcs->out_thread = 0; + fcs->thread_count = 0; + fcs->out_pos = 0; + fcs->hash_pos = 0; + fcs->end_marked = 0; + fcs->wrote_prop = 0; + + FL2_CCtx_setParameter(fcs->cctx, FL2_p_compressionLevel, compressionLevel); + +#ifndef NO_XXHASH + if (fcs->cctx->params.doXXH && !fcs->cctx->params.omitProp) { + if (fcs->xxh == NULL) { + fcs->xxh = XXH32_createState(); + if (fcs->xxh == NULL) + return FL2_ERROR(memory_allocation); + } + XXH32_reset(fcs->xxh, 0); + } +#endif + + FL2_beginFrame(fcs->cctx); + return 0; +} + +static size_t FL2_compressStream_internal(FL2_CStream* const fcs, + FL2_outBuffer* const output, int const ending) +{ + FL2_CCtx* const cctx = fcs->cctx; + + if (output->pos >= output->size) + return 0; + + if (fcs->out_thread == fcs->thread_count) { + if (fcs->inBuff.start < fcs->inBuff.end) { +#ifndef NO_XXHASH + if (cctx->params.doXXH && !cctx->params.omitProp) { + XXH32_update(fcs->xxh, fcs->inBuff.data + 
fcs->inBuff.start, fcs->inBuff.end - fcs->inBuff.start); + } +#endif + cctx->curBlock.data = fcs->inBuff.data; + cctx->curBlock.start = fcs->inBuff.start; + cctx->curBlock.end = fcs->inBuff.end; + + fcs->out_thread = 0; + fcs->thread_count = FL2_compressCurBlock(cctx, NULL, NULL); + if (FL2_isError(fcs->thread_count)) + return fcs->thread_count; + + fcs->inBuff.start = fcs->inBuff.end; + } + if (!fcs->wrote_prop && !cctx->params.omitProp) { + size_t dictionary_size = ending ? cctx->dictMax : (size_t)1 << cctx->params.rParams.dictionary_log; + ((BYTE*)output->dst)[output->pos] = FL2_getProp(cctx, dictionary_size); + DEBUGLOG(4, "Writing property byte : 0x%X", ((BYTE*)output->dst)[output->pos]); + ++output->pos; + fcs->wrote_prop = 1; + } + } + for (; fcs->out_thread < fcs->thread_count; ++fcs->out_thread) { + const BYTE* const outBuf = RMF_getTableAsOutputBuffer(cctx->matchTable, cctx->jobs[fcs->out_thread].block.start) + fcs->out_pos; + BYTE* const dstBuf = (BYTE*)output->dst + output->pos; + size_t const dstCapacity = output->size - output->pos; + size_t to_write = cctx->jobs[fcs->out_thread].cSize; + + if (FL2_isError(to_write)) + return to_write; + + to_write = MIN(to_write - fcs->out_pos, dstCapacity); + + DEBUGLOG(5, "CStream : writing %u bytes", (U32)to_write); + + memcpy(dstBuf, outBuf, to_write); + fcs->out_pos += to_write; + output->pos += to_write; + + if (fcs->out_pos < cctx->jobs[fcs->out_thread].cSize) + break; + + fcs->out_pos = 0; + } + return 0; +} + +static size_t FL2_remainingOutputSize(FL2_CStream* const fcs) +{ + FL2_CCtx* const cctx = fcs->cctx; + size_t pos = fcs->out_pos; + size_t total = 0; + + if (FL2_isError(fcs->thread_count)) + return fcs->thread_count; + + for (size_t u = fcs->out_thread; u < fcs->thread_count; ++u) { + size_t to_write = cctx->jobs[u].cSize; + + if (FL2_isError(to_write)) + return to_write; + total += to_write - pos; + pos = 0; + } + return total; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer* output, FL2_inBuffer* input) +{ + FL2_blockBuffer* const inBuff = &fcs->inBuff; + FL2_CCtx* const cctx = fcs->cctx; + size_t block_overlap = OVERLAP_FROM_DICT_LOG(cctx->params.rParams.dictionary_log, cctx->params.rParams.overlap_fraction); + + if (FL2_isError(fcs->thread_count)) + return fcs->thread_count; + + if (output->pos < output->size) while (input->pos < input->size) { + /* read input and/or write output until a buffer is full */ + if (inBuff->data == NULL) { + inBuff->bufSize = (size_t)1 << cctx->params.rParams.dictionary_log; + + DEBUGLOG(3, "Allocating input buffer : %u bytes", (U32)inBuff->bufSize); + + inBuff->data = malloc(inBuff->bufSize); + + if (inBuff->data == NULL) + return FL2_ERROR(memory_allocation); + + inBuff->start = 0; + inBuff->end = 0; + } + if (inBuff->start > block_overlap && input->pos < input->size) { + FL2_shiftBlock(fcs->cctx, inBuff); + } + if (fcs->out_thread == fcs->thread_count) { + /* no compressed output to write, so read */ + size_t const toRead = MIN(input->size - input->pos, inBuff->bufSize - inBuff->end); + + DEBUGLOG(5, "CStream : reading %u bytes", (U32)toRead); + + memcpy(inBuff->data + inBuff->end, (char*)input->src + input->pos, toRead); + input->pos += toRead; + inBuff->end += toRead; + } + if (inBuff->end == inBuff->bufSize || fcs->out_thread < fcs->thread_count) { + CHECK_F(FL2_compressStream_internal(fcs, output, 0)); + } + /* compressed output remains, so output buffer is full */ + if (fcs->out_thread < fcs->thread_count) + break; + } + return (inBuff->data == 
NULL) ? (size_t)1 << cctx->params.rParams.dictionary_log : inBuff->bufSize - inBuff->end; +} + +static size_t FL2_flushStream_internal(FL2_CStream* fcs, FL2_outBuffer* output, int ending) +{ + if (FL2_isError(fcs->thread_count)) + return fcs->thread_count; + + DEBUGLOG(4, "FL2_flushStream_internal : %u to compress, %u to write", + (U32)(fcs->inBuff.end - fcs->inBuff.start), + (U32)FL2_remainingOutputSize(fcs)); + + CHECK_F(FL2_compressStream_internal(fcs, output, ending)); + + return FL2_remainingOutputSize(fcs); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer* output) +{ + return FL2_flushStream_internal(fcs, output, 0); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer* output) +{ + { size_t cSize = FL2_flushStream_internal(fcs, output, 1); + if (cSize != 0) + return cSize; + } + + if(!fcs->end_marked) { + if (output->pos >= output->size) + return 1; + DEBUGLOG(4, "Writing end marker"); + ((BYTE*)output->dst)[output->pos] = LZMA2_END_MARKER; + ++output->pos; + fcs->end_marked = 1; + } + +#ifndef NO_XXHASH + if (fcs->cctx->params.doXXH && !fcs->cctx->params.omitProp && fcs->hash_pos < XXHASH_SIZEOF) { + size_t const to_write = MIN(output->size - output->pos, XXHASH_SIZEOF - fcs->hash_pos); + XXH32_canonical_t canonical; + + if (output->pos >= output->size) + return 1; + + XXH32_canonicalFromHash(&canonical, XXH32_digest(fcs->xxh)); + DEBUGLOG(4, "Writing XXH32 : %u bytes", (U32)to_write); + memcpy((BYTE*)output->dst + output->pos, canonical.digest + fcs->hash_pos, to_write); + output->pos += to_write; + fcs->hash_pos += to_write; + return fcs->hash_pos < XXHASH_SIZEOF; + } +#endif + return 0; +} + +FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, unsigned value) +{ + if (fcs->inBuff.start < fcs->inBuff.end) + return FL2_ERROR(stage_wrong); + return FL2_CCtx_setParameter(fcs->cctx, param, value); +} + + +size_t FL2_memoryUsage_internal(unsigned const dictionaryLog, unsigned const bufferLog, unsigned const searchDepth, + unsigned chainLog, FL2_strategy strategy, + unsigned nbThreads) +{ + size_t size = RMF_memoryUsage(dictionaryLog, bufferLog, searchDepth, nbThreads); + return size + FL2_lzma2MemoryUsage(chainLog, strategy, nbThreads); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize(int compressionLevel, unsigned nbThreads) +{ + return FL2_memoryUsage_internal(FL2_defaultCParameters[compressionLevel].dictionaryLog, + FL2_defaultCParameters[compressionLevel].bufferLog, + FL2_defaultCParameters[compressionLevel].searchDepth, + FL2_defaultCParameters[compressionLevel].chainLog, + FL2_defaultCParameters[compressionLevel].strategy, + nbThreads); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_usingCCtx(const FL2_CCtx * cctx) +{ + return FL2_memoryUsage_internal(cctx->params.rParams.dictionary_log, + cctx->params.rParams.match_buffer_log, + cctx->params.rParams.depth, + cctx->params.cParams.second_dict_bits, + cctx->params.cParams.strategy, + cctx->jobCount); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads) +{ + return FL2_estimateCCtxSize(compressionLevel, nbThreads) + + ((size_t)1 << FL2_defaultCParameters[compressionLevel].dictionaryLog); +} + +FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCCtx(const FL2_CStream* fcs) +{ + return FL2_estimateCCtxSize_usingCCtx(fcs->cctx) + + ((size_t)1 << fcs->cctx->params.rParams.dictionary_log); +} diff --git a/C/fast-lzma2/fl2_compress_internal.h 
b/C/fast-lzma2/fl2_compress_internal.h new file mode 100644 index 00000000..ae69bd8f --- /dev/null +++ b/C/fast-lzma2/fl2_compress_internal.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018, Conor McCarthy + * All rights reserved. + * Parts based on zstd_compress_internal.h copyright Yann Collet + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef FL2_COMPRESS_H +#define FL2_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "data_block.h" +#include "radix_internal.h" +#include "lzma2_enc.h" +#include "fast-lzma2.h" +#include "fl2threading.h" +#include "fl2pool.h" +#ifndef NO_XXHASH +# include "xxhash.h" +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +typedef struct { + unsigned dictionaryLog; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory, slower */ + unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */ + unsigned chainLog; /* fully searched segment : larger == more compression, slower, more memory; hybrid mode only (ultra) */ + unsigned searchLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */ + unsigned searchDepth; /* maximum depth for resolving string matches : larger == more compression, slower; >= 64 == more memory, slower */ + unsigned fastLength; /* acceptable match size for parser, not less than searchDepth : larger == more compression, slower; fast bytes parameter from 7-zip */ + unsigned divideAndConquer; /* split long chains of 2-byte matches into shorter chains with a small overlap : faster, somewhat less compression; enabled by default */ + unsigned bufferLog; /* buffer size for processing match chains is (dictionaryLog - bufferLog) : when divideAndConquer enabled, affects compression; */ + /* when divideAndConquer disabled, affects speed in a hardware-dependent manner */ + FL2_strategy strategy; /* encoder strategy : fast, optimized or ultra (hybrid) */ +} FL2_compressionParameters; + +/*-************************************* +* Context memory management +***************************************/ + +typedef struct { + FL2_lzma2Parameters cParams; + RMF_parameters rParams; + unsigned compressionLevel; + BYTE highCompression; +#ifndef NO_XXHASH + BYTE doXXH; +#endif + BYTE omitProp; +} FL2_CCtx_params; + +typedef struct { + FL2_CCtx* cctx; + FL2_lzmaEncoderCtx* enc; + FL2_dataBlock block; + size_t cSize; +} FL2_job; + +struct FL2_CCtx_s { + FL2_CCtx_params params; +#ifndef FL2_SINGLETHREAD + FL2POOL_ctx* factory; +#endif + FL2_dataBlock curBlock; + size_t dictMax; + U64 block_total; + FL2_matchTable* matchTable; + unsigned jobCount; + FL2_job jobs[1]; +}; + +struct FL2_CStream_s { + FL2_CCtx* cctx; + FL2_blockBuffer inBuff; +#ifndef NO_XXHASH + XXH32_state_t *xxh; +#endif + size_t thread_count; + size_t out_thread; + size_t out_pos; + size_t hash_pos; + BYTE end_marked; + BYTE wrote_prop; +}; + +#if defined (__cplusplus) +} +#endif + + +#endif /* FL2_COMPRESS_H */ diff --git a/C/fast-lzma2/fl2_error_private.c b/C/fast-lzma2/fl2_error_private.c new file mode 100644 index 00000000..66289586 --- /dev/null +++ b/C/fast-lzma2/fl2_error_private.c @@ -0,0 
+1,35 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ * Modified for FL2 by Conor McCarthy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "fl2_error_private.h"
+
+const char* ERR_getFL2ErrorString(ERR_enum code)
+{
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(internal): return "Internal error";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+    case PREFIX(stage_wrong): return "Operation not valid at this stage";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(write_failed): return "Write callback returned an error";
+    case PREFIX(canceled): return "Operation was canceled";
+    /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+}
diff --git a/C/fast-lzma2/fl2_error_private.h b/C/fast-lzma2/fl2_error_private.h
new file mode 100644
index 00000000..32532a9b
--- /dev/null
+++ b/C/fast-lzma2/fl2_error_private.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ * Modified for FL2 by Conor McCarthy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>      /* size_t */
+#include "fl2_errors.h"  /* enum list */
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#if defined(__GNUC__)
+#  define ERR_STATIC static __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define ERR_STATIC static inline
+#elif defined(_MSC_VER)
+#  define ERR_STATIC static __inline
+#else
+#  define ERR_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef FL2_ErrorCode ERR_enum;
+#define PREFIX(name) FL2_error_##name
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#define FL2_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > FL2_ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+const char* ERR_getFL2ErrorString(ERR_enum code);   /* fl2_error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    return ERR_getFL2ErrorString(ERR_getErrorCode(code));
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ERROR_H_MODULE */
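Side note (illustrative; not part of the patch). An FL2 error travels as a
size_t in the top range of the type: FL2_ERROR(name) expands to
(size_t)-FL2_error_name, and ERR_isError() simply tests whether the value
lies above FL2_ERROR(maxCode). A self-contained check using only the macros
declared above:

    #include <assert.h>
    #include "fl2_error_private.h"

    static void error_encoding_example(void)
    {
        size_t const err = FL2_ERROR(memory_allocation); /* == (size_t)-9 */
        assert(ERR_isError(err));                        /* err > SIZE_MAX - 19 */
        assert(ERR_getErrorCode(err) == FL2_error_memory_allocation);
        assert(!ERR_isError((size_t)12345));  /* ordinary sizes pass through */
    }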
diff --git a/C/fast-lzma2/fl2_errors.h b/C/fast-lzma2/fl2_errors.h
new file mode 100644
index 00000000..d669618f
--- /dev/null
+++ b/C/fast-lzma2/fl2_errors.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ * Modified for FL2 by Conor McCarthy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef FL2_ERRORS_H_398273423
+#define FL2_ERRORS_H_398273423
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*===== dependency =====*/
+#include <stddef.h>   /* size_t */
+
+#include "fast-lzma2.h"
+
+/*-****************************************
+ *  error codes list
+ *  note : this API is still considered unstable
+ *         and shall not be used with a dynamic library.
+ *         only static linking is allowed
+ ******************************************/
+typedef enum {
+  FL2_error_no_error = 0,
+  FL2_error_GENERIC = 1,
+  FL2_error_internal = 2,
+  FL2_error_corruption_detected = 3,
+  FL2_error_checksum_wrong = 4,
+  FL2_error_parameter_unsupported = 5,
+  FL2_error_parameter_outOfBound = 6,
+  FL2_error_stage_wrong = 7,
+  FL2_error_init_missing = 8,
+  FL2_error_memory_allocation = 9,
+  FL2_error_dstSize_tooSmall = 10,
+  FL2_error_srcSize_wrong = 11,
+  FL2_error_write_failed = 12,
+  FL2_error_canceled = 13,
+  FL2_error_maxCode = 20  /* never EVER use this value directly, it can change in future versions! Use FL2_isError() instead */
+} FL2_ErrorCode;
+
+/*! FL2_getErrorCode() :
+    convert a `size_t` function result into a `FL2_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+FL2LIB_API FL2_ErrorCode FL2LIB_CALL FL2_getErrorCode(size_t functionResult);
+FL2LIB_API const char* FL2LIB_CALL FL2_getErrorString(FL2_ErrorCode code); /**< Same as FL2_getErrorName, but using a `FL2_ErrorCode` enum argument */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FL2_ERRORS_H_398273423 */
diff --git a/C/fast-lzma2/fl2_internal.h b/C/fast-lzma2/fl2_internal.h
new file mode 100644
index 00000000..aedda654
--- /dev/null
+++ b/C/fast-lzma2/fl2_internal.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ * Modified for FL2 by Conor McCarthy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef FL2_INTERNAL_H_
+#define FL2_INTERNAL_H_
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "mem.h"
+#include "compiler.h"
+#include "fl2_error_private.h"
+
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define FL2_PROP_HASH_BIT 7
+#define FL2_LZMA_PROP_MASK 0x3FU
+#ifndef NO_XXHASH
+#  define XXHASH_SIZEOF sizeof(XXH32_canonical_t)
+#endif
+
+/*-*************************************
+*  Debug
+***************************************/
+#if defined(FL2_DEBUG) && (FL2_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define FL2_STATIC_ASSERT(c) { enum { FL2_static_assert = 1/(int)(!!(c)) }; }
+
+#if defined(FL2_DEBUG) && (FL2_DEBUG>=2)
+#  include <stdio.h>
+extern int g_debuglog_enable;
+/* recommended values for FL2_DEBUG display levels :
+ * 1 : no display, enables assert() only
+ * 2 : reserved for currently active debugging path
+ * 3 : events once per object lifetime (CCtx, CDict)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (*very* verbose) */
+#  define RAWLOG(l, ...) {                               \
+                if ((g_debuglog_enable) & (l<=FL2_DEBUG)) {  \
+                    fprintf(stderr, __VA_ARGS__);            \
+            }   }
+#  define DEBUGLOG(l, ...) {                             \
+                if ((g_debuglog_enable) & (l<=FL2_DEBUG)) {  \
+                    fprintf(stderr, __FILE__ ": ");          \
+                    fprintf(stderr, __VA_ARGS__);            \
+                    fprintf(stderr, " \n");                  \
+            }   }
+#else
+#  define RAWLOG(l, ...)   {}    /* disabled */
+#  define DEBUGLOG(l, ...) {}    /* disabled */
+#endif
+
+
+/*-*************************************
+*  shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; }  /* check and Forward error code */
+#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return FL2_ERROR(e); }  /* check and send Error code */
+
+MEM_STATIC U32 ZSTD_highbit32(U32 val)
+{
+    assert(val != 0);
+    {
+#   if defined(_MSC_VER)   /* Visual */
+        unsigned long r=0;
+        _BitScanReverse(&r, val);
+        return (unsigned)r;
+#   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return 31 - __builtin_clz(val);
+#   else   /* Software version */
+        static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+        U32 v = val;
+        int r;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
+        return r;
+#   endif
+    }
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* FL2_INTERNAL_H_ */
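Worked example (illustrative; not part of the patch). ZSTD_highbit32()
returns floor(log2(val)); fl2_compress.c uses it both to weight progress
estimates and to recover a log2-style parameter from a power of two. The
De Bruijn fallback can be sanity-checked against a naive loop:

    #include <assert.h>
    #include "fl2_internal.h"

    static unsigned naive_highbit32(U32 v)
    {
        unsigned r = 0;
        while (v >>= 1) ++r;   /* floor(log2(v)) for v != 0 */
        return r;
    }

    static void highbit32_example(void)
    {
        U32 v;
        assert(ZSTD_highbit32(1) == 0);
        assert(ZSTD_highbit32(0x10000) == 16);
        for (v = 1; v != 0; v <<= 1)
            assert(ZSTD_highbit32(v) == naive_highbit32(v));
    }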
diff --git a/C/fast-lzma2/fl2pool.c b/C/fast-lzma2/fl2pool.c
new file mode 100644
index 00000000..24d4f9e6
--- /dev/null
+++ b/C/fast-lzma2/fl2pool.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ * Modified for FL2 by Conor McCarthy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ======   Dependencies   ======= */
+#include <stddef.h>   /* size_t */
+#include <stdlib.h>   /* malloc, calloc */
+#include "fl2pool.h"
+#include "fl2_internal.h"
+
+/* ======   Compiler specifics   ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+#ifndef FL2_SINGLETHREAD
+
+#include "fl2threading.h"   /* pthread adaptation */
+
+/* A job is a function and an opaque argument */
+typedef struct FL2POOL_job_s {
+    FL2POOL_function function;
+    void *opaque;
+    size_t n;
+} FL2POOL_job;
+
+struct FL2POOL_ctx_s {
+    /* Keep track of the threads */
+    ZSTD_pthread_t *threads;
+    size_t numThreads;
+
+    /* The queue is a single job */
+    FL2POOL_job queue;
+
+    /* The number of threads working on jobs */
+    size_t numThreadsBusy;
+    /* Indicates if the queue is empty */
+    int queueEmpty;
+
+    /* The mutex protects the queue */
+    ZSTD_pthread_mutex_t queueMutex;
+    /* Condition variable for pushers to wait on when the queue is full */
+    ZSTD_pthread_cond_t queuePushCond;
+    /* Condition variables for poppers to wait on when the queue is empty */
+    ZSTD_pthread_cond_t queuePopCond;
+    /* Indicates if the queue is shutting down */
+    int shutdown;
+};
+
+/* FL2POOL_thread() :
+   Work thread for the thread pool.
+   Waits for jobs and executes them.
+   @returns : NULL on failure else non-null.
+*/
+static void* FL2POOL_thread(void* opaque) {
+    FL2POOL_ctx* const ctx = (FL2POOL_ctx*)opaque;
+    if (!ctx) { return NULL; }
+    for (;;) {
+        /* Lock the mutex and wait for a non-empty queue or until shutdown */
+        ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+
+        while (ctx->queueEmpty && !ctx->shutdown) {
+            ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
+        }
+        /* empty => shutting down: so stop */
+        if (ctx->queueEmpty) {
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+            return opaque;
+        }
+        /* Pop a job off the queue */
+        {   FL2POOL_job const job = ctx->queue;
+            ctx->queueEmpty = 1;
+            /* Unlock the mutex, signal a pusher, and run the job */
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+            ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+
+            job.function(job.opaque, job.n);
+
+            ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+            ctx->numThreadsBusy--;
+            ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+            ZSTD_pthread_cond_signal(&ctx->queuePushCond);
+        }
+    }  /* for (;;) */
+    /* Unreachable */
+}
+
+FL2POOL_ctx* FL2POOL_create(size_t numThreads) {
+    FL2POOL_ctx* ctx;
+    /* Check the parameters */
+    if (!numThreads) { return NULL; }
+    /* Allocate the context and zero initialize */
+    ctx = (FL2POOL_ctx*)calloc(1, sizeof(FL2POOL_ctx));
+    if (!ctx) { return NULL; }
+    /* Initialize the job queue. It holds a single job; queueEmpty
+     * distinguishes an empty slot from a full one. */
+    ctx->numThreadsBusy = 0;
+    ctx->queueEmpty = 1;
+    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+    ctx->shutdown = 0;
+    /* Allocate space for the thread handles */
+    ctx->threads = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t));
+    ctx->numThreads = 0;
+    /* Check for errors */
+    if (!ctx->threads) { FL2POOL_free(ctx); return NULL; }
+    /* Initialize the threads */
+    {   size_t i;
+        for (i = 0; i < numThreads; ++i) {
+            if (FL2_pthread_create(&ctx->threads[i], NULL, &FL2POOL_thread, ctx)) {
+                ctx->numThreads = i;
+                FL2POOL_free(ctx);
+                return NULL;
+        }   }
+        ctx->numThreads = numThreads;
+    }
+    return ctx;
+}
+
+/*! FL2POOL_join() :
+    Shutdown the queue, wake any sleeping threads, and join all of the threads.
+*/
+static void FL2POOL_join(FL2POOL_ctx* ctx) {
+    /* Shut down the queue */
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    ctx->shutdown = 1;
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    /* Wake up sleeping threads */
+    ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
+    ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
+    /* Join all of the threads */
+    {   size_t i;
+        for (i = 0; i < ctx->numThreads; ++i) {
+            FL2_pthread_join(ctx->threads[i], NULL);
+    }   }
+}
+
+void FL2POOL_free(FL2POOL_ctx *ctx) {
+    if (!ctx) { return; }
+    FL2POOL_join(ctx);
+    ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
+    ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
+    ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
+    free(ctx->threads);
+    free(ctx);
+}
+
+size_t FL2POOL_sizeof(FL2POOL_ctx *ctx) {
+    if (ctx==NULL) return 0;  /* supports sizeof NULL */
+    return sizeof(*ctx)
+        + ctx->numThreads * sizeof(ZSTD_pthread_t);
+}
+
+void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, size_t n) {
+    FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
+    if (!ctx)
+        return;
+
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    {   FL2POOL_job const job = {function, opaque, n};
+
+        /* Wait until there is space in the queue for the new job */
+        while (!ctx->queueEmpty && !ctx->shutdown) {
+            ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+        }
+        /* The queue is still going => there is space */
+        if (!ctx->shutdown) {
+            ctx->numThreadsBusy++;
+            ctx->queueEmpty = 0;
+            ctx->queue = job;
+        }
+    }
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+    ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+}
+
+void FL2POOL_waitAll(void *ctxVoid)
+{
+    FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
+    if (!ctx) { return; }
+
+    ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+    while (ctx->numThreadsBusy && !ctx->shutdown) {
+        ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+    }
+    ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+}
+
+#endif  /* FL2_SINGLETHREAD */
diff --git a/C/fast-lzma2/fl2pool.h b/C/fast-lzma2/fl2pool.h
new file mode 100644
index 00000000..9c99f3c5
--- /dev/null
+++ b/C/fast-lzma2/fl2pool.h
@@ -0,0 +1,60 @@
+/*
+* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+* All rights reserved.
+* Modified for FL2 by Conor McCarthy
+*
+* This source code is licensed under both the BSD-style license (found in the
+* LICENSE file in the root directory of this source tree) and the GPLv2 (found
+* in the COPYING file in the root directory of this source tree).
+* You may select, at your option, one of the above-listed licenses.
+*/
+
+#ifndef FL2POOL_H
+#define FL2POOL_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+#include <stddef.h>   /* size_t */
+
+typedef struct FL2POOL_ctx_s FL2POOL_ctx;
+
+/*! FL2POOL_create() :
+*  Create a thread pool with at most `numThreads` threads.
+*  `numThreads` must be at least 1.
+*  @return : FL2POOL_ctx pointer on success, else NULL.
+*/
+FL2POOL_ctx *FL2POOL_create(size_t numThreads);
+
+
+/*! FL2POOL_free() :
+    Free a thread pool returned by FL2POOL_create().
+*/
+void FL2POOL_free(FL2POOL_ctx *ctx);
+
+/*! FL2POOL_sizeof() :
+    return memory usage of pool returned by FL2POOL_create().
+*/
+size_t FL2POOL_sizeof(FL2POOL_ctx *ctx);
+
+/*! FL2POOL_function :
+    The function type that can be added to a thread pool.
+*/
+typedef void (*FL2POOL_function)(void *, size_t);
+
+/*! FL2POOL_add() :
+    Add the job `function(opaque, n)` to the thread pool.
+    Possibly blocks until there is room in the queue.
+    Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
+*/
+void FL2POOL_add(void *ctx, FL2POOL_function function, void *opaque, size_t n);
+
+void FL2POOL_waitAll(void *ctx);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif
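Usage sketch (illustrative; not part of the patch). The pool holds a single
job slot: FL2POOL_add() blocks until a worker takes the previous job, and
FL2POOL_waitAll() returns once no worker is busy. fl2_compress.c creates the
pool with nbThreads - 1 workers and runs slice 0 on the calling thread; the
job function and state here are hypothetical:

    #include "fl2pool.h"

    static void do_slice(void* opaque, size_t n)
    {
        (void)opaque; (void)n;   /* compress slice n of the shared state */
    }

    static void dispatch_example(FL2POOL_ctx* factory, void* state, size_t nbThreads)
    {
        size_t n;
        for (n = 1; n < nbThreads; ++n)
            FL2POOL_add(factory, do_slice, state, n);  /* workers take slices 1.. */
        do_slice(state, 0);                            /* slice 0 on this thread */
        FL2POOL_waitAll(factory);                      /* wait for the batch */
    }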
diff --git a/C/fast-lzma2/fl2threading.c b/C/fast-lzma2/fl2threading.c
new file mode 100644
index 00000000..3372b109
--- /dev/null
+++ b/C/fast-lzma2/fl2threading.c
@@ -0,0 +1,75 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+/**
+ * This file holds wrappers for systems which do not support pthreads
+ */
+
+/* create fake symbol to avoid empty translation unit warning */
+int g_ZSTD_threading_useless_symbol;
+
+#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+
+
+/* ===  Dependencies  === */
+#include <process.h>   /* _beginthreadex */
+#include <errno.h>     /* errno, EINVAL */
+#include "fl2threading.h"
+
+
+/* ===  Implementation  === */
+
+static unsigned __stdcall worker(void *arg)
+{
+    ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
+    thread->arg = thread->start_routine(thread->arg);
+    return 0;
+}
+
+int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
+            void* (*start_routine) (void*), void* arg)
+{
+    (void)unused;
+    thread->arg = arg;
+    thread->start_routine = start_routine;
+    thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
+
+    if (!thread->handle)
+        return errno;
+    else
+        return 0;
+}
+
+int FL2_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
+{
+    DWORD result;
+
+    if (!thread.handle) return 0;
+
+    result = WaitForSingleObject(thread.handle, INFINITE);
+    switch (result) {
+    case WAIT_OBJECT_0:
+        if (value_ptr) *value_ptr = thread.arg;
+        return 0;
+    case WAIT_ABANDONED:
+        return EINVAL;
+    default:
+        return GetLastError();
+    }
+}
+
+#endif /* FL2_SINGLETHREAD */
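Usage sketch (illustrative; not part of the patch). The wrapper preserves
pthread semantics, so portable callers such as fl2pool.c need no #ifdefs;
the value returned by the thread function is delivered through join:

    #include "fl2threading.h"

    static void* hello(void* arg)
    {
        return arg;   /* returned value comes back via FL2_pthread_join */
    }

    static int wrapper_example(void)
    {
        static int token;
        ZSTD_pthread_t t;
        void* result = NULL;
        if (FL2_pthread_create(&t, NULL, hello, &token))
            return 1;                 /* errno-style code on failure */
        FL2_pthread_join(t, &result);
        return result != &token;      /* 0 on success */
    }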
diff --git a/C/fast-lzma2/fl2threading.h b/C/fast-lzma2/fl2threading.h
new file mode 100644
index 00000000..9f6ff3b1
--- /dev/null
+++ b/C/fast-lzma2/fl2threading.h
@@ -0,0 +1,120 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper, based on :
+ * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+ */
+#ifdef WINVER
+#  undef WINVER
+#endif
+#define WINVER       0x0600
+
+#ifdef _WIN32_WINNT
+#  undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+#  define WIN32_LEAN_AND_MEAN
+#endif
+
+#include <windows.h>
+
+
+/* mutex */
+#define ZSTD_pthread_mutex_t            CRITICAL_SECTION
+#define ZSTD_pthread_mutex_init(a, b)   (InitializeCriticalSection((a)), 0)
+#define ZSTD_pthread_mutex_destroy(a)   DeleteCriticalSection((a))
+#define ZSTD_pthread_mutex_lock(a)      EnterCriticalSection((a))
+#define ZSTD_pthread_mutex_unlock(a)    LeaveCriticalSection((a))
+
+/* condition variable */
+#define ZSTD_pthread_cond_t             CONDITION_VARIABLE
+#define ZSTD_pthread_cond_init(a, b)    (InitializeConditionVariable((a)), 0)
+#define ZSTD_pthread_cond_destroy(a)    /* No delete */
+#define ZSTD_pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
+#define ZSTD_pthread_cond_signal(a)     WakeConditionVariable((a))
+#define ZSTD_pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
+
+/* FL2_pthread_create() and FL2_pthread_join() */
+typedef struct {
+    HANDLE handle;
+    void* (*start_routine)(void*);
+    void* arg;
+} ZSTD_pthread_t;
+
+int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
+                   void* (*start_routine) (void*), void* arg);
+
+int FL2_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
+
+/**
+ * add here more wrappers as required
+ */
+
+
+#elif !defined(FL2_SINGLETHREAD)   /* posix assumed ; need a better detection method */
+/* ===   POSIX Systems   === */
+#  include <pthread.h>
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t
+#define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
+#define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock((a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock((a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t
+#define ZSTD_pthread_cond_init(a, b)    pthread_cond_init((a), (b))
+#define ZSTD_pthread_cond_destroy(a)    pthread_cond_destroy((a))
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait((a), (b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal((a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast((a))
+
+#define ZSTD_pthread_t                  pthread_t
+/* FL2_ names match the Windows wrapper above; fl2pool.c calls these */
+#define FL2_pthread_create(a, b, c, d)  pthread_create((a), (b), (c), (d))
+#define FL2_pthread_join(a, b)          pthread_join((a),(b))
+
+#else  /* FL2_SINGLETHREAD defined */
+/* No multithreading support */
+
+typedef int ZSTD_pthread_mutex_t;
+#define ZSTD_pthread_mutex_init(a, b)   ((void)a, 0)
+#define ZSTD_pthread_mutex_destroy(a)
+#define ZSTD_pthread_mutex_lock(a)
+#define ZSTD_pthread_mutex_unlock(a)
+
+typedef int ZSTD_pthread_cond_t;
+#define ZSTD_pthread_cond_init(a, b)    ((void)a, 0)
+#define ZSTD_pthread_cond_destroy(a)
+#define ZSTD_pthread_cond_wait(a, b)
+#define ZSTD_pthread_cond_signal(a)
+#define ZSTD_pthread_cond_broadcast(a)
+
+/* do not use ZSTD_pthread_t */
+
+#endif /* FL2_SINGLETHREAD */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* THREADING_H_938743 */
diff --git a/C/fast-lzma2/lzma2_enc.c b/C/fast-lzma2/lzma2_enc.c
new file mode 100644
index 00000000..2aae5058
--- /dev/null
+++ b/C/fast-lzma2/lzma2_enc.c
@@ -0,0 +1,2047 @@
+/* lzma2_enc.c -- LZMA2 Encoder
+Based on LzmaEnc.c and Lzma2Enc.c : Igor Pavlov
+Modified for FL2 by Conor McCarthy
+Public domain
+*/
+
+#include <stdlib.h>   /* malloc, free */
+#include <string.h>   /* memcpy */
+
+#include "fl2_internal.h"
+#include "mem.h"
+#include "lzma2_enc.h"
+#include "fl2_compress_internal.h"
+#include "radix_mf.h"
+#include "range_enc.h"
+#include "count.h"
+
+#define kNumReps 4U
+#define kNumStates 12U
+
+#define kNumLiterals 0x100U
+#define kNumLitTables 3U
+
+#define kNumLenToPosStates 4U
+#define kNumPosSlotBits 6U
+#define kDicLogSizeMin 18U
+#define kDicLogSizeMax 31U
+#define kDistTableSizeMax (kDicLogSizeMax * 2U)
+
+#define kNumAlignBits 4U
+#define kAlignTableSize (1U << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1U)
+#define kAlignRepriceFrequency kAlignTableSize
+
+#define kStartPosModelIndex 4U
+#define kEndPosModelIndex 14U
+#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
+
+#define kNumFullDistancesBits (kEndPosModelIndex >> 1U)
+#define kNumFullDistances (1U << kNumFullDistancesBits)
+#define kDistanceRepriceFrequency (1U << 7U)
+
+#define kNumPositionBitsMax 4U
+#define kNumPositionStatesMax (1U << kNumPositionBitsMax)
+#define kNumLiteralContextBitsMax 4U
+#define kNumLiteralPosBitsMax 4U
+#define kLcLpMax 4U
+
+
+#define kLenNumLowBits 3U
+#define kLenNumLowSymbols (1U << kLenNumLowBits)
+#define kLenNumMidBits 3U
+#define kLenNumMidSymbols (1U << kLenNumMidBits)
+#define kLenNumHighBits 8U
+#define kLenNumHighSymbols (1U << kLenNumHighBits)
+
+#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)
+
+#define kMatchLenMin 2U
+#define kMatchLenMax (kMatchLenMin + kLenNumSymbolsTotal - 1U)
+
+#define kOptimizerBufferSize (1U << 12U)
+#define kInfinityPrice (1UL << 30U)
+#define kNullDist (U32)-1
+
+#define kChunkSize ((1UL << 16U) - 8192U)
+#define kChunkBufferSize (1UL << 16U)
+#define kMaxChunkUncompressedSize ((1UL << 21U) - kMatchLenMax)
+#define kChunkHeaderSize 5U
+#define kChunkResetShift 5U
+#define kChunkUncompressedDictReset 1U
+#define kChunkUncompressed 2U
+#define kChunkCompressedFlag 0x80U
+#define kChunkNothingReset 0U
+#define kChunkStateReset (1U << kChunkResetShift)
+#define kChunkStatePropertiesReset (2U << kChunkResetShift)
+#define kChunkAllReset (3U << kChunkResetShift)
+
+#define kMaxHashDictBits 14U
+#define kHash3Bits 14U
+#define kNullLink -1
+
+#define kMinTestChunkSize 0x4000U
+#define kRandomFilterMarginBits 8U
+
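Aside (illustrative; not part of the patch). The chunk constants above encode
the standard LZMA2 chunk header: a compressed chunk starts with a control byte
carrying kChunkCompressedFlag, a reset level shifted by kChunkResetShift, and
bits 16-20 of (unpacked size - 1), followed by two big-endian 16-bit size
fields. A sketch with hypothetical sizes:

    BYTE header[kChunkHeaderSize];
    size_t const unpacked = 0x10000;  /* bytes encoded in this chunk */
    size_t const packed = 0x800;      /* compressed payload size */
    header[0] = (BYTE)(kChunkCompressedFlag | kChunkAllReset | ((unpacked - 1) >> 16));
    header[1] = (BYTE)((unpacked - 1) >> 8);
    header[2] = (BYTE)(unpacked - 1);
    header[3] = (BYTE)((packed - 1) >> 8);
    header[4] = (BYTE)(packed - 1);
    /* kChunkAllReset includes a properties reset, so a properties byte
       would follow these kChunkHeaderSize bytes */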
+static const BYTE kLiteralNextStates[kNumStates] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
+#define LiteralNextState(s) kLiteralNextStates[s]
+static const BYTE kMatchNextStates[kNumStates] = { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
+#define MatchNextState(s) kMatchNextStates[s]
+static const BYTE kRepNextStates[kNumStates] = { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
+#define RepNextState(s) kRepNextStates[s]
+static const BYTE kShortRepNextStates[kNumStates] = { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
+#define ShortRepNextState(s) kShortRepNextStates[s]
+
+#include "fastpos_table.h"
+
+typedef struct
+{
+    size_t table_size;
+    unsigned prices[kNumPositionStatesMax][kLenNumSymbolsTotal];
+    unsigned counters[kNumPositionStatesMax];
+    Probability choice;
+    Probability choice_2;
+    Probability low[kNumPositionStatesMax << kLenNumLowBits];
+    Probability mid[kNumPositionStatesMax << kLenNumMidBits];
+    Probability high[kLenNumHighSymbols];
+} LengthStates;
+
+typedef struct
+{
+    U32 reps[kNumReps];
+    size_t state;
+
+    Probability is_rep[kNumStates];
+    Probability is_rep_G0[kNumStates];
+
Probability is_rep_G1[kNumStates]; + Probability is_rep_G2[kNumStates]; + Probability is_rep0_long[kNumStates][kNumPositionStatesMax]; + Probability is_match[kNumStates][kNumPositionStatesMax]; + + Probability dist_slot_encoders[kNumLenToPosStates][1 << kNumPosSlotBits]; + Probability dist_align_encoders[1 << kNumAlignBits]; + Probability dist_encoders[kNumFullDistances - kEndPosModelIndex]; + + LengthStates len_states; + LengthStates rep_len_states; + + Probability literal_probs[(kNumLiterals * kNumLitTables) << kLcLpMax]; +} EncoderStates; + +typedef struct +{ + size_t state; + U32 reps[kNumReps]; + U32 price; + unsigned prev_index; + U32 prev_dist; + unsigned prev_index_2; + U32 prev_dist_2; + BYTE is_combination; + BYTE prev_2; + +} OptimalNode; + +#define MakeAsLiteral(node) (node).prev_dist = kNullDist; (node).is_combination = 0; +#define MakeAsShortRep(node) (node).prev_dist = 0; (node).is_combination = 0; + +typedef struct { + S32 table_3[1 << kHash3Bits]; + S32 hash_chain_3[1]; +} HashChains; + +typedef struct +{ + U32 length; + U32 dist; +} Match; + +struct FL2_lzmaEncoderCtx_s +{ + unsigned lc; + unsigned lp; + unsigned pb; + unsigned fast_length; + size_t len_end_max; + size_t lit_pos_mask; + size_t pos_mask; + unsigned match_cycles; + FL2_strategy strategy; + + RangeEncoder rc; + + EncoderStates states; + + unsigned match_price_count; + unsigned align_price_count; + size_t dist_price_table_size; + unsigned align_prices[kAlignTableSize]; + unsigned dist_slot_prices[kNumLenToPosStates][kDistTableSizeMax]; + unsigned distance_prices[kNumLenToPosStates][kNumFullDistances]; + + Match matches[kMatchLenMax-kMatchLenMin]; + size_t match_count; + + OptimalNode opt_buf[kOptimizerBufferSize]; + + BYTE* out_buf; + + HashChains* hash_buf; + ptrdiff_t chain_mask_2; + ptrdiff_t chain_mask_3; + ptrdiff_t hash_dict_3; + ptrdiff_t hash_prev_index; + ptrdiff_t hash_alloc_3; +}; + +FL2_lzmaEncoderCtx* FL2_lzma2Create() +{ + FL2_lzmaEncoderCtx* enc = malloc(sizeof(FL2_lzmaEncoderCtx)); + DEBUGLOG(3, "FL2_lzma2Create"); + if (enc == NULL) + return NULL; + + enc->out_buf = malloc(kChunkBufferSize); + if (enc->out_buf == NULL) { + free(enc); + return NULL; + } + enc->lc = 3; + enc->lp = 0; + enc->pb = 2; + enc->fast_length = 48; + enc->len_end_max = kOptimizerBufferSize - 1; + enc->lit_pos_mask = (1 << enc->lp) - 1; + enc->pos_mask = (1 << enc->pb) - 1; + enc->match_cycles = 1; + enc->strategy = FL2_ultra; + enc->match_price_count = kDistanceRepriceFrequency; + enc->align_price_count = kAlignRepriceFrequency; + enc->dist_price_table_size = kDistTableSizeMax; + enc->hash_buf = NULL; + enc->hash_dict_3 = 0; + enc->chain_mask_3 = 0; + enc->hash_alloc_3 = 0; + return enc; +} + +void FL2_lzma2Free(FL2_lzmaEncoderCtx* enc) +{ + if (enc == NULL) + return; + free(enc->hash_buf); + free(enc->out_buf); + free(enc); +} + +#define GetLiteralProbs(enc, pos, prev_symbol) (enc->states.literal_probs + ((((pos) & enc->lit_pos_mask) << enc->lc) + ((prev_symbol) >> (8 - enc->lc))) * kNumLiterals * kNumLitTables) + +#define GetLenToDistState(len) (((len) < kNumLenToPosStates + 1) ? 
(len) - 2 : kNumLenToPosStates - 1)
+
+#define IsCharState(state) ((state) < 7)
+
+HINT_INLINE
+unsigned GetRepLen1Price(FL2_lzmaEncoderCtx* enc, size_t state, size_t pos_state)
+{
+    unsigned rep_G0_prob = enc->states.is_rep_G0[state];
+    unsigned rep0_long_prob = enc->states.is_rep0_long[state][pos_state];
+    return GET_PRICE_0(enc->rc, rep_G0_prob) + GET_PRICE_0(enc->rc, rep0_long_prob);
+}
+
+static unsigned GetRepPrice(FL2_lzmaEncoderCtx* enc, size_t rep_index, size_t state, size_t pos_state)
+{
+    unsigned price;
+    unsigned rep_G0_prob = enc->states.is_rep_G0[state];
+    if (rep_index == 0) {
+        unsigned rep0_long_prob = enc->states.is_rep0_long[state][pos_state];
+        price = GET_PRICE_0(enc->rc, rep_G0_prob);
+        price += GET_PRICE_1(enc->rc, rep0_long_prob);
+    }
+    else {
+        unsigned rep_G1_prob = enc->states.is_rep_G1[state];
+        price = GET_PRICE_1(enc->rc, rep_G0_prob);
+        if (rep_index == 1) {
+            price += GET_PRICE_0(enc->rc, rep_G1_prob);
+        }
+        else {
+            unsigned rep_G2_prob = enc->states.is_rep_G2[state];
+            price += GET_PRICE_1(enc->rc, rep_G1_prob);
+            price += GET_PRICE(enc->rc, rep_G2_prob, (U32)(rep_index) - 2);
+        }
+    }
+    return price;
+}
+
+static unsigned GetRepMatch0Price(FL2_lzmaEncoderCtx* enc, size_t len, size_t state, size_t pos_state)
+{
+    unsigned rep_G0_prob = enc->states.is_rep_G0[state];
+    unsigned rep0_long_prob = enc->states.is_rep0_long[state][pos_state];
+    return enc->states.rep_len_states.prices[pos_state][len - kMatchLenMin]
+        + GET_PRICE_0(enc->rc, rep_G0_prob)
+        + GET_PRICE_1(enc->rc, rep0_long_prob);
+}
+
+static unsigned GetLiteralPriceMatched(RangeEncoder* rc, const Probability *prob_table, U32 symbol, unsigned match_byte)
+{
+    unsigned price = 0;
+    unsigned offs = 0x100;
+    symbol |= 0x100;
+    do {
+        match_byte <<= 1;
+        price += GET_PRICE(rc, prob_table[offs + (match_byte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
+        symbol <<= 1;
+        offs &= ~(match_byte ^ symbol);
+    } while (symbol < 0x10000);
+    return price;
+}
+
+static void EncodeLiteral(FL2_lzmaEncoderCtx* enc, size_t index, U32 symbol, unsigned prev_symbol)
+{
+    EncodeBit0(&enc->rc, &enc->states.is_match[enc->states.state][index & enc->pos_mask]);
+    enc->states.state = LiteralNextState(enc->states.state);
+
+    { Probability* prob_table = GetLiteralProbs(enc, index, prev_symbol);
+    symbol |= 0x100;
+    do {
+        EncodeBit(&enc->rc, prob_table + (symbol >> 8), symbol & (1 << 7));
+        symbol <<= 1;
+    } while (symbol < 0x10000);
+    }
+}
+
+static void EncodeLiteralMatched(FL2_lzmaEncoderCtx* enc, const BYTE* data_block, size_t index, U32 symbol)
+{
+    EncodeBit0(&enc->rc, &enc->states.is_match[enc->states.state][index & enc->pos_mask]);
+    enc->states.state = LiteralNextState(enc->states.state);
+
+    { unsigned match_symbol = data_block[index - enc->states.reps[0] - 1];
+    Probability* prob_table = GetLiteralProbs(enc, index, data_block[index - 1]);
+    unsigned offset = 0x100;
+    symbol |= 0x100;
+    do {
+        match_symbol <<= 1;
+        size_t prob_index = offset + (match_symbol & offset) + (symbol >> 8);
+        EncodeBit(&enc->rc, prob_table + prob_index, symbol & (1 << 7));
+        symbol <<= 1;
+        offset &= ~(match_symbol ^ symbol);
+    } while (symbol < 0x10000);
+    }
+}
+
+HINT_INLINE
+void EncodeLiteralBuf(FL2_lzmaEncoderCtx* enc, const BYTE* data_block, size_t index)
+{
+    U32 symbol = data_block[index];
+    if (IsCharState(enc->states.state)) {
+        unsigned prev_symbol = data_block[index - 1];
+        EncodeLiteral(enc, index, symbol, prev_symbol);
+    }
+    else {
+        EncodeLiteralMatched(enc, data_block, index, symbol);
+    }
+}
+
+static size_t 
RMF_bitpackExtendMatch(const BYTE* const data, + const U32* const table, + ptrdiff_t const start_index, + ptrdiff_t limit, + U32 const link, + size_t const length) +{ + ptrdiff_t end_index = start_index + length; + ptrdiff_t dist = start_index - link; + if (limit > start_index + (ptrdiff_t)kMatchLenMax) + limit = start_index + kMatchLenMax; + while (end_index < limit && end_index - (table[end_index] & RADIX_LINK_MASK) == dist) { + end_index += table[end_index] >> RADIX_LINK_BITS; + } + if (end_index >= limit) { + DEBUGLOG(7, "RMF_bitpackExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(limit - start_index)); + return limit - start_index; + } + while (end_index < limit && data[end_index - dist] == data[end_index]) { + ++end_index; + } + DEBUGLOG(7, "RMF_bitpackExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(end_index - start_index)); + return end_index - start_index; +} + +#define GetMatchLink(table, index) ((const RMF_unit*)(table))[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] + +#define GetMatchLength(table, index) ((const RMF_unit*)(table))[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK] + +static size_t RMF_structuredExtendMatch(const BYTE* const data, + const U32* const table, + ptrdiff_t const start_index, + ptrdiff_t limit, + U32 const link, + size_t const length) +{ + ptrdiff_t end_index = start_index + length; + ptrdiff_t dist = start_index - link; + if (limit > start_index + (ptrdiff_t)kMatchLenMax) + limit = start_index + kMatchLenMax; + while (end_index < limit && end_index - GetMatchLink(table, end_index) == dist) { + end_index += GetMatchLength(table, end_index); + } + if (end_index >= limit) { + DEBUGLOG(7, "RMF_structuredExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(limit - start_index)); + return limit - start_index; + } + while (end_index < limit && data[end_index - dist] == data[end_index]) { + ++end_index; + } + DEBUGLOG(7, "RMF_structuredExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(end_index - start_index)); + return end_index - start_index; +} + +FORCE_INLINE_TEMPLATE +Match FL2_radixGetMatch(FL2_dataBlock block, + FL2_matchTable* tbl, + unsigned max_depth, + int structTbl, + size_t index) +{ + if (structTbl) + { + Match match; + U32 link = GetMatchLink(tbl->table, index); + size_t length; + size_t dist; + match.length = 0; + if (link == RADIX_NULL_LINK) + return match; + length = GetMatchLength(tbl->table, index); + dist = index - link - 1; + if (length > block.end - index) { + match.length = (U32)(block.end - index); + } + else if (length == max_depth + || length == STRUCTURED_MAX_LENGTH /* from HandleRepeat */) + { + match.length = (U32)RMF_structuredExtendMatch(block.data, tbl->table, index, block.end, link, length); + } + else { + match.length = (U32)length; + } + match.dist = (U32)dist; + return match; + } + else { + Match match; + U32 link = tbl->table[index]; + size_t length; + size_t dist; + match.length = 0; + if (link == RADIX_NULL_LINK) + return match; + length = link >> RADIX_LINK_BITS; + link &= RADIX_LINK_MASK; + dist = index - link - 1; + if (length > block.end - index) { + match.length = (U32)(block.end - index); + } + else if (length == max_depth + || length == BITPACK_MAX_LENGTH /* from HandleRepeat */) + { + match.length = (U32)RMF_bitpackExtendMatch(block.data, tbl->table, index, block.end, link, length); + 
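+        /* Editor's note: the match finder stores lengths capped at the
+        search depth (or at BITPACK_MAX_LENGTH when set by repeat
+        handling), so a match that hits the cap is re-extended here,
+        first via the table links and then by direct byte comparison
+        against the window. */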
} + else { + match.length = (U32)length; + } + match.dist = (U32)dist; + return match; + } +} + +FORCE_INLINE_TEMPLATE +Match FL2_radixGetNextMatch(FL2_dataBlock block, + FL2_matchTable* tbl, + unsigned max_depth, + int structTbl, + size_t index) +{ + if (structTbl) + { + Match match; + U32 link = GetMatchLink(tbl->table, index); + size_t length; + size_t dist; + match.length = 0; + if (link == RADIX_NULL_LINK) + return match; + length = GetMatchLength(tbl->table, index); + dist = index - link - 1; + if (link - 1 == GetMatchLink(tbl->table, index - 1)) { + /* same as the previous match, one byte shorter */ + return match; + } + if (length > block.end - index) { + match.length = (U32)(block.end - index); + } + else if (length == max_depth + || length == STRUCTURED_MAX_LENGTH /* from HandleRepeat */) + { + match.length = (U32)RMF_structuredExtendMatch(block.data, tbl->table, index, block.end, link, length); + } + else { + match.length = (U32)length; + } + match.dist = (U32)dist; + return match; + } + else { + Match match; + U32 link = tbl->table[index]; + size_t length; + size_t dist; + match.length = 0; + if (link == RADIX_NULL_LINK) + return match; + length = link >> RADIX_LINK_BITS; + link &= RADIX_LINK_MASK; + dist = index - link - 1; + if (link - 1 == (tbl->table[index - 1] & RADIX_LINK_MASK)) { + /* same distance, one byte shorter */ + return match; + } + if (length > block.end - index) { + match.length = (U32)(block.end - index); + } + else if (length == max_depth + || length == BITPACK_MAX_LENGTH /* from HandleRepeat */) + { + match.length = (U32)RMF_bitpackExtendMatch(block.data, tbl->table, index, block.end, link, length); + } + else { + match.length = (U32)length; + } + match.dist = (U32)dist; + return match; + } +} + +static void LengthStates_SetPrices(RangeEncoder* rc, LengthStates* ls, size_t pos_state) +{ + unsigned prob = ls->choice; + unsigned a0 = GET_PRICE_0(rc, prob); + unsigned a1 = GET_PRICE_1(rc, prob); + unsigned b0, b1; + size_t i = 0; + prob = ls->choice_2; + b0 = a1 + GET_PRICE_0(rc, prob); + b1 = a1 + GET_PRICE_1(rc, prob); + for (; i < kLenNumLowSymbols && i < ls->table_size; ++i) { + ls->prices[pos_state][i] = a0 + GetTreePrice(rc, ls->low + (pos_state << kLenNumLowBits), kLenNumLowBits, i); + } + for (; i < kLenNumLowSymbols + kLenNumMidSymbols && i < ls->table_size; ++i) { + ls->prices[pos_state][i] = b0 + GetTreePrice(rc, ls->mid + (pos_state << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols); + } + for (; i < ls->table_size; ++i) { + ls->prices[pos_state][i] = b1 + GetTreePrice(rc, ls->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols); + } + ls->counters[pos_state] = (unsigned)(ls->table_size); +} + +static void EncodeLength(FL2_lzmaEncoderCtx* enc, LengthStates* len_prob_table, unsigned len, size_t pos_state) +{ + len -= kMatchLenMin; + if (len < kLenNumLowSymbols) { + EncodeBit0(&enc->rc, &len_prob_table->choice); + EncodeBitTree(&enc->rc, len_prob_table->low + (pos_state << kLenNumLowBits), kLenNumLowBits, len); + } + else { + EncodeBit1(&enc->rc, &len_prob_table->choice); + if (len < kLenNumLowSymbols + kLenNumMidSymbols) { + EncodeBit0(&enc->rc, &len_prob_table->choice_2); + EncodeBitTree(&enc->rc, len_prob_table->mid + (pos_state << kLenNumMidBits), kLenNumMidBits, len - kLenNumLowSymbols); + } + else { + EncodeBit1(&enc->rc, &len_prob_table->choice_2); + EncodeBitTree(&enc->rc, len_prob_table->high, kLenNumHighBits, len - kLenNumLowSymbols - kLenNumMidSymbols); + } + } + if (enc->strategy != FL2_fast && 
--len_prob_table->counters[pos_state] == 0) { + LengthStates_SetPrices(&enc->rc, len_prob_table, pos_state); + } +} + +static void EncodeRepMatch(FL2_lzmaEncoderCtx* enc, unsigned len, unsigned rep, size_t pos_state) +{ + DEBUGLOG(7, "EncodeRepMatch : length %u, rep %u", len, rep); + EncodeBit1(&enc->rc, &enc->states.is_match[enc->states.state][pos_state]); + EncodeBit1(&enc->rc, &enc->states.is_rep[enc->states.state]); + if (rep == 0) { + EncodeBit0(&enc->rc, &enc->states.is_rep_G0[enc->states.state]); + EncodeBit(&enc->rc, &enc->states.is_rep0_long[enc->states.state][pos_state], ((len == 1) ? 0 : 1)); + } + else { + U32 distance = enc->states.reps[rep]; + EncodeBit1(&enc->rc, &enc->states.is_rep_G0[enc->states.state]); + if (rep == 1) { + EncodeBit0(&enc->rc, &enc->states.is_rep_G1[enc->states.state]); + } + else { + EncodeBit1(&enc->rc, &enc->states.is_rep_G1[enc->states.state]); + EncodeBit(&enc->rc, &enc->states.is_rep_G2[enc->states.state], rep - 2); + if (rep == 3) { + enc->states.reps[3] = enc->states.reps[2]; + } + enc->states.reps[2] = enc->states.reps[1]; + } + enc->states.reps[1] = enc->states.reps[0]; + enc->states.reps[0] = distance; + } + if (len == 1) { + enc->states.state = ShortRepNextState(enc->states.state); + } + else { + EncodeLength(enc, &enc->states.rep_len_states, len, pos_state); + enc->states.state = RepNextState(enc->states.state); + } +} + +/* *****************************************/ +/* Distance slot functions based on fastpos.h from XZ*/ + +HINT_INLINE +unsigned FastDistShift(unsigned n) +{ + return n * (kFastDistBits - 1); +} + +HINT_INLINE +unsigned FastDistResult(U32 dist, unsigned n) +{ + return distance_table[dist >> FastDistShift(n)] + + 2 * FastDistShift(n); +} + +static size_t GetDistSlot(U32 distance) +{ + U32 limit = 1UL << kFastDistBits; + /* If it is small enough, we can pick the result directly from */ + /* the precalculated table. 
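+    Distance slots advance by 2 per doubling of the distance, so larger
+    distances are scaled down by FastDistShift(n) bits for the lookup and
+    corrected by adding back 2 * FastDistShift(n). (Editorial addition:
+    e.g. with kFastDistBits == 12, a distance of 1 << 20 is looked up as
+    1 << 9, giving slot 18 + 22 = 40.)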
*/ + if (distance < limit) { + return distance_table[distance]; + } + limit <<= FastDistShift(1); + if (distance < limit) { + return FastDistResult(distance, 1); + } + return FastDistResult(distance, 2); +} + +/* **************************************** */ + +static void EncodeNormalMatch(FL2_lzmaEncoderCtx* enc, unsigned len, U32 dist, size_t pos_state) +{ + DEBUGLOG(7, "EncodeNormalMatch : length %u, dist %u", len, dist); + EncodeBit1(&enc->rc, &enc->states.is_match[enc->states.state][pos_state]); + EncodeBit0(&enc->rc, &enc->states.is_rep[enc->states.state]); + enc->states.state = MatchNextState(enc->states.state); + EncodeLength(enc, &enc->states.len_states, len, pos_state); + + { size_t dist_slot = GetDistSlot(dist); + EncodeBitTree(&enc->rc, enc->states.dist_slot_encoders[GetLenToDistState(len)], kNumPosSlotBits, (unsigned)(dist_slot)); + if (dist_slot >= kStartPosModelIndex) { + unsigned footerBits = ((unsigned)(dist_slot >> 1) - 1); + size_t base = ((2 | (dist_slot & 1)) << footerBits); + unsigned posReduced = (unsigned)(dist - base); + if (dist_slot < kEndPosModelIndex) { + EncodeBitTreeReverse(&enc->rc, enc->states.dist_encoders + base - dist_slot - 1, footerBits, posReduced); + } + else { + EncodeDirect(&enc->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + EncodeBitTreeReverse(&enc->rc, enc->states.dist_align_encoders, kNumAlignBits, posReduced & kAlignMask); + ++enc->align_price_count; + } + } + } + enc->states.reps[3] = enc->states.reps[2]; + enc->states.reps[2] = enc->states.reps[1]; + enc->states.reps[1] = enc->states.reps[0]; + enc->states.reps[0] = dist; + ++enc->match_price_count; +} + +#if defined(_MSC_VER) +# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */ +#endif + +FORCE_INLINE_TEMPLATE +size_t EncodeChunkFast(FL2_lzmaEncoderCtx* enc, + FL2_dataBlock const block, + FL2_matchTable* tbl, + int structTbl, + size_t index, + size_t uncompressed_end) +{ + size_t const pos_mask = enc->pos_mask; + size_t prev = index; + unsigned search_depth = tbl->params.depth; + while (index < uncompressed_end && enc->rc.out_index < enc->rc.chunk_size) + { + size_t max_len; + const BYTE* data; + /* Table of distance restrictions for short matches */ + static const U32 max_dist_table[] = { 0, 0, 0, 1 << 6, 1 << 14 }; + /* Get a match from the table, extended to its full length */ + Match bestMatch = FL2_radixGetMatch(block, tbl, search_depth, structTbl, index); + if (bestMatch.length < kMatchLenMin) { + ++index; + continue; + } + /* Use if near enough */ + if (bestMatch.length >= 5 || bestMatch.dist < max_dist_table[bestMatch.length]) { + bestMatch.dist += kNumReps; + } + else { + bestMatch.length = 0; + } + max_len = MIN(kMatchLenMax, block.end - index); + data = block.data + index; + + { Match bestRep; + Match repMatch; + bestRep.length = 0; + for (repMatch.dist = 0; repMatch.dist < kNumReps; ++repMatch.dist) { + const BYTE *data_2 = data - enc->states.reps[repMatch.dist] - 1; + if (MEM_read16(data) != MEM_read16(data_2)) { + continue; + } + repMatch.length = (U32)(ZSTD_count(data + 2, data_2 + 2, data + max_len) + 2); + if (repMatch.length >= max_len) { + bestMatch = repMatch; + goto _encode; + } + if (repMatch.length > bestRep.length) { + bestRep = repMatch; + } + } + if (bestMatch.length >= max_len) + goto _encode; + if (bestRep.length >= 2) { + int const gain2 = (int)(bestRep.length * 3 - bestRep.dist); + int const gain1 = (int)(bestMatch.length * 3 - ZSTD_highbit32(bestMatch.dist + 1) + 1); + if (gain2 > gain1) { + 
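+                /* Editor's note: both gains approximate the encoded size
+                as three bits per length unit minus a distance cost: the
+                small rep index for a rep match, and roughly
+                log2(dist + 1) bits for a normal match, so a rep of
+                near-equal length wins. */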
DEBUGLOG(7, "Replace match (%u, %u) with rep (%u, %u)", bestMatch.length, bestMatch.dist, bestRep.length, bestRep.dist); + bestMatch = bestRep; + } + } + } + + if (bestMatch.length < kMatchLenMin) { + ++index; + continue; + } + + for (size_t next = index + 1; bestMatch.length < kMatchLenMax && next < uncompressed_end; ++next) { + /* lazy matching scheme from ZSTD */ + Match next_match = FL2_radixGetNextMatch(block, tbl, search_depth, structTbl, next); + if (next_match.length >= kMatchLenMin) { + Match bestRep; + Match repMatch; + bestRep.length = 0; + data = block.data + next; + max_len = MIN(kMatchLenMax, block.end - next); + for (repMatch.dist = 0; repMatch.dist < kNumReps; ++repMatch.dist) { + const BYTE *data_2 = data - enc->states.reps[repMatch.dist] - 1; + if (MEM_read16(data) != MEM_read16(data_2)) { + continue; + } + repMatch.length = (U32)(ZSTD_count(data + 2, data_2 + 2, data + max_len) + 2); + if (repMatch.length > bestRep.length) { + bestRep = repMatch; + } + } + if (bestRep.length >= 3) { + int const gain2 = (int)(bestRep.length * 3 - bestRep.dist); + int const gain1 = (int)(bestMatch.length * 3 - ZSTD_highbit32((U32)bestMatch.dist + 1) + 1); + if (gain2 > gain1) { + DEBUGLOG(7, "Replace match (%u, %u) with rep (%u, %u)", bestMatch.length, bestMatch.dist, bestRep.length, bestRep.dist); + bestMatch = bestRep; + index = next; + } + } + if (next_match.length >= 3 && next_match.dist != bestMatch.dist) { + int const gain2 = (int)(next_match.length * 4 - ZSTD_highbit32((U32)next_match.dist + 1)); /* raw approx */ + int const gain1 = (int)(bestMatch.length * 4 - ZSTD_highbit32((U32)bestMatch.dist + 1) + 4); + if (gain2 > gain1) { + DEBUGLOG(7, "Replace match (%u, %u) with match (%u, %u)", bestMatch.length, bestMatch.dist, next_match.length, next_match.dist + kNumReps); + bestMatch = next_match; + bestMatch.dist += kNumReps; + index = next; + continue; + } + } + } + if (next < uncompressed_end - 4) { + Match bestRep; + Match repMatch; + ++next; + next_match = FL2_radixGetNextMatch(block, tbl, search_depth, structTbl, next); + if (next_match.length < 4) + break; + data = block.data + next; + max_len = MIN(kMatchLenMax, block.end - next); + bestRep.length = 0; + for (repMatch.dist = 0; repMatch.dist < kNumReps; ++repMatch.dist) { + const BYTE *data_2 = data - enc->states.reps[repMatch.dist] - 1; + if (MEM_read16(data) != MEM_read16(data_2)) { + continue; + } + repMatch.length = (U32)(ZSTD_count(data + 2, data_2 + 2, data + max_len) + 2); + if (repMatch.length > bestRep.length) { + bestRep = repMatch; + } + } + if (bestRep.length >= 4) { + int const gain2 = (int)(bestRep.length * 4 - (bestRep.dist >> 1)); + int const gain1 = (int)(bestMatch.length * 4 - ZSTD_highbit32((U32)bestMatch.dist + 1) + 1); + if (gain2 > gain1) { + DEBUGLOG(7, "Replace match (%u, %u) with rep (%u, %u)", bestMatch.length, bestMatch.dist, bestRep.length, bestRep.dist); + bestMatch = bestRep; + index = next; + } + } + if (next_match.length >= 4 && next_match.dist != bestMatch.dist) { + int const gain2 = (int)(next_match.length * 4 - ZSTD_highbit32((U32)next_match.dist + 1)); + int const gain1 = (int)(bestMatch.length * 4 - ZSTD_highbit32((U32)bestMatch.dist + 1) + 7); + if (gain2 > gain1) { + DEBUGLOG(7, "Replace match (%u, %u) with match (%u, %u)", bestMatch.length, bestMatch.dist, next_match.length, next_match.dist + kNumReps); + bestMatch = next_match; + bestMatch.dist += kNumReps; + index = next; + continue; + } + } + + } + break; + } +_encode: + assert(index + bestMatch.length <= block.end); + while (prev < 
index && enc->rc.out_index < enc->rc.chunk_size) { + if (block.data[prev] == block.data[prev - enc->states.reps[0] - 1]) { + EncodeRepMatch(enc, 1, 0, prev & pos_mask); + } + else { + EncodeLiteralBuf(enc, block.data, prev); + } + ++prev; + } + if (enc->rc.out_index >= enc->rc.chunk_size) { + break; + } + if(bestMatch.length >= kMatchLenMin) { + if (bestMatch.dist < kNumReps) { + EncodeRepMatch(enc, bestMatch.length, bestMatch.dist, index & pos_mask); + } + else { + EncodeNormalMatch(enc, bestMatch.length, bestMatch.dist - kNumReps, index & pos_mask); + } + index += bestMatch.length; + prev = index; + } + } + while (prev < index && enc->rc.out_index < enc->rc.chunk_size) { + if (block.data[prev] == block.data[prev - enc->states.reps[0] - 1]) { + EncodeRepMatch(enc, 1, 0, prev & pos_mask); + } + else { + EncodeLiteralBuf(enc, block.data, prev); + } + ++prev; + } + Flush(&enc->rc); + return prev; +} + +/* Reverse the direction of the linked list generated by the optimal parser */ +static void ReverseOptimalChain(OptimalNode* opt_buf, size_t cur) +{ + size_t next_index = opt_buf[cur].prev_index; + U32 next_dist = opt_buf[cur].prev_dist; + do + { + if (opt_buf[cur].is_combination) + { + MakeAsLiteral(opt_buf[next_index]); + opt_buf[next_index].prev_index = (unsigned)(next_index - 1); + if (opt_buf[cur].prev_2) + { + opt_buf[next_index - 1].is_combination = 0; + opt_buf[next_index - 1].prev_index = opt_buf[cur].prev_index_2; + opt_buf[next_index - 1].prev_dist = opt_buf[cur].prev_dist_2; + } + } + + { U32 temp = opt_buf[next_index].prev_dist; + opt_buf[next_index].prev_dist = next_dist; + next_dist = temp; + } + + { size_t prev_index = next_index; + next_index = opt_buf[prev_index].prev_index; + opt_buf[prev_index].prev_index = (unsigned)(cur); + cur = prev_index; + } + } while (cur != 0); +} + +static unsigned GetLiteralPrice(FL2_lzmaEncoderCtx* enc, size_t index, size_t state, unsigned prev_symbol, U32 symbol, unsigned match_byte) +{ + const Probability* prob_table = GetLiteralProbs(enc, index, prev_symbol); + if (IsCharState(state)) { + unsigned price = 0; + symbol |= 0x100; + do { + price += GET_PRICE(enc->rc, prob_table[symbol >> 8], (symbol >> 7) & 1); + symbol <<= 1; + } while (symbol < 0x10000); + return price; + } + return GetLiteralPriceMatched(&enc->rc, prob_table, symbol, match_byte); +} + +static void HashReset(FL2_lzmaEncoderCtx* enc, unsigned dictionary_bits_3) +{ + enc->hash_dict_3 = (ptrdiff_t)1 << dictionary_bits_3; + enc->chain_mask_3 = enc->hash_dict_3 - 1; + memset(enc->hash_buf->table_3, 0xFF, sizeof(enc->hash_buf->table_3)); +} + +static int HashCreate(FL2_lzmaEncoderCtx* enc, unsigned dictionary_bits_3) +{ + DEBUGLOG(3, "Create hash chain : dict bits %u", dictionary_bits_3); + if (enc->hash_buf) { + free(enc->hash_buf); + } + enc->hash_alloc_3 = (ptrdiff_t)1 << dictionary_bits_3; + enc->hash_buf = malloc(sizeof(HashChains) + (enc->hash_alloc_3 - 1) * sizeof(S32)); + if (enc->hash_buf == NULL) + return 1; + HashReset(enc, dictionary_bits_3); + return 0; +} + +/* Create a hash chain for hybrid mode */ +int FL2_lzma2HashAlloc(FL2_lzmaEncoderCtx* enc, const FL2_lzma2Parameters* options) +{ + if (enc->strategy == FL2_ultra && enc->hash_alloc_3 < ((ptrdiff_t)1 << options->second_dict_bits)) { + return HashCreate(enc, options->second_dict_bits); + } + return 0; +} + +#define GET_HASH_3(data) ((((MEM_readLE32(data)) << 8) * 506832829U) >> (32 - kHash3Bits)) + +HINT_INLINE +size_t HashGetMatches(FL2_lzmaEncoderCtx* enc, const FL2_dataBlock block, + ptrdiff_t index, + size_t 
length_limit, + Match match) +{ + ptrdiff_t const hash_dict_3 = enc->hash_dict_3; + const BYTE* data = block.data; + HashChains* tbl = enc->hash_buf; + ptrdiff_t const chain_mask_3 = enc->chain_mask_3; + size_t max_len; + ptrdiff_t first_3; + + enc->match_count = 0; + enc->hash_prev_index = MAX(enc->hash_prev_index, index - hash_dict_3); + /* Update hash tables and chains for any positions that were skipped */ + while (++enc->hash_prev_index < index) { + size_t hash = GET_HASH_3(data + enc->hash_prev_index); + tbl->hash_chain_3[enc->hash_prev_index & chain_mask_3] = tbl->table_3[hash]; + tbl->table_3[hash] = (S32)enc->hash_prev_index; + } + data += index; + max_len = 2; + + { size_t hash = GET_HASH_3(data); + first_3 = tbl->table_3[hash]; + tbl->table_3[hash] = (S32)(index); + } + if (first_3 >= 0) { + int cycles = enc->match_cycles; + ptrdiff_t end_index = index - (((ptrdiff_t)match.dist < hash_dict_3) ? match.dist : hash_dict_3); + ptrdiff_t match_3 = first_3; + if (match_3 >= end_index) { + do { + --cycles; + const BYTE* data_2 = block.data + match_3; + size_t len_test = ZSTD_count(data + 1, data_2 + 1, data + length_limit) + 1; + if (len_test > max_len) { + enc->matches[enc->match_count].length = (U32)len_test; + enc->matches[enc->match_count].dist = (U32)(index - match_3 - 1); + ++enc->match_count; + max_len = len_test; + if (len_test >= length_limit) { + break; + } + } + if (cycles <= 0) + break; + match_3 = tbl->hash_chain_3[match_3 & chain_mask_3]; + } while (match_3 >= end_index); + } + } + tbl->hash_chain_3[index & chain_mask_3] = (S32)first_3; + if ((unsigned)(max_len) < match.length) { + enc->matches[enc->match_count] = match; + ++enc->match_count; + return match.length; + } + return max_len; +} + +#if defined(_MSC_VER) +# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */ +#endif + +/* The speed of this function is critical and the sections have so many variables +* in common that breaking it up would be inefficient. +* For each position cur, starting at 1, check some or all possible +* encoding choices - a literal, 1-byte rep 0 match, all rep match lengths, and +* all match lengths at available distances. It also checks the combined +* sequences literal+rep0, rep+rep0 and match+rep0. +* If is_hybrid != 0, this method works in hybrid mode, using the +* hash chain to find shorter matches at near distances. 
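+* (Editorial addition) Each candidate updates opt_buf[cur + len] when it
+* lowers the price there, storing a back-link to cur; once the parse
+* ends, ReverseOptimalChain() converts the back-links into a forward
+* chain that the encoding loop walks to emit literals and matches.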
*/ +FORCE_INLINE_TEMPLATE +size_t OptimalParse(FL2_lzmaEncoderCtx* const enc, const FL2_dataBlock block, + Match match, + size_t const index, + size_t const cur, + size_t len_end, + int const is_hybrid, + U32* const reps) +{ + OptimalNode* cur_opt = &enc->opt_buf[cur]; + size_t prev_index = cur_opt->prev_index; + size_t state = enc->opt_buf[prev_index].state; + size_t const pos_mask = enc->pos_mask; + size_t pos_state = (index & pos_mask); + const BYTE* data = block.data + index; + size_t const fast_length = enc->fast_length; + size_t bytes_avail; + size_t max_length; + size_t start_len; + U32 match_price; + U32 rep_match_price; + Probability is_rep_prob; + + if (cur_opt->is_combination) { + --prev_index; + if (cur_opt->prev_2) { + state = enc->opt_buf[cur_opt->prev_index_2].state; + if (cur_opt->prev_dist_2 < kNumReps) { + state = RepNextState(state); + } + else { + state = MatchNextState(state); + } + } + else { + state = enc->opt_buf[prev_index].state; + } + state = LiteralNextState(state); + } + if (prev_index == cur - 1) { + if (cur_opt->prev_dist == 0) { + state = ShortRepNextState(state); + } + else { + state = LiteralNextState(state); + } + } + else { + size_t dist; + if (cur_opt->is_combination && cur_opt->prev_2) { + prev_index = cur_opt->prev_index_2; + dist = cur_opt->prev_dist_2; + state = RepNextState(state); + } + else { + dist = cur_opt->prev_dist; + if (dist < kNumReps) { + state = RepNextState(state); + } + else { + state = MatchNextState(state); + } + } + const OptimalNode* prev_opt = &enc->opt_buf[prev_index]; + if (dist < kNumReps) { + size_t i = 1; + reps[0] = prev_opt->reps[dist]; + for (; i <= dist; ++i) { + reps[i] = prev_opt->reps[i - 1]; + } + for (; i < kNumReps; ++i) { + reps[i] = prev_opt->reps[i]; + } + } + else { + reps[0] = (U32)(dist - kNumReps); + for (size_t i = 1; i < kNumReps; ++i) { + reps[i] = prev_opt->reps[i - 1]; + } + } + } + cur_opt->state = state; + memcpy(cur_opt->reps, reps, sizeof(cur_opt->reps)); + is_rep_prob = enc->states.is_rep[state]; + + { Probability is_match_prob = enc->states.is_match[state][pos_state]; + unsigned cur_byte = *data; + unsigned match_byte = *(data - reps[0] - 1); + U32 cur_price = cur_opt->price; + U32 cur_and_lit_price = cur_price + GET_PRICE_0(rc, is_match_prob) + + GetLiteralPrice(enc, index, state, data[-1], cur_byte, match_byte); + OptimalNode* next_opt = &enc->opt_buf[cur + 1]; + BYTE next_is_char = 0; + /* Try literal */ + if (cur_and_lit_price < next_opt->price) { + next_opt->price = cur_and_lit_price; + next_opt->prev_index = (unsigned)cur; + MakeAsLiteral(*next_opt); + next_is_char = 1; + } + match_price = cur_price + GET_PRICE_1(rc, is_match_prob); + rep_match_price = match_price + GET_PRICE_1(rc, is_rep_prob); + if (match_byte == cur_byte) { + /* Try 1-byte rep0 */ + U32 short_rep_price = rep_match_price + GetRepLen1Price(enc, state, pos_state); + if (short_rep_price <= next_opt->price) { + next_opt->price = short_rep_price; + next_opt->prev_index = (unsigned)cur; + MakeAsShortRep(*next_opt); + next_is_char = 1; + } + } + bytes_avail = MIN(block.end - index, kOptimizerBufferSize - 1 - cur); + if (bytes_avail < 2) + return len_end; + if (!next_is_char && match_byte != cur_byte) { + /* Try literal + rep0 */ + const BYTE *data_2 = data - reps[0]; + size_t limit = MIN(bytes_avail - 1, fast_length); + size_t len_test_2 = ZSTD_count(data + 1, data_2, data + 1 + limit); + if (len_test_2 >= 2) { + size_t state_2 = LiteralNextState(state); + size_t pos_state_next = (index + 1) & pos_mask; + U32 next_rep_match_price 
= cur_and_lit_price +
+                GET_PRICE_1(rc, enc->states.is_match[state_2][pos_state_next]) +
+                GET_PRICE_1(rc, enc->states.is_rep[state_2]);
+            size_t offset = cur + 1 + len_test_2;
+            U32 cur_and_len_price = next_rep_match_price + GetRepMatch0Price(enc, len_test_2, state_2, pos_state_next);
+            if (cur_and_len_price < enc->opt_buf[offset].price) {
+                len_end = MAX(len_end, offset);
+                enc->opt_buf[offset].price = cur_and_len_price;
+                enc->opt_buf[offset].prev_index = (unsigned)(cur + 1);
+                enc->opt_buf[offset].prev_dist = 0;
+                enc->opt_buf[offset].is_combination = 1;
+                enc->opt_buf[offset].prev_2 = 0;
+            }
+        }
+    }
+    }
+
+    max_length = MIN(bytes_avail, fast_length);
+    start_len = 2;
+    if (match.length > 0) {
+        size_t len_test;
+        size_t len;
+        U32 cur_rep_price;
+        for (size_t rep_index = 0; rep_index < kNumReps; ++rep_index) {
+            const BYTE *data_2 = data - reps[rep_index] - 1;
+            if (MEM_read16(data) != MEM_read16(data_2))
+                continue;
+            len_test = ZSTD_count(data + 2, data_2 + 2, data + max_length) + 2;
+            len_end = MAX(len_end, cur + len_test);
+            cur_rep_price = rep_match_price + GetRepPrice(enc, rep_index, state, pos_state);
+            len = 2;
+            /* Try rep match */
+            do {
+                U32 cur_and_len_price = cur_rep_price + enc->states.rep_len_states.prices[pos_state][len - kMatchLenMin];
+                OptimalNode* opt = &enc->opt_buf[cur + len];
+                if (cur_and_len_price < opt->price) {
+                    opt->price = cur_and_len_price;
+                    opt->prev_index = (unsigned)cur;
+                    opt->prev_dist = (U32)(rep_index);
+                    opt->is_combination = 0;
+                }
+            } while (++len <= len_test);
+
+            if (rep_index == 0) {
+                /* Save time by excluding normal matches not longer than the rep */
+                start_len = len_test + 1;
+            }
+            if (is_hybrid && len_test + 3 <= bytes_avail && MEM_read16(data + len_test + 1) == MEM_read16(data_2 + len_test + 1)) {
+                /* Try rep + literal + rep0 */
+                size_t len_test_2 = ZSTD_count(data + len_test + 3,
+                    data_2 + len_test + 3,
+                    data + MIN(len_test + 1 + fast_length, bytes_avail)) + 2;
+                size_t state_2 = RepNextState(state);
+                size_t pos_state_next = (index + len_test) & pos_mask;
+                U32 rep_lit_rep_total_price =
+                    cur_rep_price + enc->states.rep_len_states.prices[pos_state][len_test - kMatchLenMin] +
+                    GET_PRICE_0(rc, enc->states.is_match[state_2][pos_state_next]) +
+                    GetLiteralPriceMatched(&enc->rc, GetLiteralProbs(enc, index + len_test, data[len_test - 1]),
+                        data[len_test], data_2[len_test]);
+                size_t offset;
+
+                state_2 = LiteralNextState(state_2);
+                pos_state_next = (index + len_test + 1) & pos_mask;
+                rep_lit_rep_total_price +=
+                    GET_PRICE_1(rc, enc->states.is_match[state_2][pos_state_next]) +
+                    GET_PRICE_1(rc, enc->states.is_rep[state_2]);
+                offset = cur + len_test + 1 + len_test_2;
+                rep_lit_rep_total_price += GetRepMatch0Price(enc, len_test_2, state_2, pos_state_next);
+                if (rep_lit_rep_total_price < enc->opt_buf[offset].price) {
+                    len_end = MAX(len_end, offset);
+                    enc->opt_buf[offset].price = rep_lit_rep_total_price;
+                    enc->opt_buf[offset].prev_index = (unsigned)(cur + len_test + 1);
+                    enc->opt_buf[offset].prev_dist = 0;
+                    enc->opt_buf[offset].is_combination = 1;
+                    enc->opt_buf[offset].prev_2 = 1;
+                    enc->opt_buf[offset].prev_index_2 = (unsigned)cur;
+                    enc->opt_buf[offset].prev_dist_2 = (U32)(rep_index);
+                }
+            }
+        }
+    }
+    if (match.length >= start_len && max_length >= start_len) {
+        /* Try normal match */
+        U32 normal_match_price = match_price + GET_PRICE_0(rc, is_rep_prob);
+        if (!is_hybrid) {
+            /* Normal mode - single match */
+            size_t length = MIN(match.length, max_length);
+            size_t cur_dist = match.dist;
+            size_t dist_slot = 
GetDistSlot(match.dist); + size_t len_test = length; + len_end = MAX(len_end, cur + length); + /* Pre-load rep0 data bytes */ +/* unsigned rep_0_bytes = MEM_read16(data - cur_dist + length); */ + for (; len_test >= start_len; --len_test) { + OptimalNode *opt; + U32 cur_and_len_price = normal_match_price + enc->states.len_states.prices[pos_state][len_test - kMatchLenMin]; + size_t len_to_dist_state = GetLenToDistState(len_test); + + if (cur_dist < kNumFullDistances) { + cur_and_len_price += enc->distance_prices[len_to_dist_state][cur_dist]; + } + else { + cur_and_len_price += enc->dist_slot_prices[len_to_dist_state][dist_slot] + enc->align_prices[cur_dist & kAlignMask]; + } + opt = &enc->opt_buf[cur + len_test]; + if (cur_and_len_price < opt->price) { + opt->price = cur_and_len_price; + opt->prev_index = (unsigned)cur; + opt->prev_dist = (U32)(cur_dist + kNumReps); + opt->is_combination = 0; + } + else break; + } + } + else { + /* Hybrid mode */ + size_t main_len; + ptrdiff_t match_index; + ptrdiff_t start_match; + + match.length = MIN(match.length, (U32)max_length); + if (match.length < 3 || match.dist < 256) { + enc->matches[0] = match; + enc->match_count = 1; + main_len = match.length; + } + else { + main_len = HashGetMatches(enc, block, index, max_length, match); + } + match_index = enc->match_count - 1; + if (main_len == max_length + && match_index > 0 + && enc->matches[match_index - 1].length == main_len) + { + --match_index; + } + len_end = MAX(len_end, cur + main_len); + start_match = 0; + while (start_len > enc->matches[start_match].length) { + ++start_match; + } + for (; match_index >= start_match; --match_index) { + size_t len_test = enc->matches[match_index].length; + size_t cur_dist = enc->matches[match_index].dist; + size_t dist_slot = GetDistSlot((U32)cur_dist); + U32 cur_and_len_price; + size_t base_len = (match_index > start_match) ? 
enc->matches[match_index - 1].length + 1 : start_len; + unsigned rep_0_bytes = MEM_read16(data - cur_dist + len_test); + for (; len_test >= base_len; --len_test) { + size_t len_to_dist_state; + OptimalNode *opt; + + cur_and_len_price = normal_match_price + enc->states.len_states.prices[pos_state][len_test - kMatchLenMin]; + len_to_dist_state = GetLenToDistState(len_test); + if (cur_dist < kNumFullDistances) { + cur_and_len_price += enc->distance_prices[len_to_dist_state][cur_dist]; + } + else { + cur_and_len_price += enc->dist_slot_prices[len_to_dist_state][dist_slot] + enc->align_prices[cur_dist & kAlignMask]; + } + opt = &enc->opt_buf[cur + len_test]; + if (cur_and_len_price < opt->price) { + opt->price = cur_and_len_price; + opt->prev_index = (unsigned)cur; + opt->prev_dist = (U32)(cur_dist + kNumReps); + opt->is_combination = 0; + } + else if(len_test < main_len) + break; + if (len_test == enc->matches[match_index].length) { + size_t rep_0_pos = len_test + 1; + if (rep_0_pos + 2 <= bytes_avail && rep_0_bytes == MEM_read16(data + rep_0_pos)) { + /* Try match + literal + rep0 */ + const BYTE *data_2 = data - cur_dist - 1; + size_t limit = MIN(rep_0_pos + fast_length, bytes_avail); + size_t len_test_2 = ZSTD_count(data + rep_0_pos + 2, data_2 + rep_0_pos + 2, data + limit) + 2; + size_t state_2 = MatchNextState(state); + size_t pos_state_next = (index + len_test) & pos_mask; + U32 match_lit_rep_total_price = cur_and_len_price + + GET_PRICE_0(rc, enc->states.is_match[state_2][pos_state_next]) + + GetLiteralPriceMatched(&enc->rc, GetLiteralProbs(enc, index + len_test, data[len_test - 1]), + data[len_test], data_2[len_test]); + size_t offset; + + state_2 = LiteralNextState(state_2); + pos_state_next = (pos_state_next + 1) & pos_mask; + match_lit_rep_total_price += + GET_PRICE_1(rc, enc->states.is_match[state_2][pos_state_next]) + + GET_PRICE_1(rc, enc->states.is_rep[state_2]); + offset = cur + rep_0_pos + len_test_2; + match_lit_rep_total_price += GetRepMatch0Price(enc, len_test_2, state_2, pos_state_next); + if (match_lit_rep_total_price < enc->opt_buf[offset].price) { + len_end = MAX(len_end, offset); + enc->opt_buf[offset].price = match_lit_rep_total_price; + enc->opt_buf[offset].prev_index = (unsigned)(cur + rep_0_pos); + enc->opt_buf[offset].prev_dist = 0; + enc->opt_buf[offset].is_combination = 1; + enc->opt_buf[offset].prev_2 = 1; + enc->opt_buf[offset].prev_index_2 = (unsigned)cur; + enc->opt_buf[offset].prev_dist_2 = (U32)(cur_dist + kNumReps); + } + } + } + } + } + } + } + return len_end; +} + +HINT_INLINE +void InitMatchesPos0(FL2_lzmaEncoderCtx* enc, const FL2_dataBlock block, + Match match, + size_t pos_state, + size_t len, + unsigned normal_match_price) +{ + if ((unsigned)len <= match.length) { + size_t distance = match.dist; + size_t slot = GetDistSlot(match.dist); + /* Test every available length of the match */ + do + { + unsigned cur_and_len_price = normal_match_price + enc->states.len_states.prices[pos_state][len - kMatchLenMin]; + size_t len_to_dist_state = GetLenToDistState(len); + if (distance < kNumFullDistances) { + cur_and_len_price += enc->distance_prices[len_to_dist_state][distance]; + } + else { + cur_and_len_price += enc->align_prices[distance & kAlignMask] + enc->dist_slot_prices[len_to_dist_state][slot]; + } + if (cur_and_len_price < enc->opt_buf[len].price) { + enc->opt_buf[len].price = cur_and_len_price; + enc->opt_buf[len].prev_index = 0; + enc->opt_buf[len].prev_dist = (U32)(distance + kNumReps); + enc->opt_buf[len].is_combination = 0; + } + ++len; + } 
while ((unsigned)len <= match.length); + } +} + +static size_t InitMatchesPos0Best(FL2_lzmaEncoderCtx* enc, const FL2_dataBlock block, + Match match, + size_t index, + size_t len, + unsigned normal_match_price) +{ + if (len <= match.length) { + size_t main_len; + size_t match_index; + size_t pos_state; + size_t distance; + size_t slot; + + if (match.length < 3 || match.dist < 256) { + enc->matches[0] = match; + enc->match_count = 1; + main_len = match.length; + } + else { + main_len = HashGetMatches(enc, block, index, MIN(block.end - index, enc->fast_length), match); + } + match_index = 0; + while (len > enc->matches[match_index].length) { + ++match_index; + } + pos_state = index & enc->pos_mask; + distance = enc->matches[match_index].dist; + slot = GetDistSlot(enc->matches[match_index].dist); + /* Test every available match length at the shortest distance. The buffer is sorted */ + /* in order of increasing length, and therefore increasing distance too. */ + for (;; ++len) { + unsigned cur_and_len_price = normal_match_price + + enc->states.len_states.prices[pos_state][len - kMatchLenMin]; + size_t len_to_dist_state = GetLenToDistState(len); + if (distance < kNumFullDistances) { + cur_and_len_price += enc->distance_prices[len_to_dist_state][distance]; + } + else { + cur_and_len_price += enc->align_prices[distance & kAlignMask] + enc->dist_slot_prices[len_to_dist_state][slot]; + } + if (cur_and_len_price < enc->opt_buf[len].price) { + enc->opt_buf[len].price = cur_and_len_price; + enc->opt_buf[len].prev_index = 0; + enc->opt_buf[len].prev_dist = (U32)(distance + kNumReps); + enc->opt_buf[len].is_combination = 0; + } + if (len == enc->matches[match_index].length) { + /* Run out of length for this match. Get the next if any. */ + if (len == main_len) { + break; + } + ++match_index; + distance = enc->matches[match_index].dist; + slot = GetDistSlot(enc->matches[match_index].dist); + } + } + return main_len; + } + return 0; +} + +/* Test all available options at position 0 of the optimizer buffer. +* The prices at this point are all initialized to kInfinityPrice. +* This function must not be called at a position where no match is +* available. 
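+* (Editorial addition) If a rep or normal match reaches fast_length, the
+* choice is stored directly in opt_buf[0] and 0 is returned, which the
+* caller treats as "encode immediately without parsing".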
*/ +FORCE_INLINE_TEMPLATE +size_t InitOptimizerPos0(FL2_lzmaEncoderCtx* enc, const FL2_dataBlock block, + Match match, + size_t index, + int const is_hybrid, + U32* reps) +{ + size_t max_length = MIN(block.end - index, kMatchLenMax); + const BYTE *data = block.data + index; + const BYTE *data_2; + size_t rep_max_index = 0; + size_t rep_lens[kNumReps]; + + /* Find any rep matches */ + for (size_t i = 0; i < kNumReps; ++i) { + reps[i] = enc->states.reps[i]; + data_2 = data - reps[i] - 1; + if (MEM_read16(data) != MEM_read16(data_2)) { + rep_lens[i] = 0; + continue; + } + rep_lens[i] = ZSTD_count(data + 2, data_2 + 2, data + max_length) + 2; + if (rep_lens[i] > rep_lens[rep_max_index]) { + rep_max_index = i; + } + } + if (rep_lens[rep_max_index] >= enc->fast_length) { + enc->opt_buf[0].prev_index = (unsigned)(rep_lens[rep_max_index]); + enc->opt_buf[0].prev_dist = (U32)(rep_max_index); + return 0; + } + if (match.length >= enc->fast_length) { + enc->opt_buf[0].prev_index = match.length; + enc->opt_buf[0].prev_dist = match.dist + kNumReps; + return 0; + } + + { unsigned cur_byte = *data; + unsigned match_byte = *(data - reps[0] - 1); + unsigned match_price; + unsigned normal_match_price; + unsigned rep_match_price; + size_t len; + size_t state = enc->states.state; + size_t pos_state = index & enc->pos_mask; + Probability is_match_prob = enc->states.is_match[state][pos_state]; + Probability is_rep_prob = enc->states.is_rep[state]; + + enc->opt_buf[0].state = state; + /* Set the price for literal */ + enc->opt_buf[1].price = GET_PRICE_0(rc, is_match_prob) + + GetLiteralPrice(enc, index, state, data[-1], cur_byte, match_byte); + MakeAsLiteral(enc->opt_buf[1]); + + match_price = GET_PRICE_1(rc, is_match_prob); + rep_match_price = match_price + GET_PRICE_1(rc, is_rep_prob); + if (match_byte == cur_byte) { + /* Try 1-byte rep0 */ + unsigned short_rep_price = rep_match_price + GetRepLen1Price(enc, state, pos_state); + if (short_rep_price < enc->opt_buf[1].price) { + enc->opt_buf[1].price = short_rep_price; + MakeAsShortRep(enc->opt_buf[1]); + } + } + memcpy(enc->opt_buf[0].reps, reps, sizeof(enc->opt_buf[0].reps)); + enc->opt_buf[1].prev_index = 0; + /* Test the rep match prices */ + for (size_t i = 0; i < kNumReps; ++i) { + unsigned price; + size_t rep_len = rep_lens[i]; + if (rep_len < 2) { + continue; + } + price = rep_match_price + GetRepPrice(enc, i, state, pos_state); + /* Test every available length of the rep */ + do { + unsigned cur_and_len_price = price + enc->states.rep_len_states.prices[pos_state][rep_len - kMatchLenMin]; + if (cur_and_len_price < enc->opt_buf[rep_len].price) { + enc->opt_buf[rep_len].price = cur_and_len_price; + enc->opt_buf[rep_len].prev_index = 0; + enc->opt_buf[rep_len].prev_dist = (U32)(i); + enc->opt_buf[rep_len].is_combination = 0; + } + } while (--rep_len >= kMatchLenMin); + } + normal_match_price = match_price + GET_PRICE_0(rc, is_rep_prob); + len = (rep_lens[0] >= 2) ? 
rep_lens[0] + 1 : 2;
+    /* Test the match prices */
+    if (!is_hybrid) {
+        /* Normal mode */
+        InitMatchesPos0(enc, block, match, pos_state, len, normal_match_price);
+        return MAX(match.length, rep_lens[rep_max_index]);
+    }
+    else {
+        /* Hybrid mode */
+        size_t main_len = InitMatchesPos0Best(enc, block, match, index, len, normal_match_price);
+        return MAX(main_len, rep_lens[rep_max_index]);
+    }
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t EncodeOptimumSequence(FL2_lzmaEncoderCtx* enc, const FL2_dataBlock block,
+    FL2_matchTable* tbl,
+    int const structTbl,
+    int const is_hybrid,
+    size_t start_index,
+    size_t uncompressed_end,
+    Match match)
+{
+    size_t len_end = enc->len_end_max;
+    unsigned search_depth = tbl->params.depth;
+    do {
+        U32 reps[kNumReps];
+        size_t index;
+        size_t cur;
+        unsigned prev_index;
+        size_t i;
+        size_t const pos_mask = enc->pos_mask;
+        for (; (len_end & 3) != 0; --len_end) {
+            enc->opt_buf[len_end].price = kInfinityPrice;
+        }
+        for (; len_end >= 4; len_end -= 4) {
+            enc->opt_buf[len_end].price = kInfinityPrice;
+            enc->opt_buf[len_end - 1].price = kInfinityPrice;
+            enc->opt_buf[len_end - 2].price = kInfinityPrice;
+            enc->opt_buf[len_end - 3].price = kInfinityPrice;
+        }
+        index = start_index;
+        /* Set everything up at position 0 */
+        len_end = InitOptimizerPos0(enc, block, match, index, is_hybrid, reps);
+        match.length = 0;
+        cur = 1;
+        /* len_end == 0 if a match of fast_length was found */
+        if (len_end > 0) {
+            ++index;
+            /* Lazy termination of the optimal parser. In the second half of the buffer */
+            /* a resolution within one byte is enough */
+            for (; cur < (len_end - cur / (kOptimizerBufferSize / 2U)); ++cur, ++index) {
+                if (enc->opt_buf[cur + 1].price < enc->opt_buf[cur].price)
+                    continue;
+                match = FL2_radixGetMatch(block, tbl, search_depth, structTbl, index);
+                if (match.length >= enc->fast_length) {
+                    break;
+                }
+                len_end = OptimalParse(enc, block, match, index, cur, len_end, is_hybrid, reps);
+            }
+            if (cur < len_end && match.length < enc->fast_length) {
+                /* Adjust the end point based on scaling up the price. */
+                cur += (enc->opt_buf[cur].price + enc->opt_buf[cur].price / cur) >= enc->opt_buf[cur + 1].price;
+            }
+            DEBUGLOG(6, "End optimal parse at %u", (U32)cur);
+            ReverseOptimalChain(enc->opt_buf, cur);
+        }
+        /* Encode the selections in the buffer */
+        prev_index = 0;
+        i = 0;
+        do {
+            unsigned len = enc->opt_buf[i].prev_index - prev_index;
+            prev_index = enc->opt_buf[i].prev_index;
+            if (len == 1 && enc->opt_buf[i].prev_dist == kNullDist)
+            {
+                EncodeLiteralBuf(enc, block.data, start_index + i);
+            }
+            else {
+                size_t match_index = start_index + i;
+                U32 dist = enc->opt_buf[i].prev_dist;
+                /* The last match will be truncated to fit in the optimal buffer so get the full length */
+                if (i + len >= kOptimizerBufferSize - 1 && dist >= kNumReps) {
+                    Match lastmatch = FL2_radixGetMatch(block, tbl, search_depth, tbl->isStruct, match_index);
+                    if (lastmatch.length > len) {
+                        len = lastmatch.length;
+                        dist = lastmatch.dist + kNumReps;
+                    }
+                }
+                if (dist < kNumReps) {
+                    EncodeRepMatch(enc, len, dist, match_index & pos_mask);
+                }
+                else {
+                    EncodeNormalMatch(enc, len, dist - kNumReps, match_index & pos_mask);
+                }
+            }
+            i += len;
+        } while (i < cur);
+        start_index += i;
+        /* Do another round if there is a long match pending, because the reps must be checked */
+        /* and the match encoded. 
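+        A match of fast_length or longer terminates the parse early, so
+        the loop repeats until no such match is pending, the input range
+        is consumed, or the output chunk is full. (Editorial addition.)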
*/ + } while (match.length >= enc->fast_length && start_index < uncompressed_end && enc->rc.out_index < enc->rc.chunk_size); + enc->len_end_max = len_end; + return start_index; +} + +static void UpdateLengthPrices(FL2_lzmaEncoderCtx* enc, LengthStates* len_states) +{ + for (size_t pos_state = 0; pos_state <= enc->pos_mask; ++pos_state) { + LengthStates_SetPrices(&enc->rc, len_states, pos_state); + } +} + +static void FillAlignPrices(FL2_lzmaEncoderCtx* enc) +{ + for (size_t i = 0; i < kAlignTableSize; ++i) { + enc->align_prices[i] = GetReverseTreePrice(&enc->rc, enc->states.dist_align_encoders, kNumAlignBits, i); + } + enc->align_price_count = 0; +} + +static void FillDistancesPrices(FL2_lzmaEncoderCtx* enc) +{ + static const size_t kLastLenToPosState = kNumLenToPosStates - 1; + for (size_t i = kStartPosModelIndex; i < kNumFullDistances; ++i) { + size_t dist_slot = distance_table[i]; + unsigned footerBits = (unsigned)((dist_slot >> 1) - 1); + size_t base = ((2 | (dist_slot & 1)) << footerBits); + enc->distance_prices[kLastLenToPosState][i] = GetReverseTreePrice(&enc->rc, enc->states.dist_encoders + base - dist_slot - 1, + footerBits, + i - base); + } + for (size_t lenToPosState = 0; lenToPosState < kNumLenToPosStates; ++lenToPosState) { + const Probability* encoder = enc->states.dist_slot_encoders[lenToPosState]; + for (size_t dist_slot = 0; dist_slot < enc->dist_price_table_size; ++dist_slot) { + enc->dist_slot_prices[lenToPosState][dist_slot] = GetTreePrice(&enc->rc, encoder, kNumPosSlotBits, dist_slot); + } + for (size_t dist_slot = kEndPosModelIndex; dist_slot < enc->dist_price_table_size; ++dist_slot) { + enc->dist_slot_prices[lenToPosState][dist_slot] += (((unsigned)(dist_slot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits; + } + size_t i = 0; + for (; i < kStartPosModelIndex; ++i) { + enc->distance_prices[lenToPosState][i] = enc->dist_slot_prices[lenToPosState][i]; + } + for (; i < kNumFullDistances; ++i) { + enc->distance_prices[lenToPosState][i] = enc->dist_slot_prices[lenToPosState][distance_table[i]] + + enc->distance_prices[kLastLenToPosState][i]; + } + } + enc->match_price_count = 0; +} + +FORCE_INLINE_TEMPLATE +size_t EncodeChunkBest(FL2_lzmaEncoderCtx* enc, + FL2_dataBlock const block, + FL2_matchTable* tbl, + int const structTbl, + size_t index, + size_t uncompressed_end) +{ + unsigned search_depth = tbl->params.depth; + FillDistancesPrices(enc); + FillAlignPrices(enc); + UpdateLengthPrices(enc, &enc->states.len_states); + UpdateLengthPrices(enc, &enc->states.rep_len_states); + while (index < uncompressed_end && enc->rc.out_index < enc->rc.chunk_size) + { + Match match = FL2_radixGetMatch(block, tbl, search_depth, structTbl, index); + if (match.length > 1) { + if (enc->strategy != FL2_ultra) { + index = EncodeOptimumSequence(enc, block, tbl, structTbl, 0, index, uncompressed_end, match); + } + else { + index = EncodeOptimumSequence(enc, block, tbl, structTbl, 1, index, uncompressed_end, match); + } + if (enc->match_price_count >= kDistanceRepriceFrequency) { + FillDistancesPrices(enc); + } + if (enc->align_price_count >= kAlignRepriceFrequency) { + FillAlignPrices(enc); + } + } + else { + if (block.data[index] == block.data[index - enc->states.reps[0] - 1]) { + EncodeRepMatch(enc, 1, 0, index & enc->pos_mask); + } + else { + EncodeLiteralBuf(enc, block.data, index); + } + ++index; + } + } + Flush(&enc->rc); + return index; +} + +static void LengthStates_Reset(LengthStates* ls, unsigned fast_length) +{ + ls->choice = kProbInitValue; + ls->choice_2 = kProbInitValue; + 
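+    /* Editor's note: kProbInitValue is the midpoint of the bit-model
+    scale, so every binary model starts at probability 0.5, per the
+    usual LZMA convention. */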
for (size_t i = 0; i < (kNumPositionStatesMax << kLenNumLowBits); ++i) { + ls->low[i] = kProbInitValue; + } + for (size_t i = 0; i < (kNumPositionStatesMax << kLenNumMidBits); ++i) { + ls->mid[i] = kProbInitValue; + } + for (size_t i = 0; i < kLenNumHighSymbols; ++i) { + ls->high[i] = kProbInitValue; + } + ls->table_size = fast_length + 1 - kMatchLenMin; +} + +static void EncoderStates_Reset(EncoderStates* es, unsigned lc, unsigned lp, unsigned fast_length) +{ + es->state = 0; + for (size_t i = 0; i < kNumReps; ++i) { + es->reps[i] = 0; + } + for (size_t i = 0; i < kNumStates; ++i) { + for (size_t j = 0; j < kNumPositionStatesMax; ++j) { + es->is_match[i][j] = kProbInitValue; + es->is_rep0_long[i][j] = kProbInitValue; + } + es->is_rep[i] = kProbInitValue; + es->is_rep_G0[i] = kProbInitValue; + es->is_rep_G1[i] = kProbInitValue; + es->is_rep_G2[i] = kProbInitValue; + } + size_t num = (size_t)(kNumLiterals * kNumLitTables) << (lp + lc); + for (size_t i = 0; i < num; ++i) { + es->literal_probs[i] = kProbInitValue; + } + for (size_t i = 0; i < kNumLenToPosStates; ++i) { + Probability *probs = es->dist_slot_encoders[i]; + for (size_t j = 0; j < (1 << kNumPosSlotBits); ++j) { + probs[j] = kProbInitValue; + } + } + for (size_t i = 0; i < kNumFullDistances - kEndPosModelIndex; ++i) { + es->dist_encoders[i] = kProbInitValue; + } + LengthStates_Reset(&es->len_states, fast_length); + LengthStates_Reset(&es->rep_len_states, fast_length); + for (size_t i = 0; i < (1 << kNumAlignBits); ++i) { + es->dist_align_encoders[i] = kProbInitValue; + } +} + +BYTE FL2_getDictSizeProp(size_t dictionary_size) +{ + BYTE dict_size_prop = 0; + for (BYTE bit = 11; bit < 32; ++bit) { + if (((size_t)2 << bit) >= dictionary_size) { + dict_size_prop = (bit - 11) << 1; + break; + } + if (((size_t)3 << bit) >= dictionary_size) { + dict_size_prop = ((bit - 11) << 1) | 1; + break; + } + } + return dict_size_prop; +} + +size_t FL2_lzma2MemoryUsage(unsigned chain_log, FL2_strategy strategy, unsigned thread_count) +{ + size_t size = sizeof(FL2_lzmaEncoderCtx) + kChunkBufferSize; + if(strategy == FL2_ultra) + size += sizeof(HashChains) + (sizeof(U32) << chain_log) - sizeof(U32); + return size * thread_count; +} + +static void Reset(FL2_lzmaEncoderCtx* enc, size_t max_distance) +{ + DEBUGLOG(5, "LZMA encoder reset : max_distance %u", (unsigned)max_distance); + U32 i = 0; + RangeEncReset(&enc->rc); + EncoderStates_Reset(&enc->states, enc->lc, enc->lp, enc->fast_length); + enc->pos_mask = (1 << enc->pb) - 1; + enc->lit_pos_mask = (1 << enc->lp) - 1; + for (; max_distance > (size_t)1 << i; ++i) { + } + enc->dist_price_table_size = i * 2; +} + +static BYTE GetLcLpPbCode(FL2_lzmaEncoderCtx* enc) +{ + return (BYTE)((enc->pb * 5 + enc->lp) * 9 + enc->lc); +} + +BYTE IsChunkRandom(const FL2_matchTable* const tbl, + const FL2_dataBlock block, size_t const start, + unsigned const strategy) +{ + if (block.end - start >= kMinTestChunkSize) { + static const size_t max_dist_table[][5] = { + { 0, 0, 0, 1U << 6, 1U << 14 }, /* fast */ + { 0, 0, 1U << 6, 1U << 14, 1U << 22 }, /* opt */ + { 0, 0, 1U << 6, 1U << 14, 1U << 22 } }; /* ultra */ + static const size_t margin_divisor[3] = { 60U, 45U, 120U }; + static const double dev_table[3] = { 6.0, 6.0, 5.0 }; + + size_t const end = MIN(start + kChunkSize, block.end); + size_t const chunk_size = end - start; + size_t count = 0; + size_t const margin = chunk_size / margin_divisor[strategy]; + size_t const terminator = start + margin; + + if (tbl->isStruct) { + size_t prev_dist = 0; + for (size_t index = 
start; index < end; ) { + U32 const link = GetMatchLink(tbl->table, index); + if (link == RADIX_NULL_LINK) { + ++index; + ++count; + prev_dist = 0; + } + else { + size_t length = GetMatchLength(tbl->table, index); + size_t dist = index - GetMatchLink(tbl->table, index); + if (length > 4) + count += dist != prev_dist; + else + count += (dist < max_dist_table[strategy][length]) ? 1 : length; + index += length; + prev_dist = dist; + } + if (count + terminator <= index) + return 0; + } + } + else { + size_t prev_dist = 0; + for (size_t index = start; index < end; ) { + U32 const link = tbl->table[index]; + if (link == RADIX_NULL_LINK) { + ++index; + ++count; + prev_dist = 0; + } + else { + size_t length = link >> RADIX_LINK_BITS; + size_t dist = index - (link & RADIX_LINK_MASK); + if (length > 4) + count += dist != prev_dist; + else + count += (dist < max_dist_table[strategy][length]) ? 1 : length; + index += length; + prev_dist = dist; + } + if (count + terminator <= index) + return 0; + } + } + + { U32 char_count[256]; + double char_total = 0.0; + /* Expected normal character count */ + double const avg = (double)chunk_size / 256.0; + + memset(char_count, 0, sizeof(char_count)); + for (size_t index = start; index < end; ++index) + ++char_count[block.data[index]]; + /* Sum the deviations */ + for (size_t i = 0; i < 256; ++i) { + double delta = (double)char_count[i] - avg; + char_total += delta * delta; + } + return sqrt(char_total) / sqrt((double)chunk_size) <= dev_table[strategy]; + } + } + return 0; +} + +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#else +__pragma(warning(disable:4701)) +#endif + +size_t FL2_lzma2Encode(FL2_lzmaEncoderCtx* enc, + FL2_matchTable* tbl, + const FL2_dataBlock block, + const FL2_lzma2Parameters* options, + FL2_progressFn progress, void* opaque, size_t base, U32 weight) +{ + size_t const start = block.start; + BYTE* out_dest = enc->out_buf; + /* Each encoder writes a properties byte because the upstream encoder(s) could */ + /* write only uncompressed chunks with no properties. */ + BYTE encode_properties = 1; + BYTE next_is_random = 0; + + if (block.end <= block.start) { + return 0; + } + enc->lc = options->lc; + enc->lp = options->lp; + if (enc->lc + enc->lp > 4) { + enc->lc = 3; + enc->lp = 0; + } + enc->pb = options->pb; + enc->strategy = options->strategy; + enc->fast_length = options->fast_length; + enc->match_cycles = options->match_cycles; + Reset(enc, block.end); + if (enc->strategy == FL2_ultra) { + /* Create a hash chain to put the encoder into hybrid mode */ + if (enc->hash_alloc_3 < ((ptrdiff_t)1 << options->second_dict_bits)) { + if(HashCreate(enc, options->second_dict_bits) != 0) + return FL2_ERROR(memory_allocation); + } + else { + HashReset(enc, options->second_dict_bits); + } + enc->hash_prev_index = (start >= (size_t)enc->hash_dict_3) ? start - enc->hash_dict_3 : -1; + } + enc->len_end_max = kOptimizerBufferSize - 1; + RMF_limitLengths(tbl, block.end); + for (size_t index = start; index < block.end;) + { + unsigned header_size = encode_properties ? 
kChunkHeaderSize + 1 : kChunkHeaderSize; + EncoderStates saved_states; + size_t next_index; + size_t compressed_size; + size_t uncompressed_size; + RangeEncReset(&enc->rc); + SetOutputBuffer(&enc->rc, out_dest + header_size, kChunkSize); + if (!next_is_random) { + saved_states = enc->states; + if (index == 0) { + EncodeLiteral(enc, 0, block.data[0], 0); + } + if (enc->strategy == FL2_fast) { + if (tbl->isStruct) { + next_index = EncodeChunkFast(enc, block, tbl, 1, + index + (index == 0), + MIN(block.end, index + kMaxChunkUncompressedSize)); + } + else { + next_index = EncodeChunkFast(enc, block, tbl, 0, + index + (index == 0), + MIN(block.end, index + kMaxChunkUncompressedSize)); + } + } + else { + if (tbl->isStruct) { + next_index = EncodeChunkBest(enc, block, tbl, 1, + index + (index == 0), + MIN(block.end, index + kMaxChunkUncompressedSize - kOptimizerBufferSize)); + } + else { + next_index = EncodeChunkBest(enc, block, tbl, 0, + index + (index == 0), + MIN(block.end, index + kMaxChunkUncompressedSize - kOptimizerBufferSize)); + } + } + } + else { + next_index = MIN(index + kChunkSize, block.end); + } + compressed_size = enc->rc.out_index; + uncompressed_size = next_index - index; + out_dest[1] = (BYTE)((uncompressed_size - 1) >> 8); + out_dest[2] = (BYTE)(uncompressed_size - 1); + /* Output an uncompressed chunk if necessary */ + if (next_is_random || uncompressed_size + 3 <= compressed_size + header_size) { + DEBUGLOG(5, "Storing chunk : was %u => %u", (unsigned)uncompressed_size, (unsigned)compressed_size); + if (index == 0) { + out_dest[0] = kChunkUncompressedDictReset; + } + else { + out_dest[0] = kChunkUncompressed; + } + memcpy(out_dest + 3, block.data + index, uncompressed_size); + compressed_size = uncompressed_size; + header_size = 3; + if (!next_is_random) { + enc->states = saved_states; + } + } + else { + DEBUGLOG(5, "Compressed chunk : %u => %u", (unsigned)uncompressed_size, (unsigned)compressed_size); + if (index == 0) { + out_dest[0] = kChunkCompressedFlag | kChunkAllReset; + } + else if (encode_properties) { + out_dest[0] = kChunkCompressedFlag | kChunkStatePropertiesReset; + } + else { + out_dest[0] = kChunkCompressedFlag | kChunkNothingReset; + } + out_dest[0] |= (BYTE)((uncompressed_size - 1) >> 16); + out_dest[3] = (BYTE)((compressed_size - 1) >> 8); + out_dest[4] = (BYTE)(compressed_size - 1); + if (encode_properties) { + out_dest[5] = GetLcLpPbCode(enc); + encode_properties = 0; + } + } + if (next_is_random || uncompressed_size + 3 <= compressed_size + (compressed_size >> kRandomFilterMarginBits) + header_size) + { + /* Test the next chunk for compressibility */ + next_is_random = IsChunkRandom(tbl, block, next_index, enc->strategy); + } + if (index == start) { + /* After the first chunk we can write data to the match table because the */ + /* compressed data will never catch up with the table position being read. 
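+            The match table holds at least one U32 per input position, while a
+            chunk never expands its input by more than the few header bytes
+            (a stored chunk adds only 3), so the output write position always
+            trails the table read position.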
*/ + out_dest = RMF_getTableAsOutputBuffer(tbl, start); + memcpy(out_dest, enc->out_buf, compressed_size + header_size); + } + out_dest += compressed_size + header_size; + index = next_index; + if (progress && progress(base + (((index - start) * weight) >> 4), opaque) != 0) + return FL2_ERROR(canceled); + } + return out_dest - RMF_getTableAsOutputBuffer(tbl, start); +} diff --git a/C/fast-lzma2/lzma2_enc.h b/C/fast-lzma2/lzma2_enc.h new file mode 100644 index 00000000..9fbda523 --- /dev/null +++ b/C/fast-lzma2/lzma2_enc.h @@ -0,0 +1,64 @@ +/* lzma2_enc.h -- LZMA2 Encoder +Based on LzmaEnc.h and Lzma2Enc.h : Igor Pavlov +Modified for FL2 by Conor McCarthy +Public domain +*/ + +#ifndef RADYX_LZMA2_ENCODER_H +#define RADYX_LZMA2_ENCODER_H + +#include "mem.h" +#include "data_block.h" +#include "radix_mf.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +#define kFastDistBits 12U + +#define LZMA2_END_MARKER '\0' +#define LZMA_MIN_DICT_BITS 12 + +typedef struct FL2_lzmaEncoderCtx_s FL2_lzmaEncoderCtx; + +typedef enum { + FL2_fast, + FL2_opt, + FL2_ultra +} FL2_strategy; + +typedef struct +{ + unsigned lc; + unsigned lp; + unsigned pb; + unsigned fast_length; + unsigned match_cycles; + FL2_strategy strategy; + unsigned second_dict_bits; + unsigned random_filter; +} FL2_lzma2Parameters; + + +FL2_lzmaEncoderCtx* FL2_lzma2Create(); + +void FL2_lzma2Free(FL2_lzmaEncoderCtx* enc); + +int FL2_lzma2HashAlloc(FL2_lzmaEncoderCtx* enc, const FL2_lzma2Parameters* options); + +size_t FL2_lzma2Encode(FL2_lzmaEncoderCtx* enc, + FL2_matchTable* tbl, + const FL2_dataBlock block, + const FL2_lzma2Parameters* options, + FL2_progressFn progress, void* opaque, size_t base, U32 weight); + +BYTE FL2_getDictSizeProp(size_t dictionary_size); + +size_t FL2_lzma2MemoryUsage(unsigned chain_log, FL2_strategy strategy, unsigned thread_count); + +#if defined (__cplusplus) +} +#endif + +#endif /* RADYX_LZMA2_ENCODER_H */ \ No newline at end of file diff --git a/C/fast-lzma2/mem.h b/C/fast-lzma2/mem.h new file mode 100644 index 00000000..47d23001 --- /dev/null +++ b/C/fast-lzma2/mem.h @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#  include <stdlib.h>   /* _byteswap_ulong */
+#  include <intrin.h>   /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+#  define MEM_STATIC static __inline __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
+#else
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+/* code only tested on 32 and 64 bits systems */
+#define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
+MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
+
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if  !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+  typedef unsigned char      BYTE;
+  typedef unsigned short     U16;
+  typedef   signed short     S16;
+  typedef unsigned int       U32;
+  typedef   signed int       S32;
+  typedef unsigned long long U64;
+  typedef   signed long long S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The switch below allows selecting a different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
+ *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ *            In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
+ *            See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
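+ * As an illustration (not part of this API), method 0 reads a U32 as
+ *     U32 v; memcpy(&v, ptr, sizeof(v));
+ * which mainstream compilers typically lower to a single load, whereas
+ * method 2 is a plain `*(const U32*)ptr` dereference. A method can be
+ * forced on the command line, e.g. -DMEM_FORCE_MEMORY_ACCESS=1.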
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
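+   Recent compilers recognize these fixed-size memcpy() calls and emit a
+   single load or store instruction, though with an older optimiser it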
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, 
(U32)val);
+    else
+        MEM_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_swap32(MEM_read32(memPtr));
+    else
+        return MEM_read32(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
+{
+    if (MEM_isLittleEndian())
+        MEM_write32(memPtr, MEM_swap32(val32));
+    else
+        MEM_write32(memPtr, val32);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_swap64(MEM_read64(memPtr));
+    else
+        return MEM_read64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
+{
+    if (MEM_isLittleEndian())
+        MEM_write64(memPtr, MEM_swap64(val64));
+    else
+        MEM_write64(memPtr, val64);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readBE32(memPtr);
+    else
+        return (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
+{
+    if (MEM_32bits())
+        MEM_writeBE32(memPtr, (U32)val);
+    else
+        MEM_writeBE64(memPtr, (U64)val);
+}
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
diff --git a/C/fast-lzma2/platform.h b/C/fast-lzma2/platform.h
new file mode 100644
index 00000000..a4d7850f
--- /dev/null
+++ b/C/fast-lzma2/platform.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef PLATFORM_H_MODULE
+#define PLATFORM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+
+/* **************************************
+*  Compiler Options
+****************************************/
+#if defined(_MSC_VER)
+#  define _CRT_SECURE_NO_WARNINGS   /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
+#  define _CRT_SECURE_NO_DEPRECATE  /* VS2005 - must be declared before <io.h> and <windows.h> */
+#  if (_MSC_VER <= 1800)            /* (1800 = Visual Studio 2013) */
+#    define snprintf sprintf_s      /* snprintf unsupported by Visual <= 2013 */
+#  endif
+#endif
+
+
+/* **************************************
+*  Detect 64-bit OS
+*  http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros
+****************************************/
+#if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \
+  || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \
+  || (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__ /* SPARC 64-bit */ \
+  || defined __x86_64__ || defined _M_X64 /* x86 64-bit */ \
+  || defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__ /* ARM 64-bit */ \
+  || (defined __mips && (__mips == 64 || __mips == 4 || __mips == 3)) /* MIPS 64-bit */ \
+  || defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */ \
+  || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */
+#  if !defined(__64BIT__)
+#    define __64BIT__  1
+#  endif
+#endif
+
+
+/* *********************************************************
+*  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
+***********************************************************/
+#if !defined(__64BIT__) || defined(__MINGW32__)  /* No point defining Large file for 64 bit but MinGW-w64 requires it */
+#  if !defined(_FILE_OFFSET_BITS)
+#    define _FILE_OFFSET_BITS 64     /* turn off_t into a 64-bit type for ftello, fseeko */
+#  endif
+#  if !defined(_LARGEFILE_SOURCE)    /* obsolete macro, replaced with _FILE_OFFSET_BITS */
+#    define _LARGEFILE_SOURCE 1      /* Large File Support extension (LFS) - fseeko, ftello */
+#  endif
+#  if defined(_AIX) || defined(__hpux)
+#    define _LARGE_FILES             /* Large file support on 32-bits AIX and HP-UX */
+#  endif
+#endif
+
+
+/* ************************************************************
+*  Detect POSIX version
+*  PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows
+*  PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX
+*  PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION
+***************************************************************/
+#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
+   || defined(__midipix__) || defined(__VMS))
+#  if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \
+     || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
+#    define PLATFORM_POSIX_VERSION 200112L
+#  else
+#    if defined(__linux__) || defined(__linux)
+#      ifndef _POSIX_C_SOURCE
+#        define _POSIX_C_SOURCE 200112L  /* use feature test macro */
+#      endif
+#    endif
+#    include <unistd.h>  /* declares _POSIX_VERSION */
+#    if defined(_POSIX_VERSION)  /* POSIX compliant */
+#      define PLATFORM_POSIX_VERSION _POSIX_VERSION
+#    else
+#      define PLATFORM_POSIX_VERSION 0
+#    endif
+#  endif
+#endif
+#if !defined(PLATFORM_POSIX_VERSION)
+#  define PLATFORM_POSIX_VERSION -1
+#endif
+
+
+/*-*********************************************
+*  Detect if isatty() and fileno() are available
+************************************************/
+#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
+#  include <unistd.h>   /* isatty */
+#  define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
+#  include <io.h>       /* _isatty */
+#  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#elif defined(WIN32) || defined(_WIN32)
+#  include <io.h>       /* _isatty */
+#  include <windows.h>  /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
+#  include <stdio.h>    /* FILE */
+static __inline int IS_CONSOLE(FILE* stdStream)
+{
+    DWORD dummy;
+    return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
+}
+#else
+#  define IS_CONSOLE(stdStream) 0
+#endif
+
+
+/******************************
+*  OS-specific Includes
+******************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
+#  include <fcntl.h>   /* _O_BINARY */
+#  include <io.h>      /* _setmode, _fileno, _get_osfhandle */
+#  if !defined(__DJGPP__)
+#    include <windows.h>   /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
+#    include <winioctl.h>  /* FSCTL_SET_SPARSE */
+#    define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
+#    define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); }
+#  else
+#    define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#    define SET_SPARSE_FILE_MODE(file)
+#  endif
+#else
+#  define SET_BINARY_MODE(file)
+#  define SET_SPARSE_FILE_MODE(file)
+#endif
+
+
+#ifndef ZSTD_SPARSE_DEFAULT
+#  if (defined(__APPLE__) &&
defined(__MACH__)) +# define ZSTD_SPARSE_DEFAULT 0 +# else +# define ZSTD_SPARSE_DEFAULT 1 +# endif +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* PLATFORM_H_MODULE */ diff --git a/C/fast-lzma2/radix_bitpack.c b/C/fast-lzma2/radix_bitpack.c new file mode 100644 index 00000000..a20b0d60 --- /dev/null +++ b/C/fast-lzma2/radix_bitpack.c @@ -0,0 +1,61 @@ +/* +* Copyright (c) 2018, Conor McCarthy +* All rights reserved. +* +* This source code is licensed under both the BSD-style license (found in the +* LICENSE file in the root directory of this source tree) and the GPLv2 (found +* in the COPYING file in the root directory of this source tree). +* You may select, at your option, one of the above-listed licenses. +*/ + +#include "mem.h" /* U32, U64 */ +#include "fl2threading.h" +#include "fl2_internal.h" +#include "radix_internal.h" + +typedef struct FL2_matchTable_s FL2_matchTable; + +#undef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +#define RMF_BITPACK + +#define RADIX_MAX_LENGTH BITPACK_MAX_LENGTH + +#define InitMatchLink(index, link) tbl->table[index] = link + +#define GetMatchLink(link) (tbl->table[link] & RADIX_LINK_MASK) + +#define GetInitialMatchLink(index) tbl->table[index] + +#define GetMatchLength(index) (tbl->table[index] >> RADIX_LINK_BITS) + +#define SetMatchLink(index, link, length) tbl->table[index] = (link) | ((U32)(length) << RADIX_LINK_BITS) + +#define SetMatchLength(index, link, length) tbl->table[index] = (link) | ((U32)(length) << RADIX_LINK_BITS) + +#define SetMatchLinkAndLength(index, link, length) tbl->table[index] = (link) | ((U32)(length) << RADIX_LINK_BITS) + +#define SetNull(index) tbl->table[index] = RADIX_NULL_LINK + +#define IsNull(index) (tbl->table[index] == RADIX_NULL_LINK) + +BYTE* RMF_bitpackAsOutputBuffer(FL2_matchTable* const tbl, size_t const index) +{ + return (BYTE*)(tbl->table + index); +} + +/* Restrict the match lengths so that they don't reach beyond index */ +void RMF_bitpackLimitLengths(FL2_matchTable* const tbl, size_t const index) +{ + DEBUGLOG(5, "RMF_limitLengths : end %u, max length %u", (U32)index, RADIX_MAX_LENGTH); + SetNull(index - 1); + for (U32 length = 2; length < RADIX_MAX_LENGTH && length <= index; ++length) { + U32 const link = tbl->table[index - length]; + if (link != RADIX_NULL_LINK) { + tbl->table[index - length] = (MIN(length, link >> RADIX_LINK_BITS) << RADIX_LINK_BITS) | (link & RADIX_LINK_MASK); + } + } +} + +#include "radix_engine.h" \ No newline at end of file diff --git a/C/fast-lzma2/radix_engine.h b/C/fast-lzma2/radix_engine.h new file mode 100644 index 00000000..a5190414 --- /dev/null +++ b/C/fast-lzma2/radix_engine.h @@ -0,0 +1,1075 @@ +/* +* Copyright (c) 2018, Conor McCarthy +* All rights reserved. +* +* This source code is licensed under both the BSD-style license (found in the +* LICENSE file in the root directory of this source tree) and the GPLv2 (found +* in the COPYING file in the root directory of this source tree). +* You may select, at your option, one of the above-listed licenses. 
+*/
+
+#include <stdio.h>   /* printf (integrity check) */
+#include "count.h"
+
+#define MAX_READ_BEYOND_DEPTH 2
+
+/* If a repeating byte is found, fill that section of the table with matches of distance 1 */
+static size_t HandleRepeat(FL2_matchTable* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t const block_size, ptrdiff_t i, size_t const radix_16)
+{
+    ptrdiff_t const rpt_index = i - (MAX_REPEAT / 2 - 2);
+    ptrdiff_t rpt_end;
+    /* Set the head to the first byte of the repeat and adjust the count */
+    tbl->list_heads[radix_16].head = (U32)(rpt_index - 1);
+    tbl->list_heads[radix_16].count -= MAX_REPEAT / 2 - 2;
+    /* Find the end */
+    i += ZSTD_count(data_block + i + 2, data_block + i + 1, data_block + block_size);
+    rpt_end = i;
+    /* No point if it's in the overlap region */
+    if (i >= (ptrdiff_t)start) {
+        U32 len = 2;
+        /* Set matches at distance 1 and available length */
+        for (; i >= rpt_index && len <= RADIX_MAX_LENGTH; --i) {
+            SetMatchLinkAndLength(i, (U32)(i - 1), len);
+            ++len;
+        }
+        /* Set matches at distance 1 and max length */
+        for (; i >= rpt_index; --i) {
+            SetMatchLinkAndLength(i, (U32)(i - 1), RADIX_MAX_LENGTH);
+        }
+    }
+    return rpt_end;
+}
+
+/* If a 2-byte repeat is found, fill that section of the table with matches of distance 2 */
+static size_t HandleRepeat2(FL2_matchTable* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t const block_size, ptrdiff_t i, size_t const radix_16)
+{
+    size_t radix_16_rev;
+    ptrdiff_t const rpt_index = i - (MAX_REPEAT - 3);
+    ptrdiff_t rpt_end;
+
+    /* Set the head to the first byte of the repeat and adjust the count */
+    tbl->list_heads[radix_16].head = (U32)(rpt_index - 1);
+    tbl->list_heads[radix_16].count -= MAX_REPEAT / 2 - 2;
+    radix_16_rev = ((radix_16 >> 8) | (radix_16 << 8)) & 0xFFFF;
+    tbl->list_heads[radix_16_rev].head = (U32)(rpt_index - 2);
+    tbl->list_heads[radix_16_rev].count -= MAX_REPEAT / 2 - 1;
+    /* Find the end */
+    i += ZSTD_count(data_block + i + 2, data_block + i, data_block + block_size);
+    rpt_end = i;
+    /* No point if it's in the overlap region */
+    if (i >= (ptrdiff_t)start) {
+        U32 len = 2;
+        /* Set matches at distance 2 and available length */
+        for (; i >= rpt_index && len <= RADIX_MAX_LENGTH; --i) {
+            SetMatchLinkAndLength(i, (U32)(i - 2), len);
+            ++len;
+        }
+        /* Set matches at distance 2 and max length */
+        for (; i >= rpt_index; --i) {
+            SetMatchLinkAndLength(i, (U32)(i - 2), RADIX_MAX_LENGTH);
+        }
+    }
+    return rpt_end;
+}
+
+/* Initialization for the reference algorithm */
+#ifdef RMF_REFERENCE
+static void RadixInitReference(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
+{
+    const BYTE* const data_block = (const BYTE*)data;
+    ptrdiff_t const block_size = end - 1;
+    size_t st_index = 0;
+    for (ptrdiff_t i = 0; i < block_size; ++i)
+    {
+        size_t radix_16 = ((size_t)data_block[i] << 8) | data_block[i + 1];
+        U32 prev = tbl->list_heads[radix_16].head;
+        if (prev != RADIX_NULL_LINK) {
+            SetMatchLinkAndLength(i, prev, 2U);
+            tbl->list_heads[radix_16].head = (U32)i;
+            ++tbl->list_heads[radix_16].count;
+        }
+        else {
+            SetNull(i);
+            tbl->list_heads[radix_16].head = (U32)i;
+            tbl->list_heads[radix_16].count = 1;
+            tbl->stack[st_index++] = (U32)radix_16;
+        }
+    }
+    SetNull(end - 1);
+    tbl->end_index = (U32)st_index;
+    tbl->st_index = ATOMIC_INITIAL_VALUE;
+    (void)start;
+}
+#endif
+
+size_t
+#ifdef RMF_BITPACK
+RMF_bitpackInit
+#else
+RMF_structuredInit
+#endif
+(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
+{
+    const BYTE* const data_block = (const BYTE*)data;
+    size_t st_index = 0;
+    size_t radix_16;
+    ptrdiff_t const block_size = end - 2;
+    ptrdiff_t rpt_total = 0;
+    U32 count = 0;
+
+    if (end <= 2) {
+        for (size_t i = 0; i < end; ++i) {
+            SetNull(i);
+        }
+        return 0;
+    }
+#ifdef RMF_REFERENCE
+    if (tbl->params.use_ref_mf) {
+        RadixInitReference(tbl, data, start, end);
+        return 0;
+    }
+#endif
+    SetNull(0);
+    /* Initial 2-byte radix value */
+    radix_16 = ((size_t)data_block[0] << 8) | data_block[1];
+    tbl->stack[st_index++] = (U32)radix_16;
+    tbl->list_heads[radix_16].head = 0;
+    tbl->list_heads[radix_16].count = 1;
+
+    radix_16 = ((size_t)((BYTE)radix_16) << 8) | data_block[2];
+
+    ptrdiff_t i = 1;
+    for (; i < block_size; ++i) {
+        /* Pre-load the next value for speed increase */
+        size_t const next_radix = ((size_t)((BYTE)radix_16) << 8) | data_block[i + 2];
+
+        U32 const prev = tbl->list_heads[radix_16].head;
+        if (prev != RADIX_NULL_LINK) {
+            S32 dist = (S32)i - prev;
+            /* Check for repeat */
+            if (dist > 2) {
+                count = 0;
+                /* Link this position to the previous occurrence */
+                InitMatchLink(i, prev);
+                /* Set the previous to this position */
+                tbl->list_heads[radix_16].head = (U32)i;
+                ++tbl->list_heads[radix_16].count;
+                radix_16 = next_radix;
+            }
+            else {
+                count += 3 - dist;
+                /* Do the usual if the repeat is too short */
+                if (count < MAX_REPEAT - 2) {
+                    InitMatchLink(i, prev);
+                    tbl->list_heads[radix_16].head = (U32)i;
+                    ++tbl->list_heads[radix_16].count;
+                    radix_16 = next_radix;
+                }
+                else {
+                    ptrdiff_t const prev_i = i;
+                    /* Eliminate the repeat from the linked list to save time */
+                    if (dist == 1) {
+                        i = HandleRepeat(tbl, data_block, start, end, i, radix_16);
+                        rpt_total += i - prev_i + MAX_REPEAT / 2U - 1;
+                    }
+                    else {
+                        i = HandleRepeat2(tbl, data_block, start, end, i, radix_16);
+                        rpt_total += i - prev_i + MAX_REPEAT - 2;
+                    }
+                    if (i < block_size)
+                        radix_16 = ((size_t)data_block[i + 1] << 8) | data_block[i + 2];
+                    count = 0;
+                }
+            }
+        }
+        else {
+            count = 0;
+            SetNull(i);
+            tbl->list_heads[radix_16].head = (U32)i;
+            tbl->list_heads[radix_16].count = 1;
+            tbl->stack[st_index++] = (U32)radix_16;
+            radix_16 = next_radix;
+        }
+    }
+    /* Handle the last value */
+    if (i <= block_size && tbl->list_heads[radix_16].head != RADIX_NULL_LINK) {
+        SetMatchLinkAndLength(block_size, tbl->list_heads[radix_16].head, 2);
+    }
+    else {
+        SetNull(block_size);
+    }
+    /* Never a match at the last byte */
+    SetNull(end - 1);
+
+    tbl->end_index = (U32)st_index;
+    tbl->st_index = ATOMIC_INITIAL_VALUE;
+
+    return rpt_total;
+}
+
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4701)  /* disable: C4701: potentially uninitialized local variable */
+#endif
+
+
+/* Copy the list into a buffer and recurse it there.
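+   Match links are scattered across the whole dictionary, so chasing them in
+   place would touch a new cache line at almost every step. Each RMF_buildMatch
+   entry caches the position, four data bytes, and a next index with the
+   current depth packed into its top 8 bits, so later passes run over this
+   small contiguous array instead.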
This decreases cache misses and allows */ +/* data characters to be loaded every fourth pass and stored for use in the next 4 passes */ +static void RecurseListsBuffered(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + size_t link, + BYTE depth, + BYTE const max_depth, + U32 orig_list_count, + size_t const stack_base) +{ + /* Create an offset data buffer pointer for reading the next bytes */ + const BYTE* data_src = data_block + depth; + size_t start = 0; + + if (orig_list_count < 2 || tbl->match_buffer_limit < 2) + return; + do { + size_t count = start; + U32 list_count = (U32)(start + orig_list_count); + U32 overlap; + + if (list_count > tbl->match_buffer_limit) { + list_count = (U32)tbl->match_buffer_limit; + } + for (; count < list_count; ++count) { + /* Pre-load next link */ + size_t const next_link = GetMatchLink(link); + /* Get 4 data characters for later. This doesn't block on a cache miss. */ + tbl->match_buffer[count].src.u32 = MEM_read32(data_src + link); + /* Record the actual location of this suffix */ + tbl->match_buffer[count].from = (U32)link; + /* Initialize the next link */ + tbl->match_buffer[count].next = (U32)(count + 1) | ((U32)depth << 24); + link = next_link; + } + /* Make the last element circular so pre-loading doesn't read past the end. */ + tbl->match_buffer[count - 1].next = (U32)(count - 1) | ((U32)depth << 24); + overlap = 0; + if (list_count < (U32)(start + orig_list_count)) { + overlap = list_count >> MATCH_BUFFER_OVERLAP; + overlap += !overlap; + } + RMF_recurseListChunk(tbl, data_block, block_start, depth, max_depth, list_count, stack_base); + orig_list_count -= (U32)(list_count - start); + /* Copy everything back, except the last link which never changes, and any extra overlap */ + count -= overlap + (overlap == 0); + for (size_t index = 0; index < count; ++index) { + size_t const from = tbl->match_buffer[index].from; + if (from < block_start) + return; + + { U32 length = tbl->match_buffer[index].next >> 24; + size_t next = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length); + } + } + start = 0; + if (overlap) { + size_t dest = 0; + for (size_t src = list_count - overlap; src < list_count; ++src) { + tbl->match_buffer[dest].from = tbl->match_buffer[src].from; + tbl->match_buffer[dest].src.u32 = MEM_read32(data_src + tbl->match_buffer[src].from); + tbl->match_buffer[dest].next = (U32)(dest + 1) | ((U32)depth << 24); + ++dest; + } + start = dest; + } + } while (orig_list_count != 0); +} + +/* Parse the list with bounds checks on data reads. Stop at the point where bound checks are not required. */ +/* Buffering is used so that parsing can continue below the bound to find a few matches without altering the main table. 
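+   Positions nearer than max_depth + MAX_READ_BEYOND_DEPTH to the block end
+   (the last 34 positions at the default depth of 32) could otherwise read
+   past the end of the data, so they are parsed here with explicit length
+   limits and trimmed from the list head before the unbounded pass runs.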
*/ +static void RecurseListsBound(RMF_builder* const tbl, + const BYTE* const data_block, + ptrdiff_t const block_size, + RMF_tableHead* const list_head, + U32 const max_depth) +{ + U32 list_count = list_head->count; + ptrdiff_t link = list_head->head; + ptrdiff_t const bounded_size = max_depth + MAX_READ_BEYOND_DEPTH; + ptrdiff_t const bounded_start = block_size - MIN(block_size, bounded_size); + /* Create an offset data buffer pointer for reading the next bytes */ + size_t count = 0; + size_t extra_count = (max_depth >> 4) + 4; + ptrdiff_t limit; + const BYTE* data_src; + U32 depth; + size_t index; + size_t st_index; + RMF_listTail* tails_8; + + if (list_count < 2) + return; + + list_count = MIN((U32)bounded_size, list_count); + list_count = MIN(list_count, (U32)tbl->match_buffer_size); + for (; count < list_count && extra_count; ++count) { + ptrdiff_t next_link = GetMatchLink(link); + if (link >= bounded_start) { + --list_head->count; + if (next_link < bounded_start) { + list_head->head = (U32)next_link; + } + } + else { + --extra_count; + } + /* Record the actual location of this suffix */ + tbl->match_buffer[count].from = (U32)link; + /* Initialize the next link */ + tbl->match_buffer[count].next = (U32)(count + 1) | ((U32)2 << 24); + link = next_link; + } + list_count = (U32)count; + limit = block_size - 2; + data_src = data_block + 2; + depth = 3; + index = 0; + st_index = 0; + tails_8 = tbl->tails_8; + do { + link = tbl->match_buffer[index].from; + if (link < limit) { + size_t const radix_8 = data_src[link]; + /* Seen this char before? */ + const U32 prev = tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tails_8[radix_8].list_count; + /* Link the previous occurrence to this one and record the new length */ + tbl->match_buffer[prev].next = (U32)index | (depth << 24); + } + else { + tails_8[radix_8].list_count = 1; + /* Add the new sub list to the stack */ + tbl->stack[st_index].head = (U32)index; + /* This will be converted to a count at the end */ + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tails_8[radix_8].prev_index = (U32)index; + } + ++index; + } while (index < list_count); + /* Convert radix values on the stack to counts and reset any used tail slots */ + for (size_t j = 0; j < st_index; ++j) { + tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK; + tbl->stack[j].count = tails_8[tbl->stack[j].count].list_count; + } + while (st_index > 0) { + size_t prev_st_index; + + /* Pop an item off the stack */ + --st_index; + list_count = tbl->stack[st_index].count; + if (list_count < 2) { + /* Nothing to match with */ + continue; + } + index = tbl->stack[st_index].head; + depth = (tbl->match_buffer[index].next >> 24); + if (depth >= max_depth) + continue; + link = tbl->match_buffer[index].from; + if (link < bounded_start) { + /* Chain starts before the bounded region */ + continue; + } + data_src = data_block + depth; + limit = block_size - depth; + ++depth; + prev_st_index = st_index; + do { + link = tbl->match_buffer[index].from; + if (link < limit) { + size_t const radix_8 = data_src[link]; + U32 const prev = tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)index; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tails_8[radix_8].prev_index = (U32)index; + } + index = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + } while 
(--list_count != 0); + for (size_t j = prev_st_index; j < st_index; ++j) { + tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK; + tbl->stack[j].count = tails_8[tbl->stack[j].count].list_count; + } + } + /* Copy everything back above the bound */ + --count; + for (index = 0; index < count; ++index) { + ptrdiff_t const from = tbl->match_buffer[index].from; + size_t next; + U32 length; + + if (from < bounded_start) + break; + length = tbl->match_buffer[index].next >> 24; + length = MIN(length, (U32)(block_size - from)); + next = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length); + } +} + +/* Compare each string with all others to find the best match */ +static void BruteForce(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + size_t link, + size_t const list_count, + U32 const depth, + U32 const max_depth) +{ + const BYTE* data_src = data_block + depth; + size_t buffer[MAX_BRUTE_FORCE_LIST_SIZE + 1]; + size_t const limit = max_depth - depth; + size_t i = 1; + + buffer[0] = link; + /* Pre-load all locations */ + do { + link = GetMatchLink(link); + buffer[i] = link; + } while (++i < list_count); + i = 0; + do { + size_t longest = 0; + size_t j = i + 1; + size_t longest_index = j; + const BYTE* const data = data_src + buffer[i]; + do { + const BYTE* data_2 = data_src + buffer[j]; + size_t len_test = 0; + while (data[len_test] == data_2[len_test] && len_test < limit) { + ++len_test; + } + if (len_test > longest) { + longest_index = j; + longest = len_test; + if (len_test >= limit) { + break; + } + } + } while (++j < list_count); + if (longest > 0) { + SetMatchLinkAndLength(buffer[i], + (U32)buffer[longest_index], + depth + (U32)longest); + } + ++i; + } while (i < list_count - 1 && buffer[i] >= block_start); +} + +static void RecurseLists16(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + size_t link, + U32 count, + U32 const max_depth) +{ + /* Offset data pointer. This method is only called at depth 2 */ + const BYTE* const data_src = data_block + 2; + /* Load radix values from the data chars */ + size_t next_radix_8 = data_src[link]; + size_t next_radix_16 = next_radix_8 + ((size_t)(data_src[link + 1]) << 8); + size_t reset_list[RADIX8_TABLE_SIZE]; + size_t reset_count = 0; + size_t st_index = 0; + U32 prev; + /* Last one is done separately */ + --count; + do + { + /* Pre-load the next link */ + size_t const next_link = GetInitialMatchLink(link); + size_t const radix_8 = next_radix_8; + size_t const radix_16 = next_radix_16; + /* Initialization doesn't set lengths to 2 because it's a waste of time if buffering is used */ + SetMatchLength(link, (U32)next_link, 2); + + next_radix_8 = data_src[next_link]; + next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8); + + prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + /* Link the previous occurrence to this one at length 3. */ + /* This will be overwritten if a 4 is found. 
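+            The 8-bit radix over the byte at offset 2 can only prove a 3-byte
+            match (the two shared list bytes plus one); the 16-bit radix pass
+            below proves 4.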
*/ + SetMatchLinkAndLength(prev, (U32)link, 3); + } + else { + reset_list[reset_count++] = radix_8; + } + tbl->tails_8[radix_8].prev_index = (U32)link; + + prev = tbl->tails_16[radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_16[radix_16].list_count; + /* Link at length 4, overwriting the 3 */ + SetMatchLinkAndLength(prev, (U32)link, 4); + } + else { + tbl->tails_16[radix_16].list_count = 1; + tbl->stack[st_index].head = (U32)link; + tbl->stack[st_index].count = (U32)radix_16; + ++st_index; + } + tbl->tails_16[radix_16].prev_index = (U32)link; + link = next_link; + } while (--count > 0); + /* Do the last location */ + prev = tbl->tails_8[next_radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)link, 3); + } + prev = tbl->tails_16[next_radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_16[next_radix_16].list_count; + SetMatchLinkAndLength(prev, (U32)link, 4); + } + for (size_t i = 0; i < reset_count; ++i) { + tbl->tails_8[reset_list[i]].prev_index = RADIX_NULL_LINK; + } + for (size_t i = 0; i < st_index; ++i) { + tbl->tails_16[tbl->stack[i].count].prev_index = RADIX_NULL_LINK; + tbl->stack[i].count = tbl->tails_16[tbl->stack[i].count].list_count; + } + while (st_index > 0) { + U32 list_count; + U32 depth; + + --st_index; + list_count = tbl->stack[st_index].count; + if (list_count < 2) { + /* Nothing to do */ + continue; + } + link = tbl->stack[st_index].head; + if (link < block_start) + continue; + if (st_index > STACK_SIZE - RADIX16_TABLE_SIZE + && st_index > STACK_SIZE - list_count) + { + /* Potential stack overflow. Rare. */ + continue; + } + /* The current depth */ + depth = GetMatchLength(link); + if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) { + /* Quicker to use brute force, each string compared with all previous strings */ + BruteForce(tbl, data_block, + block_start, + link, + list_count, + depth, + max_depth); + continue; + } + /* Send to the buffer at depth 4 */ + RecurseListsBuffered(tbl, + data_block, + block_start, + link, + (BYTE)depth, + (BYTE)max_depth, + list_count, + st_index); + } +} + +#if 0 +static void RecurseListsUnbuf16(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + size_t link, + U32 count, + U32 const max_depth) +{ + /* Offset data pointer. This method is only called at depth 2 */ + const BYTE* data_src = data_block + 2; + /* Load radix values from the data chars */ + size_t next_radix_8 = data_src[link]; + size_t next_radix_16 = next_radix_8 + ((size_t)(data_src[link + 1]) << 8); + RMF_listTail* tails_8 = tbl->tails_8; + size_t reset_list[RADIX8_TABLE_SIZE]; + size_t reset_count = 0; + size_t st_index = 0; + U32 prev; + /* Last one is done separately */ + --count; + do + { + /* Pre-load the next link */ + size_t next_link = GetInitialMatchLink(link); + /* Initialization doesn't set lengths to 2 because it's a waste of time if buffering is used */ + SetMatchLength(link, (U32)next_link, 2); + size_t radix_8 = next_radix_8; + size_t radix_16 = next_radix_16; + next_radix_8 = data_src[next_link]; + next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8); + prev = tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + /* Link the previous occurrence to this one at length 3. */ + /* This will be overwritten if a 4 is found. 
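+            (Same 3-byte-then-4-byte relinking as in RecurseLists16 above.)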
*/ + SetMatchLinkAndLength(prev, (U32)link, 3); + } + else { + reset_list[reset_count++] = radix_8; + } + tails_8[radix_8].prev_index = (U32)link; + prev = tbl->tails_16[radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_16[radix_16].list_count; + /* Link at length 4, overwriting the 3 */ + SetMatchLinkAndLength(prev, (U32)link, 4); + } + else { + tbl->tails_16[radix_16].list_count = 1; + tbl->stack[st_index].head = (U32)link; + tbl->stack[st_index].count = (U32)radix_16; + ++st_index; + } + tbl->tails_16[radix_16].prev_index = (U32)link; + link = next_link; + } while (--count > 0); + /* Do the last location */ + prev = tails_8[next_radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)link, 3); + } + prev = tbl->tails_16[next_radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_16[next_radix_16].list_count; + SetMatchLinkAndLength(prev, (U32)link, 4); + } + for (size_t i = 0; i < reset_count; ++i) { + tails_8[reset_list[i]].prev_index = RADIX_NULL_LINK; + } + reset_count = 0; + for (size_t i = 0; i < st_index; ++i) { + tbl->tails_16[tbl->stack[i].count].prev_index = RADIX_NULL_LINK; + tbl->stack[i].count = tbl->tails_16[tbl->stack[i].count].list_count; + } + while (st_index > 0) { + --st_index; + U32 list_count = tbl->stack[st_index].count; + if (list_count < 2) { + /* Nothing to do */ + continue; + } + link = tbl->stack[st_index].head; + if (link < block_start) + continue; + if (st_index > STACK_SIZE - RADIX16_TABLE_SIZE + && st_index > STACK_SIZE - list_count) + { + /* Potential stack overflow. Rare. */ + continue; + } + /* The current depth */ + U32 depth = GetMatchLength(link); + if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) { + /* Quicker to use brute force, each string compared with all previous strings */ + BruteForce(tbl, data_block, + block_start, + link, + list_count, + depth, + max_depth); + continue; + } + const BYTE* data_src = data_block + depth; + size_t next_radix_8 = data_src[link]; + size_t next_radix_16 = next_radix_8 + ((size_t)(data_src[link + 1]) << 8); + /* Next depth for 1 extra char */ + ++depth; + /* and for 2 */ + U32 depth_2 = depth + 1; + size_t prev_st_index = st_index; + /* Last location is done separately */ + --list_count; + /* Last pass is done separately. Both of these values are always even. 
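+           ("Both" here means depth_2 and max_depth: max_depth is masked to a
+           multiple of two and list depths advance by two per pass, so the
+           test below can meet max_depth exactly but never jump past it.)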
*/ + if (depth_2 < max_depth) { + do { + size_t radix_8 = next_radix_8; + size_t radix_16 = next_radix_16; + size_t next_link = GetMatchLink(link); + next_radix_8 = data_src[next_link]; + next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8); + size_t prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + /* Odd numbered match length, will be overwritten if 2 chars are matched */ + SetMatchLinkAndLength(prev, (U32)(link), depth); + } + else { + reset_list[reset_count++] = radix_8; + } + tbl->tails_8[radix_8].prev_index = (U32)link; + prev = tbl->tails_16[radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_16[radix_16].list_count; + SetMatchLinkAndLength(prev, (U32)(link), depth_2); + } + else { + tbl->tails_16[radix_16].list_count = 1; + tbl->stack[st_index].head = (U32)(link); + tbl->stack[st_index].count = (U32)(radix_16); + ++st_index; + } + tbl->tails_16[radix_16].prev_index = (U32)(link); + link = next_link; + } while (--list_count != 0); + size_t prev = tbl->tails_8[next_radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)(link), depth); + } + prev = tbl->tails_16[next_radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_16[next_radix_16].list_count; + SetMatchLinkAndLength(prev, (U32)(link), depth_2); + } + for (size_t i = prev_st_index; i < st_index; ++i) { + tbl->tails_16[tbl->stack[i].count].prev_index = RADIX_NULL_LINK; + tbl->stack[i].count = tbl->tails_16[tbl->stack[i].count].list_count; + } + for (size_t i = 0; i < reset_count; ++i) { + tails_8[reset_list[i]].prev_index = RADIX_NULL_LINK; + } + reset_count = 0; + } + else { + do { + size_t radix_8 = next_radix_8; + size_t radix_16 = next_radix_16; + size_t next_link = GetMatchLink(link); + next_radix_8 = data_src[next_link]; + next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8); + size_t prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)(link), depth); + } + else { + reset_list[reset_count++] = radix_8; + } + tbl->tails_8[radix_8].prev_index = (U32)link; + prev = tbl->tails_16[radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)(link), depth_2); + } + else { + tbl->stack[st_index].count = (U32)radix_16; + ++st_index; + } + tbl->tails_16[radix_16].prev_index = (U32)(link); + link = next_link; + } while (--list_count != 0); + size_t prev = tbl->tails_8[next_radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)(link), depth); + } + prev = tbl->tails_16[next_radix_16].prev_index; + if (prev != RADIX_NULL_LINK) { + SetMatchLinkAndLength(prev, (U32)(link), depth_2); + } + for (size_t i = prev_st_index; i < st_index; ++i) { + tbl->tails_16[tbl->stack[i].count].prev_index = RADIX_NULL_LINK; + } + st_index = prev_st_index; + for (size_t i = 0; i < reset_count; ++i) { + tails_8[reset_list[i]].prev_index = RADIX_NULL_LINK; + } + reset_count = 0; + } + } +} +#endif + +#ifdef RMF_REFERENCE + +/* Simple, slow, complete parsing for reference */ +static void RecurseListsReference(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_size, + size_t link, + U32 count, + U32 const max_depth) +{ + /* Offset data pointer. 
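+       Every position in this list already shares its first two bytes, so
+       comparison resumes at data_block + 2.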
This method is only called at depth 2 */ + const BYTE* data_src = data_block + 2; + size_t limit = block_size - 2; + size_t st_index = 0; + + do + { + if (link < limit) { + size_t const radix_8 = data_src[link]; + size_t const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + SetMatchLinkAndLength(prev, (U32)link, 3); + } + else { + tbl->tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)link; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)link; + } + link = GetMatchLink(link); + } while (--count > 0); + for (size_t i = 0; i < st_index; ++i) { + tbl->stack[i].count = tbl->tails_8[tbl->stack[i].count].list_count; + } + memset(tbl->tails_8, 0xFF, sizeof(tbl->tails_8)); + while (st_index > 0) { + U32 list_count; + U32 depth; + size_t prev_st_index; + + --st_index; + list_count = tbl->stack[st_index].count; + if (list_count < 2) { + /* Nothing to do */ + continue; + } + if (st_index > STACK_SIZE - RADIX8_TABLE_SIZE + && st_index > STACK_SIZE - list_count) + { + /* Potential stack overflow. Rare. */ + continue; + } + link = tbl->stack[st_index].head; + /* The current depth */ + depth = GetMatchLength(link); + if (depth >= max_depth) + continue; + data_src = data_block + depth; + limit = block_size - depth; + /* Next depth for 1 extra char */ + ++depth; + prev_st_index = st_index; + do { + if (link < limit) { + size_t const radix_8 = data_src[link]; + size_t const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + SetMatchLinkAndLength(prev, (U32)link, depth); + } + else { + tbl->tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)link; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)link; + } + link = GetMatchLink(link); + } while (--list_count != 0); + for (size_t i = prev_st_index; i < st_index; ++i) { + tbl->stack[i].count = tbl->tails_8[tbl->stack[i].count].list_count; + } + memset(tbl->tails_8, 0xFF, sizeof(tbl->tails_8)); + } +} + +#endif /* RMF_REFERENCE */ + +/* Atomically take a list from the head table */ +static ptrdiff_t RMF_getNextList(FL2_matchTable* const tbl, unsigned const multi_thread) +{ + if (tbl->st_index < tbl->end_index) { + long index = multi_thread ? 
FL2_atomic_increment(tbl->st_index) : FL2_nonAtomic_increment(tbl->st_index); + if (index < tbl->end_index) { + return index; + } + } + return -1; +} + +#define UPDATE_INTERVAL 0x40000U + +/* Iterate the head table concurrently with other threads, and recurse each list until max_depth is reached */ +int +#ifdef RMF_BITPACK +RMF_bitpackBuildTable +#else +RMF_structuredBuildTable +#endif +(FL2_matchTable* const tbl, + size_t const job, + unsigned const multi_thread, + FL2_dataBlock const block, + FL2_progressFn progress, void* opaque, U32 weight, size_t init_done) +{ + if (!block.end) + return 0; + U64 const enc_size = block.end - block.start; + unsigned const best = !tbl->params.divide_and_conquer; + unsigned const max_depth = MIN(tbl->params.depth, RADIX_MAX_LENGTH) & ~1; + size_t const bounded_start = block.end - max_depth - MAX_READ_BEYOND_DEPTH; + ptrdiff_t next_progress = 0; + size_t update = UPDATE_INTERVAL; + size_t total = init_done; + + for (;;) + { + /* Get the next to process */ + ptrdiff_t index = RMF_getNextList(tbl, multi_thread); + RMF_tableHead list_head; + + if (index < 0) { + break; + } + if (progress) { + while (next_progress < index) { + total += tbl->list_heads[tbl->stack[next_progress]].count; + ++next_progress; + } + if (total >= update) { + if (progress((size_t)((total * enc_size / block.end * weight) >> 4), opaque)) { + FL2_atomic_add(tbl->st_index, RADIX16_TABLE_SIZE); + return 1; + } + update = total + UPDATE_INTERVAL; + } + } + index = tbl->stack[index]; + list_head = tbl->list_heads[index]; + tbl->list_heads[index].head = RADIX_NULL_LINK; + if (list_head.count < 2 || list_head.head < block.start) { + continue; + } +#ifdef RMF_REFERENCE + if (tbl->params.use_ref_mf) { + RecurseListsReference(tbl->builders[job], block.data, block.end, list_head.head, list_head.count, max_depth); + continue; + } +#endif + if (list_head.head >= bounded_start) { + RecurseListsBound(tbl->builders[job], block.data, block.end, &list_head, (BYTE)max_depth); + if (list_head.count < 2 || list_head.head < block.start) { + continue; + } + } + if (best && list_head.count > tbl->builders[job]->match_buffer_limit) + { + /* Not worth buffering or too long */ + RecurseLists16(tbl->builders[job], block.data, block.start, list_head.head, list_head.count, max_depth); + } + else { + RecurseListsBuffered(tbl->builders[job], block.data, block.start, list_head.head, 2, (BYTE)max_depth, list_head.count, 0); + } + } + return 0; +} + +int +#ifdef RMF_BITPACK +RMF_bitpackIntegrityCheck +#else +RMF_structuredIntegrityCheck +#endif +(const FL2_matchTable* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth) +{ + int err = 0; + for (index += !index; index < end; ++index) { + U32 link; + U32 length; + U32 len_test; + U32 limit; + + if (IsNull(index)) + continue; + link = GetMatchLink(index); + if (link >= index) { + printf("Forward link at %X to %u\r\n", (U32)index, link); + err = 1; + continue; + } + length = GetMatchLength(index); + if (index && length < RADIX_MAX_LENGTH && link - 1 == GetMatchLink(index - 1) && length + 1 == GetMatchLength(index - 1)) + continue; + len_test = 0; + limit = MIN((U32)(end - index), RADIX_MAX_LENGTH); + for (; len_test < limit && data[link + len_test] == data[index + len_test]; ++len_test) { + } + if (len_test < length) { + printf("Failed integrity check: pos %X, length %u, actual %u\r\n", (U32)index, length, len_test); + err = 1; + } + if (length < max_depth && len_test > length) + printf("Shortened match at %X: %u of %u\r\n", (U32)index, 
length, len_test); + } + return err; +} + + +static size_t ExtendMatch(const FL2_matchTable* const tbl, + const BYTE* const data, + ptrdiff_t const start_index, + ptrdiff_t const limit, + U32 const link, + size_t const length) +{ + ptrdiff_t end_index = start_index + length; + ptrdiff_t const dist = start_index - link; + while (end_index < limit && end_index - GetMatchLink(end_index) == dist) { + end_index += GetMatchLength(end_index); + } + if (end_index >= limit) { + return limit - start_index; + } + while (end_index < limit && data[end_index - dist] == data[end_index]) { + ++end_index; + } + return end_index - start_index; +} + +size_t +#ifdef RMF_BITPACK +RMF_bitpackGetMatch +#else +RMF_structuredGetMatch +#endif +(const FL2_matchTable* const tbl, + const BYTE* const data, + size_t const index, + size_t const limit, + unsigned const max_depth, + size_t* const offset_ptr) +{ + size_t length; + size_t dist; + U32 link; + if (IsNull(index)) + return 0; + link = GetMatchLink(index); + length = GetMatchLength(index); + if (length < 2) + return 0; + dist = index - link; + *offset_ptr = dist; + if (length > limit - index) + return limit - index; + if (length == max_depth + || length == RADIX_MAX_LENGTH /* from HandleRepeat */) + { + length = ExtendMatch(tbl, data, index, limit, link, length); + } + return length; +} diff --git a/C/fast-lzma2/radix_internal.h b/C/fast-lzma2/radix_internal.h new file mode 100644 index 00000000..4a9ba359 --- /dev/null +++ b/C/fast-lzma2/radix_internal.h @@ -0,0 +1,143 @@ +/* +* Copyright (c) 2018, Conor McCarthy +* All rights reserved. +* +* This source code is licensed under both the BSD-style license (found in the +* LICENSE file in the root directory of this source tree) and the GPLv2 (found +* in the COPYING file in the root directory of this source tree). +* You may select, at your option, one of the above-listed licenses. 
+*/ + +#ifndef RADIX_INTERNAL_H +#define RADIX_INTERNAL_H + +#include "atomic.h" +#include "radix_mf.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +#define DICTIONARY_LOG_MIN 12U +#define DICTIONARY_LOG_MAX_64 30U +#define DICTIONARY_LOG_MAX_32 27U +#define DEFAULT_BUFFER_LOG 8U +#define DEFAULT_BLOCK_OVERLAP 2U +#define DEFAULT_SEARCH_DEPTH 32U +#define DEFAULT_DIVIDEANDCONQUER 1 +#define MAX_REPEAT 32 +#define RADIX16_TABLE_SIZE (1UL << 16) +#define RADIX8_TABLE_SIZE (1UL << 8) +#define STACK_SIZE (RADIX16_TABLE_SIZE * 3) +#define MAX_BRUTE_FORCE_LIST_SIZE 5 +#define BUFFER_LINK_MASK 0xFFFFFFU +#define MATCH_BUFFER_OVERLAP 6 +#define BITPACK_MAX_LENGTH 63UL +#define STRUCTURED_MAX_LENGTH 255UL + +#define RADIX_LINK_BITS 26 +#define RADIX_LINK_MASK ((1UL << RADIX_LINK_BITS) - 1) +#define RADIX_NULL_LINK 0xFFFFFFFFUL + +#define UNIT_BITS 2 +#define UNIT_MASK ((1UL << UNIT_BITS) - 1) + +typedef struct +{ + U32 head; + U32 count; +} RMF_tableHead; + +union src_data_u { + BYTE chars[4]; + U32 u32; +}; + +typedef struct +{ + U32 from; + union src_data_u src; + U32 next; +} RMF_buildMatch; + +typedef struct +{ + U32 prev_index; + U32 list_count; +} RMF_listTail; + +typedef struct +{ + U32 links[1 << UNIT_BITS]; + BYTE lengths[1 << UNIT_BITS]; +} RMF_unit; + +typedef struct +{ + unsigned max_len; + U32* table; + size_t match_buffer_size; + size_t match_buffer_limit; + RMF_listTail tails_8[RADIX8_TABLE_SIZE]; + RMF_tableHead stack[STACK_SIZE]; + RMF_listTail tails_16[RADIX16_TABLE_SIZE]; + RMF_buildMatch match_buffer[1]; +} RMF_builder; + +struct FL2_matchTable_s +{ + FL2_atomic st_index; + long end_index; + int isStruct; + int allocStruct; + unsigned thread_count; + RMF_parameters params; + RMF_builder** builders; + U32 stack[RADIX16_TABLE_SIZE]; + RMF_tableHead list_heads[RADIX16_TABLE_SIZE]; + U32 table[1]; +}; + +size_t RMF_bitpackInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const start, size_t const end); +size_t RMF_structuredInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const start, size_t const end); +int RMF_bitpackBuildTable(struct FL2_matchTable_s* const tbl, + size_t const job, + unsigned const multi_thread, + FL2_dataBlock const block, + FL2_progressFn progress, void* opaque, U32 weight, size_t init_done); +int RMF_structuredBuildTable(struct FL2_matchTable_s* const tbl, + size_t const job, + unsigned const multi_thread, + FL2_dataBlock const block, + FL2_progressFn progress, void* opaque, U32 weight, size_t init_done); +void RMF_recurseListChunk(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + BYTE const depth, + BYTE const max_depth, + U32 const list_count, + size_t const stack_base); +int RMF_bitpackIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth); +int RMF_structuredIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth); +void RMF_bitpackLimitLengths(struct FL2_matchTable_s* const tbl, size_t const index); +void RMF_structuredLimitLengths(struct FL2_matchTable_s* const tbl, size_t const index); +BYTE* RMF_bitpackAsOutputBuffer(struct FL2_matchTable_s* const tbl, size_t const index); +BYTE* RMF_structuredAsOutputBuffer(struct FL2_matchTable_s* const tbl, size_t const index); +size_t RMF_bitpackGetMatch(const struct FL2_matchTable_s* const tbl, + const BYTE* const data, + size_t const index, + size_t const limit, + unsigned 
const max_depth, + size_t* const offset_ptr); +size_t RMF_structuredGetMatch(const struct FL2_matchTable_s* const tbl, + const BYTE* const data, + size_t const index, + size_t const limit, + unsigned const max_depth, + size_t* const offset_ptr); + +#if defined (__cplusplus) +} +#endif + +#endif /* RADIX_INTERNAL_H */ \ No newline at end of file diff --git a/C/fast-lzma2/radix_mf.c b/C/fast-lzma2/radix_mf.c new file mode 100644 index 00000000..55187638 --- /dev/null +++ b/C/fast-lzma2/radix_mf.c @@ -0,0 +1,672 @@ +/* +* Copyright (c) 2018, Conor McCarthy +* All rights reserved. +* +* This source code is licensed under both the BSD-style license (found in the +* LICENSE file in the root directory of this source tree) and the GPLv2 (found +* in the COPYING file in the root directory of this source tree). +* You may select, at your option, one of the above-listed licenses. +*/ + +#include /* size_t, ptrdiff_t */ +#include /* malloc, free */ +#include "fast-lzma2.h" +#include "mem.h" /* U32, U64, MEM_64bits */ +#include "fl2_internal.h" +#include "radix_internal.h" + +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" /* warning: 'rpt_head_next' may be used uninitialized in this function */ +#elif defined(_MSC_VER) +# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */ +#endif + +#define MIN_MATCH_BUFFER_SIZE 256U /* min buffer size at least FL2_SEARCH_DEPTH_MAX + 2 for bounded build */ +#define MAX_MATCH_BUFFER_SIZE (1UL << 24) /* max buffer size constrained by 24-bit link values */ + +#define REPEAT_CHECK_TABLE ((1 << 1) | (1 << 2) | (1 << 4) | (1 << 8) | (1 << 16) | (1ULL << 32)) + +static void RMF_initTailTable(RMF_builder* const tbl) +{ + for (size_t i = 0; i < RADIX8_TABLE_SIZE; i += 2) { + tbl->tails_8[i].prev_index = RADIX_NULL_LINK; + tbl->tails_8[i + 1].prev_index = RADIX_NULL_LINK; + } + for (size_t i = 0; i < RADIX16_TABLE_SIZE; i += 2) { + tbl->tails_16[i].prev_index = RADIX_NULL_LINK; + tbl->tails_16[i + 1].prev_index = RADIX_NULL_LINK; + } +} + +static RMF_builder* RMF_createBuilder(size_t match_buffer_size) +{ + match_buffer_size = MIN(match_buffer_size, MAX_MATCH_BUFFER_SIZE); + match_buffer_size = MAX(match_buffer_size, MIN_MATCH_BUFFER_SIZE); + + { RMF_builder* const builder = (RMF_builder*)malloc( + sizeof(RMF_builder) + (match_buffer_size - 1) * sizeof(RMF_buildMatch)); + builder->match_buffer_size = match_buffer_size; + builder->match_buffer_limit = match_buffer_size; + RMF_initTailTable(builder); + return builder; + } +} + +static void RMF_freeBuilderTable(RMF_builder** const builders, unsigned const size) +{ + if (builders == NULL) + return; + for (unsigned i = 0; i < size; ++i) { + free(builders[i]); + } + free(builders); +} + +static RMF_builder** RMF_createBuilderTable(U32* const matchTable, size_t const match_buffer_size, unsigned const max_len, unsigned const size) +{ + RMF_builder** builders = (RMF_builder**)malloc(size * sizeof(RMF_builder*)); + DEBUGLOG(3, "RMF_createBuilderTable : match_buffer_size %u, builders %u", (U32)match_buffer_size, size); + if (builders == NULL) + return NULL; + for (unsigned i = 0; i < size; ++i) + builders[i] = NULL; + for (unsigned i = 0; i < size; ++i) { + builders[i] = RMF_createBuilder(match_buffer_size); + if (builders[i] == NULL) { + RMF_freeBuilderTable(builders, i); + return NULL; + } + builders[i]->table = matchTable; + builders[i]->max_len = max_len; + } + return builders; +} + +static int RMF_isStruct(unsigned dictionary_log, unsigned depth) +{ + return 
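+        /* Bitpack table entries hold a 26-bit link plus a 6-bit length in one
+         * U32, so a dictionary larger than 1 << RADIX_LINK_BITS, or a search
+         * depth above BITPACK_MAX_LENGTH, forces the structured layout. */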
dictionary_log > RADIX_LINK_BITS || depth > BITPACK_MAX_LENGTH; +} + +static int RMF_isStructParam(const RMF_parameters* const params) +{ + return RMF_isStruct(params->dictionary_log, params->depth); +} + +/** RMF_clampCParams() : +* make CParam values within valid range. +* @return : valid CParams */ +static RMF_parameters RMF_clampParams(RMF_parameters params) +{ +# define CLAMP(val,min,max) { \ + if (val<(min)) val=(min); \ + else if (val>(max)) val=(max); \ + } + CLAMP(params.dictionary_log, DICTIONARY_LOG_MIN, MEM_64bits() ? DICTIONARY_LOG_MAX_64 : DICTIONARY_LOG_MAX_32); + CLAMP(params.match_buffer_log, FL2_BUFFER_SIZE_LOG_MIN, FL2_BUFFER_SIZE_LOG_MAX); + CLAMP(params.overlap_fraction, FL2_BLOCK_OVERLAP_MIN, FL2_BLOCK_OVERLAP_MAX); + CLAMP(params.depth, FL2_SEARCH_DEPTH_MIN, FL2_SEARCH_DEPTH_MAX); + return params; +} + +static size_t RMF_applyParameters_internal(FL2_matchTable* const tbl, const RMF_parameters* const params) +{ + int const isStruct = RMF_isStructParam(params); + unsigned const dictionary_log = tbl->params.dictionary_log; + /* dictionary is allocated with the struct and is immutable */ + if (params->dictionary_log > tbl->params.dictionary_log + || (params->dictionary_log == tbl->params.dictionary_log && isStruct > tbl->allocStruct)) + return FL2_ERROR(parameter_unsupported); + + { size_t const match_buffer_size = (size_t)1 << (params->dictionary_log - params->match_buffer_log); + tbl->params = *params; + tbl->params.dictionary_log = dictionary_log; + tbl->isStruct = isStruct; + if (tbl->builders == NULL + || match_buffer_size > tbl->builders[0]->match_buffer_size) + { + RMF_freeBuilderTable(tbl->builders, tbl->thread_count); + tbl->builders = RMF_createBuilderTable(tbl->table, match_buffer_size, tbl->isStruct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH, tbl->thread_count); + if (tbl->builders == NULL) { + return FL2_ERROR(memory_allocation); + } + } + else { + for (unsigned i = 0; i < tbl->thread_count; ++i) { + tbl->builders[i]->match_buffer_limit = match_buffer_size; + tbl->builders[i]->max_len = tbl->isStruct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH; + } + } + } + return 0; +} + +static void RMF_reduceDict(RMF_parameters* const params, size_t const dict_reduce) +{ + if (dict_reduce) + while (params->dictionary_log > DICTIONARY_LOG_MIN && (size_t)1 << (params->dictionary_log - 1) >= dict_reduce) { + --params->dictionary_log; + params->match_buffer_log = MAX(params->match_buffer_log - 1, FL2_BUFFER_SIZE_LOG_MIN); + } +} + +FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const dict_reduce, unsigned const thread_count) +{ + int isStruct; + size_t dictionary_size; + size_t table_bytes; + FL2_matchTable* tbl; + RMF_parameters params = RMF_clampParams(*p); + + RMF_reduceDict(¶ms, dict_reduce); + isStruct = RMF_isStructParam(¶ms); + dictionary_size = (size_t)1 << params.dictionary_log; + + DEBUGLOG(3, "RMF_createMatchTable : isStruct %d, dict %u", isStruct, (U32)dictionary_size); + + table_bytes = isStruct ? 
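+        /* One RMF_unit carries links and lengths for 4 positions (20 bytes,
+         * i.e. 5 bytes per position) versus 4 bytes per position bitpacked;
+         * RMF_memoryUsage() below uses the same 4-or-5 bytes figure. */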
((dictionary_size + 3U) / 4U) * sizeof(RMF_unit) + : dictionary_size * sizeof(U32); + tbl = (FL2_matchTable*)malloc( + sizeof(FL2_matchTable) + table_bytes - sizeof(U32)); + if (!tbl) return NULL; + + tbl->isStruct = isStruct; + tbl->allocStruct = isStruct; + tbl->thread_count = thread_count + !thread_count; + tbl->params = params; + tbl->builders = NULL; + + RMF_applyParameters_internal(tbl, ¶ms); + + for (size_t i = 0; i < RADIX16_TABLE_SIZE; i += 2) { + tbl->list_heads[i].head = RADIX_NULL_LINK; + tbl->list_heads[i].count = 0; + tbl->list_heads[i + 1].head = RADIX_NULL_LINK; + tbl->list_heads[i + 1].count = 0; + } + return tbl; +} + +void RMF_freeMatchTable(FL2_matchTable* const tbl) +{ + if (tbl == NULL) + return; + DEBUGLOG(3, "RMF_freeMatchTable"); + RMF_freeBuilderTable(tbl->builders, tbl->thread_count); + free(tbl); +} + +BYTE RMF_compatibleParameters(const FL2_matchTable* const tbl, const RMF_parameters * const p, size_t const dict_reduce) +{ + RMF_parameters params = RMF_clampParams(*p); + RMF_reduceDict(¶ms, dict_reduce); + return tbl->params.dictionary_log > params.dictionary_log + || (tbl->params.dictionary_log == params.dictionary_log && tbl->allocStruct >= RMF_isStructParam(¶ms)); +} + +size_t RMF_applyParameters(FL2_matchTable* const tbl, const RMF_parameters* const p, size_t const dict_reduce) +{ + RMF_parameters params = RMF_clampParams(*p); + RMF_reduceDict(¶ms, dict_reduce); + return RMF_applyParameters_internal(tbl, ¶ms); +} + +size_t RMF_threadCount(const FL2_matchTable* const tbl) +{ + return tbl->thread_count; +} + +size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end) +{ + DEBUGLOG(5, "RMF_initTable : start %u, size %u", (U32)start, (U32)end); + if (tbl->isStruct) { + return RMF_structuredInit(tbl, data, start, end); + } + else { + return RMF_bitpackInit(tbl, data, start, end); + } +} + +static void HandleRepeat(RMF_buildMatch* const match_buffer, + const BYTE* const data_block, + size_t const next, + U32 count, + U32 const rpt_len, + U32 const depth, + U32 const max_len) +{ + size_t index = next; + size_t next_i; + U32 length = depth + rpt_len; + const BYTE* const data = data_block + match_buffer[index].from; + const BYTE* const data_2 = data - rpt_len; + while (data[length] == data_2[length] && length < max_len) + ++length; + for (; length <= max_len && count; --count) { + next_i = match_buffer[index].next & 0xFFFFFF; + match_buffer[index].next = (U32)next_i | (length << 24); + length += rpt_len; + index = next_i; + } + for (; count; --count) { + next_i = match_buffer[index].next & 0xFFFFFF; + match_buffer[index].next = (U32)next_i | (max_len << 24); + index = next_i; + } +} + +typedef struct +{ + size_t index; + const BYTE* data_src; + union src_data_u src; +} BruteForceMatch; + +static void BruteForceBuffered(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + size_t index, + size_t list_count, + size_t const slot, + size_t const depth, + size_t const max_depth) +{ + BruteForceMatch buffer[MAX_BRUTE_FORCE_LIST_SIZE + 1]; + const BYTE* data_src = data_block + depth; + size_t limit = max_depth - depth; + const BYTE* start = data_src + block_start; + size_t i = 0; + for (;;) { + buffer[i].index = index; + buffer[i].data_src = data_src + tbl->match_buffer[index].from; + buffer[i].src.u32 = tbl->match_buffer[index].src.u32; + if (++i >= list_count) { + break; + } + index = tbl->match_buffer[index].next & 0xFFFFFF; + } + i = 0; + do { + size_t longest = 0; + size_t j = i + 1; + 
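+        /* For each buffered string, find the longest match among the strings
+         * that follow it; with at most MAX_BRUTE_FORCE_LIST_SIZE entries this
+         * O(n^2) scan is cheaper than another level of radix recursion. */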
size_t longest_index = j; + const BYTE* data = buffer[i].data_src; + do { + size_t len_test = slot; + while (len_test < 4 && buffer[i].src.chars[len_test] == buffer[j].src.chars[len_test] && len_test - slot < limit) { + ++len_test; + } + len_test -= slot; + if (len_test) { + const BYTE* data_2 = buffer[j].data_src; + while (data[len_test] == data_2[len_test] && len_test < limit) { + ++len_test; + } + } + if (len_test > longest) { + longest_index = j; + longest = len_test; + if (len_test >= limit) { + break; + } + } + } while (++j < list_count); + if (longest > 0) { + index = buffer[i].index; + tbl->match_buffer[index].next = (U32)(buffer[longest_index].index | ((depth + longest) << 24)); + } + ++i; + } while (i < list_count - 1 && buffer[i].data_src >= start); +} + +FORCE_INLINE_TEMPLATE +void RMF_recurseListChunk_generic(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + BYTE depth, + BYTE const max_depth, + U32 list_count, + size_t const stack_base) +{ + /* Create an offset data buffer pointer for reading the next bytes */ + const BYTE base_depth = depth; + size_t st_index = stack_base; + size_t index = 0; + ++depth; + /* The last element is done separately and won't be copied back at the end */ + --list_count; + do { + size_t const radix_8 = tbl->match_buffer[index].src.chars[0]; + /* Seen this char before? */ + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + /* Link the previous occurrence to this one and record the new length */ + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tbl->tails_8[radix_8].list_count = 1; + /* Add the new sub list to the stack */ + tbl->stack[st_index].head = (U32)index; + /* This will be converted to a count at the end */ + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)index; + ++index; + } while (index < list_count); + + { /* Do the last element */ + size_t const radix_8 = tbl->match_buffer[index].src.chars[0]; + /* Nothing to do if there was no previous */ + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + } + /* Convert radix values on the stack to counts and reset any used tail slots */ + for (size_t j = stack_base; j < st_index; ++j) { + tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK; + tbl->stack[j].count = (U32)tbl->tails_8[tbl->stack[j].count].list_count; + } + while (st_index > stack_base) { + const BYTE* data_src; + size_t link; + size_t slot; + U32 test; + + /* Pop an item off the stack */ + --st_index; + list_count = tbl->stack[st_index].count; + if (list_count < 2) { + /* Nothing to match with */ + continue; + } + index = tbl->stack[st_index].head; + link = tbl->match_buffer[index].from; + if (link < block_start) { + /* Chain starts in the overlap region which is already encoded */ + continue; + } + /* Check stack space. The first comparison is unnecessary but it's a constant so should be faster */ + if (st_index > STACK_SIZE - RADIX8_TABLE_SIZE + && st_index > STACK_SIZE - list_count) + { + /* Stack may not be able to fit all possible new items. This is very rare. 
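+               Skipping the list only forfeits the matches it would have
+               produced, so the output stays valid at a small cost in ratio.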
*/ + continue; + } + depth = tbl->match_buffer[index].next >> 24; + slot = (depth - base_depth) & 3; + if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) { + /* Quicker to use brute force, each string compared with all previous strings */ + BruteForceBuffered(tbl, + data_block, + block_start, + index, + list_count, + slot, + depth, + max_depth); + continue; + } + /* check for repeats at depth 4,8,16,32 etc */ + test = max_depth != 6 && ((depth & 3) == 0) && ((REPEAT_CHECK_TABLE >> ((depth >> 2) & 31)) & 1) && (max_depth >= depth + (depth >> 1)); + ++depth; + /* Update the offset data buffer pointer */ + data_src = data_block + depth; + /* Last pass is done separately */ + if (!test && depth < max_depth) { + size_t const prev_st_index = st_index; + /* Last element done separately */ + --list_count; + /* slot is the char cache index. If 3 then chars need to be loaded. */ + if (slot == 3 && max_depth != 6) do { + size_t const radix_8 = tbl->match_buffer[index].src.chars[3]; + size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + /* Pre-load the next link and data bytes to avoid waiting for RAM access */ + tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link); + size_t const next_link = tbl->match_buffer[next_index].from; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev!=RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tbl->tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)index; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)index; + index = next_index; + link = next_link; + } while (--list_count != 0); + else do { + size_t const radix_8 = tbl->match_buffer[index].src.chars[slot]; + size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + /* Pre-load the next link to avoid waiting for RAM access */ + size_t const next_link = tbl->match_buffer[next_index].from; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tbl->tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)index; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)index; + index = next_index; + link = next_link; + } while (--list_count != 0); + + { size_t const radix_8 = tbl->match_buffer[index].src.chars[slot]; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + if (slot == 3) { + tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link); + } + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + } + for (size_t j = prev_st_index; j < st_index; ++j) { + tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK; + tbl->stack[j].count = (U32)tbl->tails_8[tbl->stack[j].count].list_count; + } + } + else if (test) { + S32 rpt = -1; + size_t rpt_head_next; + U32 rpt_dist = 0; + size_t const prev_st_index = st_index; + U32 const rpt_depth = depth - 1; + /* Last element done separately */ + --list_count; + do { + size_t const radix_8 = tbl->match_buffer[index].src.chars[slot]; + size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + size_t const next_link = tbl->match_buffer[next_index].from; + if ((link - next_link) > rpt_depth) { + if (rpt > 0) { + 
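+                        /* A run of entries at one fixed distance has ended.
+                         * HandleRepeat extends the first match by comparison,
+                         * then assigns the remaining lengths arithmetically
+                         * (step rpt_len) instead of comparing every pair. */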
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len); + } + rpt = -1; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tbl->tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)index; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)index; + index = next_index; + link = next_link; + } + else { + U32 const dist = (U32)(link - next_link); + if (rpt < 0 || dist != rpt_dist) { + if (rpt > 0) { + HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len); + } + rpt = 0; + rpt_head_next = next_index; + rpt_dist = dist; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tbl->tails_8[radix_8].list_count = 1; + tbl->stack[st_index].head = (U32)index; + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)index; + } + else { + ++rpt; + } + index = next_index; + link = next_link; + } + } while (--list_count != 0); + if (rpt > 0) { + HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len); + } + + { size_t const radix_8 = tbl->match_buffer[index].src.chars[slot]; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + if (slot == 3) { + tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link); + } + ++tbl->tails_8[radix_8].list_count; + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + } + for (size_t j = prev_st_index; j < st_index; ++j) { + tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK; + tbl->stack[j].count = (U32)tbl->tails_8[tbl->stack[j].count].list_count; + } + } + else { + size_t prev_st_index = st_index; + /* The last pass at max_depth */ + do { + size_t const radix_8 = tbl->match_buffer[index].src.chars[slot]; + size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK; + /* Pre-load the next link. */ + /* The last element in tbl->match_buffer is circular so this is never an access violation. 
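+               (The final entry's next field still resolves to a valid buffer
+               index, so the next_link pre-load cannot read out of bounds.)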
*/ + size_t const next_link = tbl->match_buffer[next_index].from; + U32 const prev = tbl->tails_8[radix_8].prev_index; + if (prev != RADIX_NULL_LINK) { + tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24); + } + else { + tbl->stack[st_index].count = (U32)radix_8; + ++st_index; + } + tbl->tails_8[radix_8].prev_index = (U32)index; + index = next_index; + link = next_link; + } while (--list_count != 0); + for (size_t j = prev_st_index; j < st_index; ++j) { + tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK; + } + st_index = prev_st_index; + } + } +} + +void RMF_recurseListChunk(RMF_builder* const tbl, + const BYTE* const data_block, + size_t const block_start, + BYTE const depth, + BYTE const max_depth, + U32 const list_count, + size_t const stack_base) +{ + if (max_depth > 6) { + RMF_recurseListChunk_generic(tbl, data_block, block_start, depth, max_depth, list_count, stack_base); + } + else { + RMF_recurseListChunk_generic(tbl, data_block, block_start, depth, 6, list_count, stack_base); + } +} + +/* Iterate the head table concurrently with other threads, and recurse each list until max_depth is reached */ +int RMF_buildTable(FL2_matchTable* const tbl, + size_t const job, + unsigned const multi_thread, + FL2_dataBlock const block, + FL2_progressFn progress, void* opaque, U32 weight, size_t init_done) +{ + DEBUGLOG(5, "RMF_buildTable : thread %u", (U32)job); + if (tbl->isStruct) { + return RMF_structuredBuildTable(tbl, job, multi_thread, block, progress, opaque, weight, init_done); + } + else { + return RMF_bitpackBuildTable(tbl, job, multi_thread, block, progress, opaque, weight, init_done); + } +} + +int RMF_integrityCheck(const FL2_matchTable* const tbl, const BYTE* const data, size_t const index, size_t const end, unsigned const max_depth) +{ + if (tbl->isStruct) { + return RMF_structuredIntegrityCheck(tbl, data, index, end, max_depth); + } + else { + return RMF_bitpackIntegrityCheck(tbl, data, index, end, max_depth); + } +} + +size_t RMF_getMatch(FL2_matchTable* const tbl, + const BYTE* const data, + size_t const index, + size_t const limit, + unsigned max_depth, + size_t* const offset_ptr) +{ + if (tbl->isStruct) { + return RMF_structuredGetMatch(tbl, data, index, limit, max_depth, offset_ptr); + } + else { + return RMF_bitpackGetMatch(tbl, data, index, limit, max_depth, offset_ptr); + } +} + +void RMF_limitLengths(FL2_matchTable* const tbl, size_t const index) +{ + if (tbl->isStruct) { + RMF_structuredLimitLengths(tbl, index); + } + else { + RMF_bitpackLimitLengths(tbl, index); + } +} + +BYTE* RMF_getTableAsOutputBuffer(FL2_matchTable* const tbl, size_t const index) +{ + if (tbl->isStruct) { + return RMF_structuredAsOutputBuffer(tbl, index); + } + else { + return RMF_bitpackAsOutputBuffer(tbl, index); + } +} + +size_t RMF_memoryUsage(unsigned const dict_log, unsigned const buffer_log, unsigned const depth, unsigned thread_count) +{ + size_t size = (size_t)(4U + RMF_isStruct(dict_log, depth)) << dict_log; + U32 buf_size = (U32)1 << (dict_log - buffer_log); + size += ((buf_size - 1) * sizeof(RMF_buildMatch) + sizeof(RMF_builder)) * thread_count; + return size; +} diff --git a/C/fast-lzma2/radix_mf.h b/C/fast-lzma2/radix_mf.h new file mode 100644 index 00000000..c5bf943d --- /dev/null +++ b/C/fast-lzma2/radix_mf.h @@ -0,0 +1,60 @@ +/* +* Copyright (c) 2018, Conor McCarthy +* All rights reserved. 
+* +* This source code is licensed under both the BSD-style license (found in the +* LICENSE file in the root directory of this source tree) and the GPLv2 (found +* in the COPYING file in the root directory of this source tree). +* You may select, at your option, one of the above-listed licenses. +*/ + +#ifndef RADIX_MF_H +#define RADIX_MF_H + +#include "fast-lzma2.h" +#include "data_block.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +typedef struct FL2_matchTable_s FL2_matchTable; + +#define OVERLAP_FROM_DICT_LOG(d, o) (((size_t)1 << ((d) - 4)) * (o)) + +#define RMF_MIN_BYTES_PER_THREAD 1024 + +typedef struct +{ + unsigned dictionary_log; + unsigned match_buffer_log; + unsigned overlap_fraction; + unsigned block_size_log; + unsigned divide_and_conquer; + unsigned depth; +#ifdef RMF_REFERENCE + unsigned use_ref_mf; +#endif +} RMF_parameters; + +FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const params, size_t const dict_reduce, unsigned const thread_count); +void RMF_freeMatchTable(FL2_matchTable* const tbl); +BYTE RMF_compatibleParameters(const FL2_matchTable* const tbl, const RMF_parameters* const params, size_t const dict_reduce); +size_t RMF_applyParameters(FL2_matchTable* const tbl, const RMF_parameters* const params, size_t const dict_reduce); +size_t RMF_threadCount(const FL2_matchTable * const tbl); +size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end); +int RMF_buildTable(FL2_matchTable* const tbl, + size_t const job, + unsigned const multi_thread, + FL2_dataBlock const block, + FL2_progressFn progress, void* opaque, U32 weight, size_t init_done); +int RMF_integrityCheck(const FL2_matchTable* const tbl, const BYTE* const data, size_t const index, size_t const end, unsigned const max_depth); +void RMF_limitLengths(FL2_matchTable* const tbl, size_t const index); +BYTE* RMF_getTableAsOutputBuffer(FL2_matchTable* const tbl, size_t const index); +size_t RMF_memoryUsage(unsigned const dict_log, unsigned const buffer_log, unsigned const depth, unsigned thread_count); + +#if defined (__cplusplus) +} +#endif + +#endif /* RADIX_MF_H */ \ No newline at end of file diff --git a/C/fast-lzma2/radix_struct.c b/C/fast-lzma2/radix_struct.c new file mode 100644 index 00000000..2aac9093 --- /dev/null +++ b/C/fast-lzma2/radix_struct.c @@ -0,0 +1,62 @@ +/* +* Copyright (c) 2018, Conor McCarthy +* All rights reserved. +* +* This source code is licensed under both the BSD-style license (found in the +* LICENSE file in the root directory of this source tree) and the GPLv2 (found +* in the COPYING file in the root directory of this source tree). +* You may select, at your option, one of the above-listed licenses. +*/ + +#include "mem.h" /* U32, U64 */ +#include "fl2threading.h" +#include "fl2_internal.h" +#include "radix_internal.h" + +typedef struct FL2_matchTable_s FL2_matchTable; + +#undef MIN +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +#define RMF_STRUCTURED + +#define RADIX_MAX_LENGTH STRUCTURED_MAX_LENGTH + +#define InitMatchLink(index, link) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] = (U32)(link) + +#define GetMatchLink(index) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] + +#define GetInitialMatchLink(index) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] + +#define GetMatchLength(index) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK] + +#define SetMatchLink(index, link, length) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] = (U32)(link) + +#define SetMatchLength(index, link, length) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK] = (BYTE)(length) + +#define SetMatchLinkAndLength(index, link, length) { size_t i_ = (index) >> UNIT_BITS, u_ = (index) & UNIT_MASK; ((RMF_unit*)tbl->table)[i_].links[u_] = (U32)(link); ((RMF_unit*)tbl->table)[i_].lengths[u_] = (BYTE)(length); } + +#define SetNull(index) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] = RADIX_NULL_LINK + +#define IsNull(index) (((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] == RADIX_NULL_LINK) + +BYTE* RMF_structuredAsOutputBuffer(FL2_matchTable* const tbl, size_t const index) +{ + return (BYTE*)((RMF_unit*)tbl->table + (index >> UNIT_BITS) + ((index & UNIT_MASK) != 0)); +} + +/* Restrict the match lengths so that they don't reach beyond index */ +void RMF_structuredLimitLengths(FL2_matchTable* const tbl, size_t const index) +{ + DEBUGLOG(5, "RMF_limitLengths : end %u, max length %u", (U32)index, RADIX_MAX_LENGTH); + SetNull(index - 1); + for (size_t length = 2; length < RADIX_MAX_LENGTH && length <= index; ++length) { + size_t const i = (index - length) >> UNIT_BITS; + size_t const u = (index - length) & UNIT_MASK; + if (((RMF_unit*)tbl->table)[i].links[u] != RADIX_NULL_LINK) { + ((RMF_unit*)tbl->table)[i].lengths[u] = MIN((BYTE)length, ((RMF_unit*)tbl->table)[i].lengths[u]); + } + } +} + +#include "radix_engine.h" \ No newline at end of file diff --git a/C/fast-lzma2/range_enc.c b/C/fast-lzma2/range_enc.c new file mode 100644 index 00000000..aff9ab80 --- /dev/null +++ b/C/fast-lzma2/range_enc.c @@ -0,0 +1,101 @@ +/* +* Bitwise range encoder by Igor Pavlov +* Modified by Conor McCarthy +* +* Public domain +*/ + +#include "fl2_internal.h" +#include "mem.h" +#include "range_enc.h" + +const unsigned price_table[kBitModelTotal >> kNumMoveReducingBits] = { + 128, 103, 91, 84, 78, 73, 69, 66, + 63, 61, 58, 56, 54, 52, 51, 49, + 48, 46, 45, 44, 43, 42, 41, 40, + 39, 38, 37, 36, 35, 34, 34, 33, + 32, 31, 31, 30, 29, 29, 28, 28, + 27, 26, 26, 25, 25, 24, 24, 23, + 23, 22, 22, 22, 21, 21, 20, 20, + 19, 19, 19, 18, 18, 17, 17, 17, + 16, 16, 16, 15, 15, 15, 14, 14, + 14, 13, 13, 13, 12, 12, 12, 11, + 11, 11, 11, 10, 10, 10, 10, 9, + 9, 9, 9, 8, 8, 8, 8, 7, + 7, 7, 7, 6, 6, 6, 6, 5, + 5, 5, 5, 5, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 2, 2, 2, + 2, 2, 2, 1, 1, 1, 1, 1 +}; + +void SetOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size) +{ + rc->out_buffer = out_buffer; + rc->chunk_size = chunk_size; + rc->out_index = 0; +} + +void RangeEncReset(RangeEncoder* const rc) +{ + rc->low = 0; + rc->range = (U32)-1; + rc->cache_size = 1; + rc->cache = 0; +} + +void ShiftLow(RangeEncoder* const rc) +{ + if (rc->low < 0xFF000000 || rc->low > 0xFFFFFFFF) + { + BYTE temp = rc->cache; + do { + assert (rc->out_index < rc->chunk_size - 4096); 
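+            /* Emit the cached byte plus any carry out of bit 32 of low, then
+             * settle the pending 0xFF bytes: a carry turns them into 0x00,
+             * otherwise they stay 0xFF. E.g. cache 0x7F with three pending
+             * 0xFFs and a carry flushes as 0x80 0x00 0x00 0x00. */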
+ rc->out_buffer[rc->out_index++] = temp + (BYTE)(rc->low >> 32); + temp = 0xFF; + } while (--rc->cache_size != 0); + rc->cache = (BYTE)(rc->low >> 24); + } + ++rc->cache_size; + rc->low = (rc->low << 8) & 0xFFFFFFFF; +} + +void EncodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol) +{ + size_t tree_index = 1; + assert(bit_count > 0); + do { + unsigned bit; + --bit_count; + bit = (symbol >> bit_count) & 1; + EncodeBit(rc, &probs[tree_index], bit); + tree_index = (tree_index << 1) | bit; + } while (bit_count != 0); +} + +void EncodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol) +{ + unsigned tree_index = 1; + assert(bit_count != 0); + do { + unsigned bit = symbol & 1; + EncodeBit(rc, &probs[tree_index], bit); + tree_index = (tree_index << 1) + bit; + symbol >>= 1; + } while (--bit_count != 0); +} + +void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count) +{ + assert(bit_count > 0); + do { + rc->range >>= 1; + --bit_count; + rc->low += rc->range & -((int)(value >> bit_count) & 1); + if (rc->range < kTopValue) { + rc->range <<= 8; + ShiftLow(rc); + } + } while (bit_count != 0); +} + + diff --git a/C/fast-lzma2/range_enc.h b/C/fast-lzma2/range_enc.h new file mode 100644 index 00000000..54672f4e --- /dev/null +++ b/C/fast-lzma2/range_enc.h @@ -0,0 +1,157 @@ +/* +* Bitwise range encoder by Igor Pavlov +* Modified by Conor McCarthy +* +* Public domain +*/ + +#ifndef RANGE_ENCODER_H +#define RANGE_ENCODER_H + +#include "mem.h" +#include "compiler.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifdef LZMA_ENC_PROB32 +typedef U32 Probability; +#else +typedef U16 Probability; +#endif + +#define kNumTopBits 24U +#define kTopValue (1UL << kNumTopBits) +#define kNumBitModelTotalBits 11U +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5U +#define kProbInitValue (kBitModelTotal >> 1U) +#define kNumMoveReducingBits 4U +#define kNumBitPriceShiftBits 4U + +extern const unsigned price_table[kBitModelTotal >> kNumMoveReducingBits]; + +typedef struct +{ + BYTE *out_buffer; + size_t out_index; + size_t chunk_size; + U64 cache_size; + U64 low; + U32 range; + BYTE cache; +} RangeEncoder; + +void RangeEncReset(RangeEncoder* const rc); + +void SetOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size); + +void RangeEncReset(RangeEncoder* const rc); + +void ShiftLow(RangeEncoder* const rc); + +void EncodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol); + +void EncodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol); + +void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count); + +HINT_INLINE +void EncodeBit0(RangeEncoder* const rc, Probability *const rprob) +{ + unsigned prob = *rprob; + rc->range = (rc->range >> kNumBitModelTotalBits) * prob; + prob += (kBitModelTotal - prob) >> kNumMoveBits; + *rprob = (Probability)prob; + if (rc->range < kTopValue) { + rc->range <<= 8; + ShiftLow(rc); + } +} + +HINT_INLINE +void EncodeBit1(RangeEncoder* const rc, Probability *const rprob) +{ + unsigned prob = *rprob; + U32 new_bound = (rc->range >> kNumBitModelTotalBits) * prob; + rc->low += new_bound; + rc->range -= new_bound; + prob -= prob >> kNumMoveBits; + *rprob = (Probability)prob; + if (rc->range < kTopValue) { + rc->range <<= 8; + ShiftLow(rc); + } +} + +HINT_INLINE +void EncodeBit(RangeEncoder* const rc, Probability *const 
rprob, unsigned const bit) +{ + unsigned prob = *rprob; + if (bit != 0) { + U32 new_bound = (rc->range >> kNumBitModelTotalBits) * prob; + rc->low += new_bound; + rc->range -= new_bound; + prob -= prob >> kNumMoveBits; + } + else { + rc->range = (rc->range >> kNumBitModelTotalBits) * prob; + prob += (kBitModelTotal - prob) >> kNumMoveBits; + } + *rprob = (Probability)prob; + if (rc->range < kTopValue) { + rc->range <<= 8; + ShiftLow(rc); + } +} + +#define GET_PRICE(rc, prob, symbol) \ + price_table[((prob) ^ ((-(int)(symbol)) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICE_0(rc, prob) price_table[(prob) >> kNumMoveReducingBits] + +#define GET_PRICE_1(rc, prob) price_table[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +HINT_INLINE +unsigned GetTreePrice(RangeEncoder* const rc, const Probability* const prob_table, unsigned const bit_count, size_t symbol) +{ + unsigned price = 0; + symbol |= ((size_t)1 << bit_count); + while (symbol != 1) { + size_t next_symbol = symbol >> 1; + unsigned prob = prob_table[next_symbol]; + unsigned bit = (unsigned)symbol & 1; + price += GET_PRICE(rc, prob, bit); + symbol = next_symbol; + } + return price; +} + +HINT_INLINE +unsigned GetReverseTreePrice(RangeEncoder* const rc, const Probability* const prob_table, unsigned const bit_count, size_t symbol) +{ + unsigned price = 0; + size_t m = 1; + for (unsigned i = bit_count; i != 0; --i) { + unsigned prob = prob_table[m]; + unsigned bit = symbol & 1; + symbol >>= 1; + price += GET_PRICE(rc, prob, bit); + m = (m << 1) | bit; + } + return price; +} + +HINT_INLINE +void Flush(RangeEncoder* const rc) +{ + for (int i = 0; i < 5; ++i) + ShiftLow(rc); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* RANGE_ENCODER_H */ \ No newline at end of file diff --git a/C/fast-lzma2/util.h b/C/fast-lzma2/util.h new file mode 100644 index 00000000..fe8b6fa4 --- /dev/null +++ b/C/fast-lzma2/util.h @@ -0,0 +1,765 @@ +/* + * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef UTIL_H_MODULE +#define UTIL_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/*-**************************************** +* Dependencies +******************************************/ +#include "platform.h" /* PLATFORM_POSIX_VERSION */ +#include /* malloc */ +#include /* size_t, ptrdiff_t */ +#include /* fprintf */ +#include /* strncmp */ +#include /* stat, utime */ +#include /* stat */ +#if defined(_MSC_VER) +# include /* utime */ +# include /* _chmod */ +#else +# include /* chown, stat */ +# include /* utime */ +#endif +#include /* time */ +#include +#include "mem.h" /* U32, U64 */ + + +/* ************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +# define UTIL_fseek _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define UTIL_fseek fseeko +#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) +# define UTIL_fseek fseeko64 +#else +# define UTIL_fseek fseek +#endif + + +/*-**************************************** +* Sleep functions: Windows - Posix - others +******************************************/ +#if defined(_WIN32) +# include +# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) +# define UTIL_sleep(s) Sleep(1000*s) +# define UTIL_sleepMilli(milli) Sleep(milli) +#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */ +# include +# include /* setpriority */ +# include /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */ +# if defined(PRIO_PROCESS) +# define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20) +# else +# define SET_REALTIME_PRIORITY /* disabled */ +# endif +# define UTIL_sleep(s) sleep(s) +# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L) /* nanosleep requires POSIX.1-2001 */ +# define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); } +# else +# define UTIL_sleepMilli(milli) /* disabled */ +# endif +#else +# define SET_REALTIME_PRIORITY /* disabled */ +# define UTIL_sleep(s) /* disabled */ +# define UTIL_sleepMilli(milli) /* disabled */ +#endif + + +/* ************************************* +* Constants +***************************************/ +#define LIST_SIZE_INCREASE (8*1024) + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(__INTEL_COMPILER) +# pragma warning(disable : 177) /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */ +#endif +#if defined(__GNUC__) +# define UTIL_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define UTIL_STATIC static inline +#elif defined(_MSC_VER) +# define UTIL_STATIC static __inline +# pragma warning(disable : 4996) /* disable: C4996: 'strncpy': This function or variable may be unsafe. */ +#else +# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Console log +******************************************/ +static int g_utilDisplayLevel; +#define UTIL_DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) +#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } } + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + typedef LARGE_INTEGER UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } +#elif defined(__APPLE__) && defined(__MACH__) + #include + typedef U64 UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom); + } +#elif (PLATFORM_POSIX_VERSION >= 200112L) + #include + typedef struct timespec UTIL_freq_t; + typedef struct timespec UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) + { + UTIL_time_t time; + if (clock_gettime(CLOCK_MONOTONIC, &time)) + UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n"); /* we could also exit() */ + return time; + } + UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + } + return diff; + } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; + } +#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */ + typedef clock_t UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + UTIL_STATIC U64 
UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +#endif + + +/* returns time span in microseconds */ +UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + + +UTIL_STATIC void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); +} + + + +/*-**************************************** +* File functions +******************************************/ +#if defined(_MSC_VER) + #define chmod _chmod + typedef struct __stat64 stat_t; +#else + typedef struct stat stat_t; +#endif + + +UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf) +{ + int res = 0; + struct utimbuf timebuf; + + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + res += utime(filename, &timebuf); /* set access and modification times */ + +#if !defined(_WIN32) + res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ +#endif + + res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ + + errno = 0; + return -res; /* number of errors is returned */ +} + + +UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf) +{ + int r; +#if defined(_MSC_VER) + r = _stat64(infilename, statbuf); + if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */ +#else + r = stat(infilename, statbuf); + if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */ +#endif + return 1; +} + + +UTIL_STATIC int UTIL_isRegularFile(const char* infilename) +{ + stat_t statbuf; + return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */ +} + + +UTIL_STATIC U32 UTIL_isDirectory(const char* infilename) +{ + int r; + stat_t statbuf; +#if defined(_MSC_VER) + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + +UTIL_STATIC U32 UTIL_isLink(const char* infilename) +{ +#if defined(_WIN32) + /* no symlinks on windows */ + (void)infilename; +#else + int r; + stat_t statbuf; + r = lstat(infilename, &statbuf); + if (!r && S_ISLNK(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +#define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) +UTIL_STATIC U64 UTIL_getFileSize(const char* infilename) +{ + if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN; + { int r; +#if defined(_MSC_VER) + struct __stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; +#elif defined(__MINGW32__) && defined (__MSVCRT__) + struct _stati64 statbuf; + r = _stati64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN; +#endif + return (U64)statbuf.st_size; + } +} + + +UTIL_STATIC U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles) +{ + U64 total = 0; + int error = 0; + unsigned n; + for (n=0; n= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart 
== NULL) { free(path); FindClose(hFile); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + } while (FindNextFileA(hFile, &cFile)); + + FindClose(hFile); + return nbFiles; +} + +#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ +# define UTIL_HAS_CREATEFILELIST +# include /* opendir, readdir */ +# include /* strerror, memcpy */ + +UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks) +{ + DIR *dir; + struct dirent *entry; + char* path; + int dirLength, fnameLength, pathLength, nbFiles = 0; + + if (!(dir = opendir(dirName))) { + UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno)); + return 0; + } + + dirLength = (int)strlen(dirName); + errno = 0; + while ((entry = readdir(dir)) != NULL) { + if (strcmp (entry->d_name, "..") == 0 || + strcmp (entry->d_name, ".") == 0) continue; + fnameLength = (int)strlen(entry->d_name); + path = (char*) malloc(dirLength + fnameLength + 2); + if (!path) { closedir(dir); return 0; } + memcpy(path, dirName, dirLength); + + path[dirLength] = '/'; + memcpy(path+dirLength+1, entry->d_name, fnameLength); + pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + + if (!followLinks && UTIL_isLink(path)) { + UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path); + continue; + } + + if (UTIL_isDirectory(path)) { + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */ + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } else { + if (*bufStart + *pos + pathLength >= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */ + } + + if (errno != 0) { + UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno)); + free(*bufStart); + *bufStart = NULL; + } + closedir(dir); + return nbFiles; +} + +#else + +UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks) +{ + (void)bufStart; (void)bufEnd; (void)pos; + UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName); + return 0; +} + +#endif /* #ifdef _WIN32 */ + +/* + * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, + * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). + * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer) + * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called. 
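+ * Typical use (a sketch; the variable names are illustrative only):
+ *   char* buf = NULL; unsigned nbFiles = 0;
+ *   const char** files = UTIL_createFileList(argv + 1, argc - 1, &buf, &nbFiles, 1);
+ *   if (files) { ... use files[0..nbFiles-1] ..., then UTIL_freeFileList(files, buf); }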
+ */ +UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks) +{ + size_t pos; + unsigned i, nbFiles; + char* buf = (char*)malloc(LIST_SIZE_INCREASE); + char* bufend = buf + LIST_SIZE_INCREASE; + const char** fileTable; + + if (!buf) return NULL; + + for (i=0, pos=0, nbFiles=0; i= bufend) { + ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE; + buf = (char*)UTIL_realloc(buf, newListSize); + bufend = buf + newListSize; + if (!buf) return NULL; + } + if (buf + pos + len < bufend) { + strncpy(buf + pos, inputNames[i], bufend - (buf + pos)); + pos += len + 1; + nbFiles++; + } + } else { + nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks); + if (buf == NULL) return NULL; + } } + + if (nbFiles == 0) { free(buf); return NULL; } + + fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*)); + if (!fileTable) { free(buf); return NULL; } + + for (i=0, pos=0; i bufend) { free(buf); free((void*)fileTable); return NULL; } + + *allocatedBuffer = buf; + *allocatedNamesNb = nbFiles; + + return fileTable; +} + + +UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer) +{ + if (allocatedBuffer) free(allocatedBuffer); + if (filenameTable) free((void*)filenameTable); +} + +/* count the number of physical cores */ +#if defined(_WIN32) || defined(WIN32) + +#include + +typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static int numPhysicalCores = 0; + if (numPhysicalCores != 0) return numPhysicalCores; + + { LPFN_GLPI glpi; + BOOL done = FALSE; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL; + DWORD returnLength = 0; + size_t byteOffset = 0; + + glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), + "GetLogicalProcessorInformation"); + + if (glpi == NULL) { + goto failed; + } + + while(!done) { + DWORD rc = glpi(buffer, &returnLength); + if (FALSE == rc) { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + if (buffer) + free(buffer); + buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength); + + if (buffer == NULL) { + perror("zstd"); + exit(1); + } + } else { + /* some other error */ + goto failed; + } + } else { + done = TRUE; + } + } + + ptr = buffer; + + while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) { + + if (ptr->Relationship == RelationProcessorCore) { + numPhysicalCores++; + } + + ptr++; + byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + } + + free(buffer); + + return numPhysicalCores; + } + +failed: + /* try to fall back on GetSystemInfo */ + { SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + numPhysicalCores = sysinfo.dwNumberOfProcessors; + if (numPhysicalCores == 0) numPhysicalCores = 1; /* just in case */ + } + return numPhysicalCores; +} + +#elif defined(__APPLE__) + +#include + +/* Use apple-provided syscall + * see: man 3 sysctl */ +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static S32 numPhysicalCores = 0; /* apple specifies int32_t */ + if (numPhysicalCores != 0) return numPhysicalCores; + + { size_t size = sizeof(S32); + int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0); + if (ret != 0) { + if (errno == ENOENT) { + /* entry not present, fall back on 1 */ + numPhysicalCores = 1; + } else { + perror("zstd: can't get number of physical cpus"); + 
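+                /* any sysctl failure other than ENOENT is treated as fatal */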
exit(1); + } + } + + return numPhysicalCores; + } +} + +#elif defined(__linux__) + +/* parse /proc/cpuinfo + * siblings / cpu cores should give hyperthreading ratio + * otherwise fall back on sysconf */ +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static int numPhysicalCores = 0; + + if (numPhysicalCores != 0) return numPhysicalCores; + + numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numPhysicalCores == -1) { + /* value not queryable, fall back on 1 */ + return numPhysicalCores = 1; + } + + /* try to determine if there's hyperthreading */ + { FILE* const cpuinfo = fopen("/proc/cpuinfo", "r"); +#define BUF_SIZE 80 + char buff[BUF_SIZE]; + + int siblings = 0; + int cpu_cores = 0; + int ratio = 1; + + if (cpuinfo == NULL) { + /* fall back on the sysconf value */ + return numPhysicalCores; + } + + /* assume the cpu cores/siblings values will be constant across all + * present processors */ + while (!feof(cpuinfo)) { + if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) { + if (strncmp(buff, "siblings", 8) == 0) { + const char* const sep = strchr(buff, ':'); + if (*sep == '\0') { + /* formatting was broken? */ + goto failed; + } + + siblings = atoi(sep + 1); + } + if (strncmp(buff, "cpu cores", 9) == 0) { + const char* const sep = strchr(buff, ':'); + if (*sep == '\0') { + /* formatting was broken? */ + goto failed; + } + + cpu_cores = atoi(sep + 1); + } + } else if (ferror(cpuinfo)) { + /* fall back on the sysconf value */ + goto failed; + } + } + if (siblings && cpu_cores) { + ratio = siblings / cpu_cores; + } +failed: + fclose(cpuinfo); + return numPhysicalCores = numPhysicalCores / ratio; + } +} + +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) + +/* Use apple-provided syscall + * see: man 3 sysctl */ +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + static int numPhysicalCores = 0; + + if (numPhysicalCores != 0) return numPhysicalCores; + + numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numPhysicalCores == -1) { + /* value not queryable, fall back on 1 */ + return numPhysicalCores = 1; + } + return numPhysicalCores; +} + +#else + +UTIL_STATIC int UTIL_countPhysicalCores(void) +{ + /* assume 1 */ + return 1; +} + +#endif + +#if defined (__cplusplus) +} +#endif + +#endif /* UTIL_H_MODULE */ diff --git a/CPP/7zip/7zip.mak b/CPP/7zip/7zip.mak index 4ff0eaba..4dac0e4d 100644 --- a/CPP/7zip/7zip.mak +++ b/CPP/7zip/7zip.mak @@ -33,6 +33,7 @@ OBJS = \ $(LZ5_OBJS) \ $(ZSTD_OBJS) \ $(ZSTDMT_OBJS) \ + $(FASTLZMA2_OBJS) \ $(ASM_OBJS) \ $O\resource.res \ @@ -208,6 +209,11 @@ $(ZSTDMT_OBJS): ../../../../C/zstdmt/$(*B).c $(COMPL_O2) !ENDIF +!IFDEF FASTLZMA2_OBJS +$(FASTLZMA2_OBJS): ../../../../C/fast-lzma2/$(*B).c + $(COMPL_O2) -DNO_XXHASH +!ENDIF + !ELSE @@ -287,6 +293,8 @@ $(ZSTDMT_OBJS): ../../../../C/zstdmt/$(*B).c -I ../../../../C/lz4 \ -I ../../../../C/lz5 \ -I ../../../../C/zstd +{../../../../C/fast-lzma2}.c{$O}.obj:: + $(COMPLB_O2) -DNO_XXHASH !ENDIF diff --git a/CPP/7zip/Bundles/Alone/makefile b/CPP/7zip/Bundles/Alone/makefile index 86d1d983..f93205b8 100644 --- a/CPP/7zip/Bundles/Alone/makefile +++ b/CPP/7zip/Bundles/Alone/makefile @@ -320,6 +320,18 @@ ZSTDMT_OBJS = \ $O\lz5-mt_decompress.obj \ $O\zstd-mt_threading.obj \ +FASTLZMA2_OBJS = \ + $O\fl2_error_private.obj \ + $O\fl2pool.obj \ + $O\fl2threading.obj \ + $O\fl2_common.obj \ + $O\fl2_compress.obj \ + $O\lzma2_enc.obj \ + $O\radix_bitpack.obj \ + $O\radix_mf.obj \ + $O\radix_struct.obj \ + $O\range_enc.obj \ + !include "../../UI/Console/Console.mak" 
!include "../../Aes.mak" diff --git a/CPP/7zip/Bundles/Format7z/makefile b/CPP/7zip/Bundles/Format7z/makefile index 2702daf6..8e11500d 100644 --- a/CPP/7zip/Bundles/Format7z/makefile +++ b/CPP/7zip/Bundles/Format7z/makefile @@ -242,4 +242,18 @@ ZSTDMT_OBJS = \ $O\lz5-mt_decompress.obj \ $O\zstd-mt_threading.obj \ +FASTLZMA2_OBJS = \ + $O\fl2_error_private.obj \ + $O\fl2pool.obj \ + $O\fl2threading.obj \ + $O\fl2_common.obj \ + $O\fl2_compress.obj \ + $O\lzma2_enc.obj \ + $O\radix_bitpack.obj \ + $O\radix_mf.obj \ + $O\radix_struct.obj \ + $O\range_enc.obj \ + + + !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Format7zF/makefile b/CPP/7zip/Bundles/Format7zF/makefile index 9292325f..7274402a 100644 --- a/CPP/7zip/Bundles/Format7zF/makefile +++ b/CPP/7zip/Bundles/Format7zF/makefile @@ -117,4 +117,16 @@ ZSTDMT_OBJS = \ $O\lz5-mt_decompress.obj \ $O\zstd-mt_threading.obj \ +FASTLZMA2_OBJS = \ + $O\fl2_error_private.obj \ + $O\fl2pool.obj \ + $O\fl2threading.obj \ + $O\fl2_common.obj \ + $O\fl2_compress.obj \ + $O\lzma2_enc.obj \ + $O\radix_bitpack.obj \ + $O\radix_mf.obj \ + $O\radix_struct.obj \ + $O\range_enc.obj \ + !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Format7zFO/makefile b/CPP/7zip/Bundles/Format7zFO/makefile index 4efb869a..4da2018f 100644 --- a/CPP/7zip/Bundles/Format7zFO/makefile +++ b/CPP/7zip/Bundles/Format7zFO/makefile @@ -118,4 +118,16 @@ ZSTDMT_OBJS = \ $O\lz5-mt_decompress.obj \ $O\zstd-mt_threading.obj \ +FASTLZMA2_OBJS = \ + $O\fl2_error_private.obj \ + $O\fl2pool.obj \ + $O\fl2threading.obj \ + $O\fl2_common.obj \ + $O\fl2_compress.obj \ + $O\lzma2_enc.obj \ + $O\radix_bitpack.obj \ + $O\radix_mf.obj \ + $O\radix_struct.obj \ + $O\range_enc.obj \ + !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Format7zUSB/makefile b/CPP/7zip/Bundles/Format7zUSB/makefile index da0d67f1..b1609727 100644 --- a/CPP/7zip/Bundles/Format7zUSB/makefile +++ b/CPP/7zip/Bundles/Format7zUSB/makefile @@ -234,4 +234,16 @@ ZSTDMT_OBJS = \ $O\lz5-mt_decompress.obj \ $O\zstd-mt_threading.obj \ +FASTLZMA2_OBJS = \ + $O\fl2_error_private.obj \ + $O\fl2pool.obj \ + $O\fl2threading.obj \ + $O\fl2_common.obj \ + $O\fl2_compress.obj \ + $O\lzma2_enc.obj \ + $O\radix_bitpack.obj \ + $O\radix_mf.obj \ + $O\radix_struct.obj \ + $O\range_enc.obj \ + !include "../../7zip.mak" diff --git a/CPP/7zip/Compress/Lzma2Encoder.cpp b/CPP/7zip/Compress/Lzma2Encoder.cpp index 18f7d029..da5b7b31 100644 --- a/CPP/7zip/Compress/Lzma2Encoder.cpp +++ b/CPP/7zip/Compress/Lzma2Encoder.cpp @@ -4,6 +4,8 @@ #include "../../../C/Alloc.h" +#include "../../../C/fast-lzma2/fl2_errors.h" + #include "../Common/CWrappers.h" #include "../Common/StreamUtils.h" @@ -119,4 +121,167 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, ISequentialOutStream return SResToHRESULT(res); } +CFastEncoder::CFastEncoder() +{ + _encoder = NULL; + reduceSize = 0; +} + +CFastEncoder::~CFastEncoder() +{ + if (_encoder) + FL2_freeCCtx(_encoder); +} + + +#define CHECK_F(f) if (FL2_isError(f)) return E_INVALIDARG; /* check and convert error code */ + +STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs, + const PROPVARIANT *coderProps, UInt32 numProps) +{ + CLzma2EncProps lzma2Props; + Lzma2EncProps_Init(&lzma2Props); + + for (UInt32 i = 0; i < numProps; i++) + { + RINOK(SetLzma2Prop(propIDs[i], coderProps[i], lzma2Props)); + } + if (_encoder == NULL) { + _encoder = FL2_createCCtxMt(lzma2Props.numTotalThreads); + if (_encoder == NULL) + return E_OUTOFMEMORY; + } + if (lzma2Props.lzmaProps.algo > 2) { + if 
(lzma2Props.lzmaProps.algo > 3) + return E_INVALIDARG; + lzma2Props.lzmaProps.algo = 2; + FL2_CCtx_setParameter(_encoder, FL2_p_highCompression, 1); + FL2_CCtx_setParameter(_encoder, FL2_p_compressionLevel, lzma2Props.lzmaProps.level); + } + else { + FL2_CCtx_setParameter(_encoder, FL2_p_7zLevel, lzma2Props.lzmaProps.level); + } + dictSize = lzma2Props.lzmaProps.dictSize; + if (!dictSize) { + dictSize = (UInt32)1 << FL2_CCtx_setParameter(_encoder, FL2_p_dictionaryLog, 0); + } + reduceSize = lzma2Props.lzmaProps.reduceSize; + reduceSize += (reduceSize < (UInt64)-1); /* prevent extra buffer shift after read */ + dictSize = (UInt32)min(dictSize, reduceSize); + unsigned dictLog = FL2_DICTLOG_MIN; + while (((UInt32)1 << dictLog) < dictSize) + ++dictLog; + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_dictionaryLog, dictLog)); + if (lzma2Props.lzmaProps.algo >= 0) { + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_strategy, (unsigned)lzma2Props.lzmaProps.algo)); + } + if (lzma2Props.lzmaProps.fb > 0) + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_fastLength, lzma2Props.lzmaProps.fb)); + if (lzma2Props.lzmaProps.mc) { + unsigned ml = 0; + while (((UInt32)1 << ml) < lzma2Props.lzmaProps.mc) + ++ml; + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_searchLog, ml)); + } + if (lzma2Props.lzmaProps.lc >= 0) + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_literalCtxBits, lzma2Props.lzmaProps.lc)); + if (lzma2Props.lzmaProps.lp >= 0) + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_literalPosBits, lzma2Props.lzmaProps.lp)); + if (lzma2Props.lzmaProps.pb >= 0) + CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_posBits, lzma2Props.lzmaProps.pb)); + FL2_CCtx_setParameter(_encoder, FL2_p_omitProperties, 1); + FL2_CCtx_setParameter(_encoder, FL2_p_doXXHash, 0); + return S_OK; +} + + +#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) + +STDMETHODIMP CFastEncoder::WriteCoderProperties(ISequentialOutStream *outStream) +{ + Byte prop; + unsigned i; + for (i = 0; i < 40; i++) + if (dictSize <= LZMA2_DIC_SIZE_FROM_PROP(i)) + break; + prop = (Byte)i; + return WriteStream(outStream, &prop, 1); +} + + +typedef struct +{ + ISequentialOutStream* outStream; + ICompressProgressInfo* progress; + UInt64 in_processed; + UInt64 out_processed; + HRESULT res; +} EncodingObjects; + +static int FL2LIB_CALL Progress(size_t done, void* opaque) +{ + EncodingObjects* p = (EncodingObjects*)opaque; + if (p && p->progress) { + UInt64 in_processed = p->in_processed + done; + p->res = p->progress->SetRatioInfo(&in_processed, &p->out_processed); + return p->res != S_OK; + } + return 0; +} + +static int FL2LIB_CALL Write(const void* src, size_t srcSize, void* opaque) +{ + EncodingObjects* p = (EncodingObjects*)opaque; + p->res = WriteStream(p->outStream, src, srcSize); + return p->res != S_OK; +} + +STDMETHODIMP CFastEncoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream, + const UInt64 * /* inSize */, const UInt64 * /* outSize */, ICompressProgressInfo *progress) +{ + HRESULT err = S_OK; + inBuffer.AllocAtLeast(dictSize); + EncodingObjects objs = { outStream, progress, 0, 0, S_OK }; + FL2_blockBuffer block = { inBuffer, 0, 0, dictSize }; + do + { + FL2_shiftBlock(_encoder, &block); + size_t inSize = dictSize - block.start; + err = ReadStream(inStream, inBuffer + block.start, &inSize); + if (err != S_OK) + break; + block.end += inSize; + if (inSize) { + size_t cSize = FL2_compressCCtxBlock_toFn(_encoder, Write, &objs, &block, Progress); + if (FL2_isError(cSize)) { + if (FL2_getErrorCode(cSize) 
== FL2_error_memory_allocation) + return E_OUTOFMEMORY; + return objs.res != S_OK ? objs.res : S_FALSE; + } + if (objs.res != S_OK) + return objs.res; + objs.out_processed += cSize; + objs.in_processed += inSize; + if (progress) { + err = progress->SetRatioInfo(&objs.in_processed, &objs.out_processed); + if (err != S_OK) + break; + } + if (block.end < dictSize) + break; + } + else break; + + } while (err == S_OK); + + if (err == S_OK) { + size_t cSize = FL2_endFrame_toFn(_encoder, Write, &objs); + if (FL2_isError(cSize)) + return S_FALSE; + objs.out_processed += cSize; + err = objs.res; + } + return err; +} + }} diff --git a/CPP/7zip/Compress/Lzma2Encoder.h b/CPP/7zip/Compress/Lzma2Encoder.h index 6539e73a..5628c095 100644 --- a/CPP/7zip/Compress/Lzma2Encoder.h +++ b/CPP/7zip/Compress/Lzma2Encoder.h @@ -4,8 +4,10 @@ #define __LZMA2_ENCODER_H #include "../../../C/Lzma2Enc.h" +#include "../../../C/fast-lzma2/fast-lzma2.h" #include "../../Common/MyCom.h" +#include "../../Common/MyBuffer.h" #include "../ICoder.h" @@ -37,6 +39,32 @@ public: virtual ~CEncoder(); }; +class CFastEncoder : + public ICompressCoder, + public ICompressSetCoderProperties, + public ICompressWriteCoderProperties, + public CMyUnknownImp +{ + FL2_CCtx* _encoder; + CByteBuffer inBuffer; + UInt64 reduceSize; + UInt32 dictSize; + +public: + MY_UNKNOWN_IMP3( + ICompressCoder, + ICompressSetCoderProperties, + ICompressWriteCoderProperties) + + STDMETHOD(Code)(ISequentialInStream *inStream, ISequentialOutStream *outStream, + const UInt64 *inSize, const UInt64 *outSize, ICompressProgressInfo *progress); + STDMETHOD(SetCoderProperties)(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps); + STDMETHOD(WriteCoderProperties)(ISequentialOutStream *outStream); + + CFastEncoder(); + virtual ~CFastEncoder(); +}; + }} #endif diff --git a/CPP/7zip/Compress/Lzma2Register.cpp b/CPP/7zip/Compress/Lzma2Register.cpp index 43671056..42089e3b 100644 --- a/CPP/7zip/Compress/Lzma2Register.cpp +++ b/CPP/7zip/Compress/Lzma2Register.cpp @@ -14,9 +14,19 @@ namespace NCompress { namespace NLzma2 { REGISTER_CODEC_E(LZMA2, - CDecoder(), - CEncoder(), - 0x21, - "LZMA2") + CDecoder(), + CEncoder(), + 0x21, + "LZMA2") +} -}} +namespace NFLzma2 { + +REGISTER_CODEC_E(FLZMA2, + NCompress::NLzma2::CDecoder(), + NCompress::NLzma2::CFastEncoder(), + 0x21, + "FLZMA2") +} + +} diff --git a/CPP/7zip/UI/GUI/CompressDialog.cpp b/CPP/7zip/UI/GUI/CompressDialog.cpp index c3c8c72e..c47228fb 100644 --- a/CPP/7zip/UI/GUI/CompressDialog.cpp +++ b/CPP/7zip/UI/GUI/CompressDialog.cpp @@ -110,6 +110,7 @@ enum EMethodID kLIZARD_M4, kLZMA, kLZMA2, + kFLZMA2, kPPMd, kBZip2, kDeflate, @@ -130,6 +131,7 @@ static LPCSTR const kMethodsLongnames[] = , "Lizard, LIZv1 + Huffman" , "LZMA" , "LZMA2" + , "LZMA2, Fast" , "PPMd" , "BZip2" , "Deflate" @@ -150,6 +152,7 @@ static LPCSTR const kMethodsNames[] = , "Lizard" , "LZMA" , "LZMA2" + , "FLZMA2" , "PPMd" , "BZip2" , "Deflate" @@ -195,6 +198,7 @@ static const EMethodID g_7zMethods[] = kLIZARD_M2, kLIZARD_M3, kLIZARD_M4, + kFLZMA2, kLZMA2, kLZMA, kPPMd, @@ -209,6 +213,7 @@ static const EMethodID g_7zSfxMethods[] = kZSTD, kLZMA, kLZMA2, + kFLZMA2, kPPMd }; @@ -978,6 +983,12 @@ bool CCompressDialog::OnCommand(int code, int itemID, LPARAM lParam) case IDC_COMPRESS_LEVEL: { + { + const CArcInfoEx &ai = (*ArcFormats)[GetFormatIndex()]; + int index = FindRegistryFormatAlways(ai.Name); + NCompression::CFormatOptions &fo = m_RegistryInfo.Formats[index]; + fo.ResetForLevelChange(); + } SetMethod(GetMethodID()); SetSolidBlockSize(); 
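+      // after ResetForLevelChange() above, the Set* calls re-derive the method-dependent defaults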
SetNumThreads(); @@ -1304,7 +1315,7 @@ void CCompressDialog::SetMethod(int keepMethodId) } } - if (!weUseSameMethod) +// if (!weUseSameMethod) { SetDictionary(); SetOrder(); @@ -1392,6 +1403,42 @@ void CCompressDialog::AddDictionarySize(UInt32 size) m_Dictionary.SetItemData(index, size); } +typedef enum { + FL2_fast, + FL2_opt, + FL2_ultra +} FL2_strategy; + +typedef struct { + unsigned dictionaryLog; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory, slower */ + unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */ + unsigned chainLog; /* fully searched segment : larger == more compression, slower, more memory; hybrid mode only (ultra) */ + unsigned searchLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */ + unsigned searchDepth; /* maximum depth for resolving string matches : larger == more compression, slower; >= 64 == more memory, slower */ + unsigned fastLength; /* acceptable match size for parser, not less than searchDepth : larger == more compression, slower; fast bytes parameter from 7-zip */ + unsigned divideAndConquer; /* split long chains of 2-byte matches into shorter chains with a small overlap : faster, somewhat less compression; enabled by default */ + unsigned bufferLog; /* buffer size for processing match chains is (dictionaryLog - bufferLog) : when divideAndConquer enabled, affects compression; */ + /* when divideAndConquer disabled, affects speed in a hardware-dependent manner */ + FL2_strategy strategy; /* encoder strategy : fast, optimized or ultra (hybrid) */ +} FL2_compressionParameters; + +#define FL2_MAX_7Z_CLEVEL 9 + +static const FL2_compressionParameters FL2_7zCParameters[FL2_MAX_7Z_CLEVEL + 1] = { + { 0,0,0,0,0,0,0 }, + { 20, 1, 7, 0, 6, 32, 1, 8, FL2_fast }, /* 1 */ + { 20, 2, 7, 0, 12, 32, 1, 8, FL2_fast }, /* 2 */ + { 21, 2, 7, 0, 16, 32, 1, 8, FL2_fast }, /* 3 */ + { 20, 2, 7, 0, 16, 32, 1, 8, FL2_opt }, /* 4 */ + { 24, 2, 9, 0, 40, 48, 1, 8, FL2_ultra }, /* 5 */ + { 25, 2, 10, 0, 48, 64, 1, 8, FL2_ultra }, /* 6 */ + { 26, 2, 11, 1, 60, 96, 1, 9, FL2_ultra }, /* 7 */ + { 27, 2, 12, 2, 128, 128, 1, 10, FL2_ultra }, /* 8 */ + { 27, 3, 14, 3, 252, 160, 0, 10, FL2_ultra } /* 9 */ +}; + +#define RMF_BUILDER_SIZE (8 * 0x40100U) + void CCompressDialog::SetDictionary() { m_Dictionary.ResetContent(); @@ -1458,6 +1505,39 @@ void CCompressDialog::SetDictionary() break; } + case kFLZMA2: + { + static const UInt32 kMinDicSize = (1 << 20); + level += !level; + if (level > FL2_MAX_7Z_CLEVEL) + level = FL2_MAX_7Z_CLEVEL; + if (defaultDict == (UInt32)(Int32)-1) + defaultDict = (UInt32)1 << FL2_7zCParameters[level].dictionaryLog; + + m_Dictionary.SetCurSel(0); + + for (unsigned i = 20; i <= 31; i++) { + UInt32 dict = (UInt32)1 << i; + + if (dict > + #ifdef MY_CPU_64BIT + (1 << 30) + #else + (1 << 27) + #endif + ) + continue; + + AddDictionarySize(dict); + UInt64 decomprSize; + UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize); + if (dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize)) + m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); + } + + break; + } + case kPPMd: { if (defaultDict == (UInt32)(Int32)-1) @@ -1598,9 +1678,14 @@ void CCompressDialog::SetOrder() { case kLZMA: case kLZMA2: + case kFLZMA2: { - if (defaultOrder == (UInt32)(Int32)-1) - defaultOrder = (level >= 7) ? 
64 : 32; + if (defaultOrder == (UInt32)(Int32)-1) { + if (methodID == kFLZMA2) + defaultOrder = FL2_7zCParameters[level].fastLength; + else + defaultOrder = (level >= 7) ? 64 : 32; + } for (unsigned i = 3; i <= 8; i++) for (unsigned j = 0; j < 2; j++) { @@ -1820,6 +1905,7 @@ void CCompressDialog::SetNumThreads() case kLIZARD_M4: numAlgoThreadsMax = 128; break; case kLZMA: numAlgoThreadsMax = 2; break; case kLZMA2: numAlgoThreadsMax = 32; break; + case kFLZMA2: numAlgoThreadsMax = 128; break; case kBZip2: numAlgoThreadsMax = 32; break; } if (IsZipFormat()) @@ -1930,6 +2016,22 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) return size; } + case kFLZMA2: + { + if (level > FL2_MAX_7Z_CLEVEL) + level = FL2_MAX_7Z_CLEVEL; + size += dict * 5 + (1UL << 18) * numThreads; + unsigned depth = FL2_7zCParameters[level].searchDepth; + UInt32 bufSize = UInt32(1) << (FL2_7zCParameters[level].dictionaryLog - FL2_7zCParameters[level].bufferLog); + size += (bufSize * 12 + RMF_BUILDER_SIZE) * numThreads; + if (dict > (UInt32(1) << 26) || depth > 63) + size += dict; + if (FL2_7zCParameters[level].strategy == FL2_ultra) + size += (UInt32(4) << 14) + (UInt32(4) << FL2_7zCParameters[level].chainLog); + decompressMemory = dict + (2 << 20); + return size; + } + case kPPMd: { decompressMemory = dict + (2 << 20);
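+        // PPMd: decompression needs the model memory (dict) plus ~2 MiB of overhead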