Add Fast LZMA2 codec

This commit is contained in:
conor42
2018-11-05 21:22:10 +10:00
parent ed069bab9e
commit ab10047253
41 changed files with 9060 additions and 8 deletions

455
C/fast-lzma2/fast-lzma2.h Normal file
View File

@@ -0,0 +1,455 @@
/*
* Copyright (c) 2017-present, Conor McCarthy
* All rights reserved.
* Based on zstd.h copyright Yann Collet
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#if defined (__cplusplus)
extern "C" {
#endif
#ifndef FAST_LZMA2_H
#define FAST_LZMA2_H
/* ====== Dependency ======*/
#include <stddef.h> /* size_t */
/* ===== FL2LIB_API : control library symbols visibility ===== */
#ifndef FL2LIB_VISIBILITY
# if defined(__GNUC__) && (__GNUC__ >= 4)
# define FL2LIB_VISIBILITY __attribute__ ((visibility ("default")))
# else
# define FL2LIB_VISIBILITY
# endif
#endif
#if defined(FL2_DLL_EXPORT) && (FL2_DLL_EXPORT==1)
# define FL2LIB_API __declspec(dllexport) FL2LIB_VISIBILITY
#elif defined(FL2_DLL_IMPORT) && (FL2_DLL_IMPORT==1)
# define FL2LIB_API __declspec(dllimport) FL2LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
# define FL2LIB_API FL2LIB_VISIBILITY
#endif
/* ====== Calling convention ======*/
#if !defined _WIN32 || defined __x86_64__s || defined _M_X64 || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8)
# define FL2LIB_CALL
#elif defined(__GNUC__)
# define FL2LIB_CALL __attribute__((cdecl))
#elif defined(_MSC_VER)
# define FL2LIB_CALL __cdecl
#else
# define FL2LIB_CALL
#endif
/*******************************************************************************************************
Introduction
*********************************************************************************************************/
/*------ Version ------*/
#define FL2_VERSION_MAJOR 0
#define FL2_VERSION_MINOR 9
#define FL2_VERSION_RELEASE 2
#define FL2_VERSION_NUMBER (FL2_VERSION_MAJOR *100*100 + FL2_VERSION_MINOR *100 + FL2_VERSION_RELEASE)
FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void); /**< useful to check dll version */
#define FL2_LIB_VERSION FL2_VERSION_MAJOR.FL2_VERSION_MINOR.FL2_VERSION_RELEASE
#define FL2_QUOTE(str) #str
#define FL2_EXPAND_AND_QUOTE(str) FL2_QUOTE(str)
#define FL2_VERSION_STRING FL2_EXPAND_AND_QUOTE(FL2_LIB_VERSION)
FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void);
/***************************************
* Simple API
***************************************/
#define FL2_MAXTHREADS 200
/*! FL2_compress() :
* Compresses `src` content as a single LZMA2 compressed stream into already allocated `dst`.
* Call FL2_compressMt() to use > 1 thread. Specify nbThreads = 0 to use all cores.
* @return : compressed size written into `dst` (<= `dstCapacity),
* or an error code if it fails (which can be tested using FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
FL2LIB_API size_t FL2LIB_CALL FL2_compressMt(void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel,
unsigned nbThreads);
/*! FL2_decompress() :
* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
* `dstCapacity` is an upper bound of originalSize to regenerate.
* If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
* @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
* or an errorCode if it fails (which can be tested using FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_decompress(void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
/*! FL2_findDecompressedSize()
* `src` should point to the start of a LZMA2 encoded stream.
* `srcSize` must be at least as large as the LZMA2 stream including end marker.
* @return : - decompressed size of the stream in `src`, if known
* - FL2_CONTENTSIZE_ERROR if an error occurred (e.g. corruption, srcSize too small)
* note 1 : a 0 return value means the frame is valid but "empty".
* note 2 : decompressed size can be very large (64-bits value),
* potentially larger than what local system can handle as a single memory segment.
* In which case, it's necessary to use streaming mode to decompress data.
* note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
* Always ensure return value fits within application's authorized limits.
* Each application can set its own limits. */
#define FL2_CONTENTSIZE_ERROR (size_t)-1
FL2LIB_API size_t FL2LIB_CALL FL2_findDecompressedSize(const void *src, size_t srcSize);
/*====== Helper functions ======*/
#define FL2_COMPRESSBOUND(srcSize) ((srcSize) + (((srcSize) + 0xFFF) / 0x1000) * 3 + 6) /* this formula calculates the maximum size of data stored in uncompressed chunks */
FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code); /*!< provides readable string from an error code */
FL2LIB_API int FL2LIB_CALL FL2_maxCLevel(void); /*!< maximum compression level available */
FL2LIB_API int FL2LIB_CALL FL2_maxHighCLevel(void); /*!< maximum compression level available in high mode */
/***************************************
* Explicit memory management
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
* The context may not use the number of threads requested if the library is compiled for single-threaded
* compression or nbThreads > FL2_MAXTHREADS. Call FL2_CCtx_nbThreads to obtain the actual number. */
typedef struct FL2_CCtx_s FL2_CCtx;
FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtx(void);
FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtxMt(unsigned nbThreads);
FL2LIB_API void FL2LIB_CALL FL2_freeCCtx(FL2_CCtx* cctx);
FL2LIB_API unsigned FL2LIB_CALL FL2_CCtx_nbThreads(const FL2_CCtx* ctx);
/*! FL2_compressCCtx() :
* Same as FL2_compress(), requires an allocated FL2_CCtx (see FL2_createCCtx()). */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* ctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
/************************************************
* Caller-managed data buffer and overlap section
************************************************/
typedef struct {
unsigned char *data;
size_t start; /* start = 0 (first block) or overlap */
size_t end; /* never < overlap */
size_t bufSize; /* allocation size */
} FL2_blockBuffer;
typedef int (FL2LIB_CALL *FL2_progressFn)(size_t done, void* opaque);
/* Get the size of the overlap section. */
FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* ctx);
/* Copy the overlap section to the start to prepare for more data */
FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(FL2_CCtx* ctx, FL2_blockBuffer *block);
/* Copy the overlap to a different buffer. This allows a dual-buffer configuration where
* data is read into one block while the other is compressed. */
FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(FL2_CCtx* ctx, FL2_blockBuffer *block, unsigned char *dst);
FL2LIB_API void FL2LIB_CALL FL2_beginFrame(FL2_CCtx* const cctx);
/*! FL2_compressCCtxBlock() :
* Same as FL2_compressCCtx except the caller is responsible for supplying an overlap section.
* The FL2_p_overlapFraction parameter will not be used.
* srcStart + srcSize should equal the dictionary size except on the last call.
* Can be called multiple times. FL2_endFrame() must be called when finished.
* For compatibility with this library the caller must write a property byte at
* the beginning of the output. Obtain it by calling FL2_dictSizeProp() before
* compressing the first block or after the last. No hash will be written, but
* the caller can calculate it using the interface in xxhash.h, write it at the end,
* and set bit 7 in the property byte. */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock(FL2_CCtx* ctx,
void* dst, size_t dstCapacity,
const FL2_blockBuffer *block,
FL2_progressFn progress, void* opaque);
/*! FL2_endFrame() :
* Write the end marker to terminate the LZMA2 stream.
* Must be called after compressing with FL2_compressCCtxBlock() */
FL2LIB_API size_t FL2LIB_CALL FL2_endFrame(FL2_CCtx* ctx,
void* dst, size_t dstCapacity);
typedef int (FL2LIB_CALL *FL2_writerFn)(const void* src, size_t srcSize, void* opaque);
/*! FL2_compressCCtxBlock_toFn() :
* Same as FL2_compressCCtx except the caller is responsible for supplying an
* overlap section, and compressed data is written to a callback function.
* The FL2_p_overlapFraction parameter will not be used.
* Can be called multiple times. FL2_endFrame_toFn() must be called when finished. */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock_toFn(FL2_CCtx* ctx,
FL2_writerFn writeFn, void* opaque,
const FL2_blockBuffer *block,
FL2_progressFn progress);
/*! FL2_endFrame() :
* Write the end marker to a callback function to terminate the LZMA2 stream.
* Must be called after compressing with FL2_compressCCtxBlock_toFn() */
FL2LIB_API size_t FL2LIB_CALL FL2_endFrame_toFn(FL2_CCtx* ctx,
FL2_writerFn writeFn, void* opaque);
/*! FL2_dictSizeProp() :
* Get the dictionary size property.
* Intended for use with the FL2_p_omitProperties parameter for creating a
* 7-zip compatible LZMA2 stream. */
FL2LIB_API unsigned char FL2LIB_CALL FL2_dictSizeProp(FL2_CCtx* ctx);
/*= Decompression context
* When decompressing many times,
* it is recommended to allocate a context only once,
* and re-use it for each successive compression operation.
* This will make the workload friendlier for the system's memory.
* Use one context per thread for parallel execution. */
typedef struct CLzma2Dec_s FL2_DCtx;
FL2LIB_API FL2_DCtx* FL2LIB_CALL FL2_createDCtx(void);
FL2LIB_API size_t FL2LIB_CALL FL2_freeDCtx(FL2_DCtx* dctx);
/*! FL2_decompressDCtx() :
* Same as FL2_decompress(), requires an allocated FL2_DCtx (see FL2_createDCtx()) */
FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* ctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
/****************************
* Streaming
****************************/
typedef struct FL2_inBuffer_s {
const void* src; /**< start of input buffer */
size_t size; /**< size of input buffer */
size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
} FL2_inBuffer;
typedef struct FL2_outBuffer_s {
void* dst; /**< start of output buffer */
size_t size; /**< size of output buffer */
size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
} FL2_outBuffer;
/*-***********************************************************************
* Streaming compression - HowTo
*
* A FL2_CStream object is required to track streaming operation.
* Use FL2_createCStream() and FL2_freeCStream() to create/release resources.
* FL2_CStream objects can be reused multiple times on consecutive compression operations.
* It is recommended to re-use FL2_CStream in situations where many streaming operations will be achieved consecutively,
* since it will play nicer with system's memory, by re-using already allocated memory.
*
* Start a new compression by initializing FL2_CStream.
* Use FL2_initCStream() to start a new compression operation.
*
* Use FL2_compressStream() repetitively to consume input stream.
* The function will automatically update both `pos` fields.
* It will always consume the entire input unless an error occurs,
* unlike the decompression function.
* @return : a size hint - remaining capacity to fill before compression occurs,
* or an error code, which can be tested using FL2_isError().
* Note : it's just a hint, any other value will work fine.
*
* At any moment, it's possible, but not recommended, to flush whatever data remains
* within internal buffer using FL2_flushStream().
* `output->pos` will be updated.
* Note 1 : this will reduce compression ratio because the algorithm is block-based.
* Note 2 : some content might still be left within internal buffers if `output->size` is too small.
* @return : nb of bytes still present within internal buffers (0 if they're empty)
* or an error code, which can be tested using FL2_isError().
*
* FL2_endStream() instructs to finish a frame.
* It will perform a flush and write the LZMA2 termination byte (required).
* FL2_endStream() may not be able to flush full data if `output->size` is too small.
* In which case, call again FL2_endStream() to complete the flush.
* @return : 0 if stream fully completed and flushed,
* or >0 to indicate the nb of bytes still present within the internal buffers,
* or an error code, which can be tested using FL2_isError().
*
* *******************************************************************/
typedef struct FL2_CStream_s FL2_CStream;
/*===== FL2_CStream management functions =====*/
FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStream(void);
FL2LIB_API size_t FL2LIB_CALL FL2_freeCStream(FL2_CStream* fcs);
/*===== Streaming compression functions =====*/
FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel);
FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer* output, FL2_inBuffer* input);
FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer* output);
FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer* output);
/*-***************************************************************************
* Streaming decompression - HowTo
*
* A FL2_DStream object is required to track streaming operations.
* Use FL2_createDStream() and FL2_freeDStream() to create/release resources.
* FL2_DStream objects can be re-used multiple times.
*
* Use FL2_initDStream() to start a new decompression operation.
* @return : recommended first input size
*
* Use FL2_decompressStream() repetitively to consume your input.
* The function will update both `pos` fields.
* If `input.pos < input.size`, some input has not been consumed.
* It's up to the caller to present again remaining data.
* More data must be loaded if `input.pos + LZMA_REQUIRED_INPUT_MAX >= input.size`
* If `output.pos < output.size`, decoder has flushed everything it could.
* @return : 0 when a frame is completely decoded and fully flushed,
* an error code, which can be tested using FL2_isError(),
* 1, which means there is still some decoding to do to complete current frame.
* *******************************************************************************/
#define LZMA_REQUIRED_INPUT_MAX 20
typedef struct FL2_DStream_s FL2_DStream;
/*===== FL2_DStream management functions =====*/
FL2LIB_API FL2_DStream* FL2LIB_CALL FL2_createDStream(void);
FL2LIB_API size_t FL2LIB_CALL FL2_freeDStream(FL2_DStream* fds);
/*===== Streaming decompression functions =====*/
FL2LIB_API size_t FL2LIB_CALL FL2_initDStream(FL2_DStream* fds);
FL2LIB_API size_t FL2LIB_CALL FL2_decompressStream(FL2_DStream* fds, FL2_outBuffer* output, FL2_inBuffer* input);
/*-***************************************************************************
* Compression parameters - HowTo
*
* Any function that takes a 'compressionLevel' parameter will replace any
* parameters affected by compression level that are already set.
* Call FL2_CCtx_setParameter with FL2_p_compressionLevel to set the level,
* then call FL2_CCtx_setParameter again with any other settings to change.
* Specify compressionLevel=0 when calling a compression function.
* *******************************************************************************/
#define FL2_DICTLOG_MAX_32 27
#define FL2_DICTLOG_MAX_64 30
#define FL2_DICTLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? FL2_DICTLOG_MAX_32 : FL2_DICTLOG_MAX_64))
#define FL2_DICTLOG_MIN 20
#define FL2_CHAINLOG_MAX 14
#define FL2_CHAINLOG_MIN 4
#define FL2_SEARCHLOG_MAX (FL2_CHAINLOG_MAX-1)
#define FL2_SEARCHLOG_MIN 0
#define FL2_FASTLENGTH_MIN 6 /* only used by optimizer */
#define FL2_FASTLENGTH_MAX 273 /* only used by optimizer */
#define FL2_BLOCK_OVERLAP_MIN 0
#define FL2_BLOCK_OVERLAP_MAX 14
#define FL2_BLOCK_LOG_MIN 12
#define FL2_BLOCK_LOG_MAX 32
#define FL2_SEARCH_DEPTH_MIN 6
#define FL2_SEARCH_DEPTH_MAX 254
#define FL2_BUFFER_SIZE_LOG_MIN 6
#define FL2_BUFFER_SIZE_LOG_MAX 12
#define FL2_LC_MIN 0
#define FL2_LC_MAX 4
#define FL2_LP_MIN 0
#define FL2_LP_MAX 4
#define FL2_PB_MIN 0
#define FL2_PB_MAX 4
typedef enum {
/* compression parameters */
FL2_p_compressionLevel, /* Update all compression parameters according to pre-defined cLevel table
* Default level is FL2_CLEVEL_DEFAULT==9.
* Setting FL2_p_highCompression to 1 switches to an alternate cLevel table.
* Special: value 0 means "do not change cLevel". */
FL2_p_highCompression, /* Maximize compression ratio for a given dictionary size.
* Has 9 levels instead of 12, with dictionaryLog 20 - 28. */
FL2_p_7zLevel, /* For use by the 7-zip fork employing this library. 1 - 9 */
FL2_p_dictionaryLog, /* Maximum allowed back-reference distance, expressed as power of 2.
* Must be clamped between FL2_DICTLOG_MIN and FL2_DICTLOG_MAX.
* Special: value 0 means "do not change dictionaryLog". */
FL2_p_overlapFraction, /* The radix match finder is block-based, so some overlap is retained from
* each block to improve compression of the next. This value is expressed
* as n / 16 of the block size (dictionary size). Larger values are slower.
* Values above 2 mostly yield only a small improvement in compression. */
FL2_p_blockSize,
FL2_p_bufferLog, /* Buffering speeds up the matchfinder. Buffer size is
* 2 ^ (dictionaryLog - bufferLog). Lower number = slower, better compression,
* higher memory usage. */
FL2_p_chainLog, /* Size of the full-search table, as a power of 2.
* Resulting table size is (1 << (chainLog+2)).
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy.
* Special: value 0 means "do not change chainLog". */
FL2_p_searchLog, /* Number of search attempts, as a power of 2, made by the HC3 match finder
* used only in hybrid mode.
* More attempts result in slightly better and slower compression.
* This parameter is not used by the "fast" and "optimize" strategies.
* Special: value 0 means "do not change searchLog". */
FL2_p_literalCtxBits, /* lc value for LZMA2 encoder */
FL2_p_literalPosBits, /* lp value for LZMA2 encoder */
FL2_p_posBits, /* pb value for LZMA2 encoder */
FL2_p_searchDepth, /* Match finder will resolve string matches up to this length. If a longer
* match exists further back in the input, it will not be found. */
FL2_p_fastLength, /* Only useful for strategies >= opt.
* Length of Match considered "good enough" to stop search.
* Larger values make compression stronger and slower.
* Special: value 0 means "do not change fastLength". */
FL2_p_divideAndConquer, /* Split long chains of 2-byte matches into shorter chains with a small overlap
* during further processing. Allows buffering of all chains at length 2.
* Faster, less compression. Generally a good tradeoff. Enabled by default. */
FL2_p_strategy, /* 1 = fast; 2 = optimize, 3 = ultra (hybrid mode).
* The higher the value of the selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "do not change strategy". */
#ifndef NO_XXHASH
FL2_p_doXXHash, /* Calculate a 32-bit xxhash value from the input data and store it
* after the stream terminator. The value will be checked on decompression.
* 0 = do not calculate; 1 = calculate (default) */
#endif
FL2_p_omitProperties, /* Omit the property byte at the start of the stream. For use within 7-zip */
/* or other containers which store the property byte elsewhere. */
/* Cannot be decoded by this library. */
#ifdef RMF_REFERENCE
FL2_p_useReferenceMF /* Use the reference matchfinder for development purposes. SLOW. */
#endif
} FL2_cParameter;
/*! FL2_CCtx_setParameter() :
* Set one compression parameter, selected by enum FL2_cParameter.
* @result : informational value (typically, the one being set, possibly corrected),
* or an error code (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, unsigned value);
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, unsigned value);
/***************************************
* Context memory usage
***************************************/
/*! FL2_estimate*() :
* These functions estimate memory usage of a CCtx before its creation or before any operation has begun.
* FL2_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
* To use FL2_estimateCCtxSize_usingCCtx, set the compression level and any other settings for the context,
* then call the function. Some allocation occurs when the context is created, but the large memory buffers
* used for string matching are allocated only when compression begins. */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize(int compressionLevel, unsigned nbThreads); /*!< memory usage determined by level */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_usingCCtx(const FL2_CCtx* cctx); /*!< memory usage determined by settings */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads);
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCCtx(const FL2_CStream* fcs);
#endif /* FAST_LZMA2_H */
#if defined (__cplusplus)
}
#endif