Update to Fast LZMA2 1.0.0

conor42
2019-03-18 00:05:50 +10:00
parent f531a44f1c
commit d85962e654
43 changed files with 5467 additions and 3943 deletions

@@ -1,6 +1,7 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
@@ -8,13 +9,15 @@
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H
#ifndef FL2_COMPILER_H
#define FL2_COMPILER_H
/*-*******************************************************
* Compiler specifics
*********************************************************/
/* force inlining */
#if !defined(FL2_NO_INLINE)
#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# define INLINE_KEYWORD inline
#else
@@ -29,6 +32,13 @@
# define FORCE_INLINE_ATTR
#endif
#else
#define INLINE_KEYWORD
#define FORCE_INLINE_ATTR
#endif
/**
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
@@ -54,24 +64,69 @@
/* force no inlining */
#ifdef _MSC_VER
# define FORCE_NOINLINE static __declspec(noinline)
# define FORCE_NOINLINE __declspec(noinline)
#else
# ifdef __GNUC__
# define FORCE_NOINLINE static __attribute__((__noinline__))
# define FORCE_NOINLINE __attribute__((__noinline__))
# else
# define FORCE_NOINLINE static
# define FORCE_NOINLINE
# endif
#endif
/* prefetch */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
#elif defined(__GNUC__)
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
#else
# define PREFETCH(ptr) /* disabled */
/* target attribute */
#ifndef __has_attribute
#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__)
# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
# define TARGET_ATTRIBUTE(target)
#endif
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/
#ifndef DYNAMIC_BMI2
#if ((defined(__clang__) && __has_attribute(__target__)) \
|| (defined(__GNUC__) \
&& (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
&& (defined(__x86_64__) || defined(_M_X86)) \
&& !defined(__BMI2__)
# define DYNAMIC_BMI2 1
#else
# define DYNAMIC_BMI2 0
#endif
#endif
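/* A sketch of how these macros combine (illustrative only; the decodeLoop
 * names below are not part of this library). A hot routine is compiled twice
 * and selected once at run time from a CPU-feature flag detected elsewhere: */
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2")
static size_t decodeLoop_bmi2(const unsigned char* src, size_t size) { (void)src; return size; }
#endif
static size_t decodeLoop_default(const unsigned char* src, size_t size) { (void)src; return size; }

static size_t decodeLoop(const unsigned char* src, size_t size, int cpuHasBmi2)
{
#if DYNAMIC_BMI2
    if (cpuHasBmi2)
        return decodeLoop_bmi2(src, size);   /* BMI2 instructions enabled */
#endif
    (void)cpuHasBmi2;
    return decodeLoop_default(src, size);    /* portable fallback */
}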
/* prefetch
* can be disabled, by declaring NO_PREFETCH build macro */
#if defined(NO_PREFETCH)
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
#else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
# else
# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
# endif
#endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
#define PREFETCH_AREA(p, s) { \
const char* const _ptr = (const char*)(p); \
size_t const _size = (size_t)(s); \
size_t _pos; \
for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
PREFETCH_L2(_ptr + _pos); \
} \
}
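/* A minimal usage sketch, assuming a table region about to be walked
 * (warm_table() is hypothetical, not part of this library): */
static void warm_table(const unsigned char* table, size_t len)
{
    PREFETCH_AREA(table, len);   /* one PREFETCH_L2 per 64-byte cache line */
    PREFETCH_L1(table);          /* pull the first line into L1 for immediate use */
}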
/* disable warnings */
#ifdef _MSC_VER /* Visual Studio */
@@ -83,4 +138,4 @@
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
#endif
#endif /* ZSTD_COMPILER_H */
#endif /* FL2_COMPILER_H */

@@ -1,3 +1,13 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef ZSTD_COUNT_H_
#define ZSTD_COUNT_H_
@@ -86,7 +96,7 @@ static unsigned ZSTD_NbCommonBytes(register size_t val)
}
MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
{
const BYTE* const pStart = pIn;
const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);

C/fast-lzma2/dict_buffer.c (new file, 230 lines)

@@ -0,0 +1,230 @@
/*
* Copyright (c) 2019, Conor McCarthy
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include <stdlib.h>
#include "dict_buffer.h"
#include "fl2_internal.h"
#define ALIGNMENT_SIZE 16U
#define ALIGNMENT_MASK (~(size_t)(ALIGNMENT_SIZE-1))
/* DICT_buffer functions */
int DICT_construct(DICT_buffer * const buf, int const async)
{
buf->data[0] = NULL;
buf->data[1] = NULL;
buf->size = 0;
buf->async = (async != 0);
#ifndef NO_XXHASH
buf->xxh = NULL;
#endif
return 0;
}
int DICT_init(DICT_buffer * const buf, size_t const dict_size, size_t const overlap, unsigned const reset_multiplier, int const do_hash)
{
/* Allocate if not yet allocated or existing dict too small */
if (buf->data[0] == NULL || dict_size > buf->size) {
/* Free any existing buffers */
DICT_destruct(buf);
buf->data[0] = malloc(dict_size);
buf->data[1] = NULL;
if (buf->async)
buf->data[1] = malloc(dict_size);
if (buf->data[0] == NULL || (buf->async && buf->data[1] == NULL)) {
DICT_destruct(buf);
return 1;
}
}
buf->index = 0;
buf->overlap = overlap;
buf->start = 0;
buf->end = 0;
buf->size = dict_size;
buf->total = 0;
buf->reset_interval = (reset_multiplier != 0) ? dict_size * reset_multiplier : ((size_t)1 << 31);
#ifndef NO_XXHASH
if (do_hash) {
if (buf->xxh == NULL) {
buf->xxh = XXH32_createState();
if (buf->xxh == NULL) {
DICT_destruct(buf);
return 1;
}
}
XXH32_reset(buf->xxh, 0);
}
else {
XXH32_freeState(buf->xxh);
buf->xxh = NULL;
}
#else
(void)do_hash;
#endif
return 0;
}
void DICT_destruct(DICT_buffer * const buf)
{
free(buf->data[0]);
free(buf->data[1]);
buf->data[0] = NULL;
buf->data[1] = NULL;
buf->size = 0;
#ifndef NO_XXHASH
XXH32_freeState(buf->xxh);
buf->xxh = NULL;
#endif
}
size_t DICT_size(const DICT_buffer * const buf)
{
return buf->size;
}
/* Get the dictionary buffer for adding input */
size_t DICT_get(DICT_buffer * const buf, void **const dict)
{
DICT_shift(buf);
DEBUGLOG(5, "Getting dict buffer %u, pos %u, avail %u", (unsigned)buf->index, (unsigned)buf->end, (unsigned)(buf->size - buf->end));
*dict = buf->data[buf->index] + buf->end;
return buf->size - buf->end;
}
/* Update with the amount added */
int DICT_update(DICT_buffer * const buf, size_t const added_size)
{
DEBUGLOG(5, "Added %u bytes to dict buffer %u", (unsigned)added_size, (unsigned)buf->index);
buf->end += added_size;
assert(buf->end <= buf->size);
return !DICT_availSpace(buf);
}
/* Read from input and write to the dict */
void DICT_put(DICT_buffer * const buf, FL2_inBuffer * const input)
{
size_t const to_read = MIN(buf->size - buf->end, input->size - input->pos);
DEBUGLOG(5, "CStream : reading %u bytes", (U32)to_read);
memcpy(buf->data[buf->index] + buf->end, (BYTE*)input->src + input->pos, to_read);
input->pos += to_read;
buf->end += to_read;
}
size_t DICT_availSpace(const DICT_buffer * const buf)
{
return buf->size - buf->end;
}
/* Get the size of uncompressed data. start is set to end after compression */
int DICT_hasUnprocessed(const DICT_buffer * const buf)
{
return buf->start < buf->end;
}
/* Get the buffer, overlap and end for compression */
void DICT_getBlock(DICT_buffer * const buf, FL2_dataBlock * const block)
{
block->data = buf->data[buf->index];
block->start = buf->start;
block->end = buf->end;
#ifndef NO_XXHASH
if (buf->xxh != NULL)
XXH32_update(buf->xxh, buf->data[buf->index] + buf->start, buf->end - buf->start);
#endif
buf->total += buf->end - buf->start;
buf->start = buf->end;
}
/* Shift occurs when all is processed and end is beyond the overlap size */
int DICT_needShift(DICT_buffer * const buf)
{
if (buf->start < buf->end)
return 0;
/* Reset the dict if the next compression cycle would exceed the reset interval */
size_t overlap = (buf->total + buf->size - buf->overlap > buf->reset_interval) ? 0 : buf->overlap;
return buf->start == buf->end && (overlap == 0 || buf->end >= overlap + ALIGNMENT_SIZE);
}
int DICT_async(const DICT_buffer * const buf)
{
return (int)buf->async;
}
/* Shift the overlap amount to the start of either the only dict buffer or the alternate one
* if it exists */
void DICT_shift(DICT_buffer * const buf)
{
if (buf->start < buf->end)
return;
size_t overlap = buf->overlap;
/* Reset the dict if the next compression cycle would exceed the reset interval */
if (buf->total + buf->size - buf->overlap > buf->reset_interval) {
DEBUGLOG(4, "Resetting dictionary after %u bytes", (unsigned)buf->total);
overlap = 0;
}
if (overlap == 0) {
/* No overlap means a simple buffer switch */
buf->start = 0;
buf->end = 0;
buf->index ^= buf->async;
buf->total = 0;
}
else if (buf->end >= overlap + ALIGNMENT_SIZE) {
size_t const from = (buf->end - overlap) & ALIGNMENT_MASK;
const BYTE *const src = buf->data[buf->index];
/* Copy to the alternate if one exists */
BYTE *const dst = buf->data[buf->index ^ buf->async];
overlap = buf->end - from;
if (overlap <= from || dst != src) {
DEBUGLOG(5, "Copy overlap data : %u bytes from %u", (unsigned)overlap, (unsigned)from);
memcpy(dst, src + from, overlap);
}
else if (from != 0) {
DEBUGLOG(5, "Move overlap data : %u bytes from %u", (unsigned)overlap, (unsigned)from);
memmove(dst, src + from, overlap);
}
/* New data will be written after the overlap */
buf->start = overlap;
buf->end = overlap;
/* Switch buffers */
buf->index ^= buf->async;
}
}
#ifndef NO_XXHASH
XXH32_hash_t DICT_getDigest(const DICT_buffer * const buf)
{
return XXH32_digest(buf->xxh);
}
#endif
size_t DICT_memUsage(const DICT_buffer * const buf)
{
return (1 + buf->async) * buf->size;
}

View File

@@ -0,0 +1,81 @@
/*
* Copyright (c) 2018, Conor McCarthy
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "fast-lzma2.h"
#include "mem.h"
#include "data_block.h"
#ifndef NO_XXHASH
# include "xxhash.h"
#endif
#ifndef FL2_DICT_BUFFER_H_
#define FL2_DICT_BUFFER_H_
#if defined (__cplusplus)
extern "C" {
#endif
/* DICT_buffer structure.
* Maintains one or two dictionary buffers. In a dual dict configuration (async==1), when the
* current buffer is full, the overlap region is copied to the other buffer, which then
* becomes the destination for input while the first is compressed. This is useful when I/O
* is much slower than compression. */
typedef struct {
BYTE* data[2];
size_t index;
size_t async;
size_t overlap;
size_t start; /* start = 0 (first block) or overlap */
size_t end; /* never < overlap */
size_t size; /* allocation size */
size_t total; /* total size compressed after last dict reset */
size_t reset_interval;
#ifndef NO_XXHASH
XXH32_state_t *xxh;
#endif
} DICT_buffer;
int DICT_construct(DICT_buffer *const buf, int const async);
int DICT_init(DICT_buffer *const buf, size_t const dict_size, size_t const overlap, unsigned const reset_multiplier, int const do_hash);
void DICT_destruct(DICT_buffer *const buf);
size_t DICT_size(const DICT_buffer *const buf);
size_t DICT_get(DICT_buffer *const buf, void **const dict);
int DICT_update(DICT_buffer *const buf, size_t const added_size);
void DICT_put(DICT_buffer *const buf, FL2_inBuffer* const input);
size_t DICT_availSpace(const DICT_buffer *const buf);
int DICT_hasUnprocessed(const DICT_buffer *const buf);
void DICT_getBlock(DICT_buffer *const buf, FL2_dataBlock *const block);
int DICT_needShift(DICT_buffer *const buf);
int DICT_async(const DICT_buffer *const buf);
void DICT_shift(DICT_buffer *const buf);
#ifndef NO_XXHASH
XXH32_hash_t DICT_getDigest(const DICT_buffer *const buf);
#endif
size_t DICT_memUsage(const DICT_buffer *const buf);
#if defined (__cplusplus)
}
#endif
#endif /* FL2_DICT_BUFFER_H_ */
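/* A sketch of the call sequence this header implies (DICT_buffer is an
 * internal API, so this is orientation only; read_input() is a hypothetical
 * callback): fill the dictionary, then hand a block to the encoder once the
 * buffer is full. */
static int feed_dict(DICT_buffer* buf, size_t (*read_input)(void* dst, size_t size))
{
    void* dict;
    size_t const space = DICT_get(buf, &dict);    /* shifts the overlap first if needed */
    size_t const added = read_input(dict, space);
    if (DICT_update(buf, added)) {                /* nonzero once the dictionary is full */
        FL2_dataBlock block;
        DICT_getBlock(buf, &block);               /* updates the hash, advances start to end */
        /* ...pass block to the encoder here... */
    }
    return added != 0;    /* 0 once the input is exhausted */
}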

@@ -53,9 +53,9 @@ Introduction
*********************************************************************************************************/
/*------ Version ------*/
#define FL2_VERSION_MAJOR 0
#define FL2_VERSION_MINOR 9
#define FL2_VERSION_RELEASE 2
#define FL2_VERSION_MAJOR 1
#define FL2_VERSION_MINOR 0
#define FL2_VERSION_RELEASE 0
#define FL2_VERSION_NUMBER (FL2_VERSION_MAJOR *100*100 + FL2_VERSION_MINOR *100 + FL2_VERSION_RELEASE)
FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void); /**< useful to check dll version */
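/* For example, version 1.0.0 encodes as 1*100*100 + 0*100 + 0 = 10000, so a
 * runtime check such as FL2_versionNumber() >= 10000 tests for this release
 * or later. */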
@@ -67,12 +67,13 @@ FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void); /**< useful to check
FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void);
#define FL2_MAXTHREADS 200
/***************************************
* Simple API
***************************************/
#define FL2_MAXTHREADS 200
/*! FL2_compress() :
* Compresses `src` content as a single LZMA2 compressed stream into already allocated `dst`.
* Call FL2_compressMt() to use > 1 thread. Specify nbThreads = 0 to use all cores.
@@ -88,20 +89,30 @@ FL2LIB_API size_t FL2LIB_CALL FL2_compressMt(void* dst, size_t dstCapacity,
unsigned nbThreads);
/*! FL2_decompress() :
* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
* `dstCapacity` is an upper bound of originalSize to regenerate.
* If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
* Decompresses a single LZMA2 compressed stream from `src` into already allocated `dst`.
* `compressedSize` : must be at least the size of the LZMA2 stream.
* `dstCapacity` is the original, uncompressed size to regenerate, returned by calling
* FL2_findDecompressedSize().
* Call FL2_decompressMt() to use > 1 thread. Specify nbThreads = 0 to use all cores. The stream
* must contain dictionary resets to use multiple threads. These are inserted during compression by
* default. The frequency can be changed/disabled with the FL2_p_resetInterval parameter setting.
* @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
* or an errorCode if it fails (which can be tested using FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_decompress(void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
FL2LIB_API size_t FL2LIB_CALL FL2_decompressMt(void* dst, size_t dstCapacity,
const void* src, size_t compressedSize,
unsigned nbThreads);
/*! FL2_findDecompressedSize()
* `src` should point to the start of a LZMA2 encoded stream.
* `srcSize` must be at least as large as the LZMA2 stream including end marker.
* A property byte is assumed to exist at position 0 in `src`. If the stream was created without one,
* subtract 1 byte from `src` when passing it to the function.
* @return : - decompressed size of the stream in `src`, if known
* - FL2_CONTENTSIZE_ERROR if an error occurred (e.g. corruption, srcSize too small)
* note 1 : a 0 return value means the frame is valid but "empty".
* note 1 : a 0 return value means the stream is valid but "empty".
* note 2 : decompressed size can be very large (64-bit value),
* potentially larger than what local system can handle as a single memory segment.
* In which case, it's necessary to use streaming mode to decompress data.
@@ -109,122 +120,80 @@ FL2LIB_API size_t FL2LIB_CALL FL2_decompress(void* dst, size_t dstCapacity,
* Always ensure return value fits within application's authorized limits.
* Each application can set its own limits. */
#define FL2_CONTENTSIZE_ERROR (size_t)-1
FL2LIB_API size_t FL2LIB_CALL FL2_findDecompressedSize(const void *src, size_t srcSize);
FL2LIB_API unsigned long long FL2LIB_CALL FL2_findDecompressedSize(const void *src, size_t srcSize);
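/* A minimal round-trip sketch using the simple API above, assuming the whole
 * input and output fit in memory; allocation failure handling is omitted. */
#include <stdlib.h>

static size_t round_trip(const void* src, size_t srcSize, int level)
{
    size_t const bound = FL2_compressBound(srcSize);
    void* const comp = malloc(bound);
    size_t const cSize = FL2_compress(comp, bound, src, srcSize, level);
    if (FL2_isError(cSize)) { free(comp); return cSize; }

    unsigned long long const dSize = FL2_findDecompressedSize(comp, cSize);
    if (dSize == FL2_CONTENTSIZE_ERROR) { free(comp); return FL2_CONTENTSIZE_ERROR; }
    void* const dst = malloc((size_t)dSize);      /* dSize assumed to fit in a size_t */
    size_t const res = FL2_decompress(dst, (size_t)dSize, comp, cSize);
    free(comp);
    free(dst);
    return res;    /* decompressed byte count, or an error code */
}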
/*====== Helper functions ======*/
#define FL2_COMPRESSBOUND(srcSize) ((srcSize) + (((srcSize) + 0xFFF) / 0x1000) * 3 + 6) /* this formula calculates the maximum size of data stored in uncompressed chunks */
#define FL2_COMPRESSBOUND(srcSize) ((srcSize) + (((srcSize) + 0xFFF) / 0x1000) * 3 + 6) /*!< calculates the maximum size of data stored in a sequence of uncompressed chunks */
FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
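/* Worked example: srcSize = 65536 gives 65536 + ((65536 + 0xFFF) / 0x1000) * 3 + 6
 * = 65536 + 16*3 + 6 = 65590, i.e. 3 bytes of chunk-header overhead per 4096-byte
 * unit (rounded up) plus a 6-byte margin. */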
FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
FL2LIB_API unsigned FL2LIB_CALL FL2_isTimedOut(size_t code); /*!< tells if a `size_t` function result is the timeout code */
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code); /*!< provides readable string from an error code */
FL2LIB_API int FL2LIB_CALL FL2_maxCLevel(void); /*!< maximum compression level available */
FL2LIB_API int FL2LIB_CALL FL2_maxHighCLevel(void); /*!< maximum compression level available in high mode */
/***************************************
* Explicit memory management
***************************************/
/*= Compression context
* When compressing many times,
* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
* This will make workload friendlier for system's memory.
* The context may not use the number of threads requested if the library is compiled for single-threaded
* compression or nbThreads > FL2_MAXTHREADS. Call FL2_CCtx_nbThreads to obtain the actual number. */
* When compressing many times, it is recommended to allocate a context just once,
* and re-use it for each successive compression operation. This will make the workload
* friendlier for the system's memory. The context may not use the number of threads requested
* if the library is compiled for single-threaded compression or nbThreads > FL2_MAXTHREADS.
* Call FL2_getCCtxThreadCount() to obtain the actual number allocated. */
typedef struct FL2_CCtx_s FL2_CCtx;
FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtx(void);
FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtxMt(unsigned nbThreads);
FL2LIB_API void FL2LIB_CALL FL2_freeCCtx(FL2_CCtx* cctx);
FL2LIB_API unsigned FL2LIB_CALL FL2_CCtx_nbThreads(const FL2_CCtx* ctx);
FL2LIB_API unsigned FL2LIB_CALL FL2_getCCtxThreadCount(const FL2_CCtx* cctx);
/*! FL2_compressCCtx() :
* Same as FL2_compress(), requires an allocated FL2_CCtx (see FL2_createCCtx()). */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* ctx,
* Same as FL2_compress(), but requires an allocated FL2_CCtx (see FL2_createCCtx()). */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
/************************************************
* Caller-managed data buffer and overlap section
************************************************/
typedef struct {
unsigned char *data;
size_t start; /* start = 0 (first block) or overlap */
size_t end; /* never < overlap */
size_t bufSize; /* allocation size */
} FL2_blockBuffer;
typedef int (FL2LIB_CALL *FL2_progressFn)(size_t done, void* opaque);
/* Get the size of the overlap section. */
FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* ctx);
/* Copy the overlap section to the start to prepare for more data */
FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(FL2_CCtx* ctx, FL2_blockBuffer *block);
/* Copy the overlap to a different buffer. This allows a dual-buffer configuration where
* data is read into one block while the other is compressed. */
FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(FL2_CCtx* ctx, FL2_blockBuffer *block, unsigned char *dst);
FL2LIB_API void FL2LIB_CALL FL2_beginFrame(FL2_CCtx* const cctx);
/*! FL2_compressCCtxBlock() :
* Same as FL2_compressCCtx except the caller is responsible for supplying an overlap section.
* The FL2_p_overlapFraction parameter will not be used.
* srcStart + srcSize should equal the dictionary size except on the last call.
* Can be called multiple times. FL2_endFrame() must be called when finished.
* For compatibility with this library the caller must write a property byte at
* the beginning of the output. Obtain it by calling FL2_dictSizeProp() before
* compressing the first block or after the last. No hash will be written, but
* the caller can calculate it using the interface in xxhash.h, write it at the end,
* and set bit 7 in the property byte. */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock(FL2_CCtx* ctx,
void* dst, size_t dstCapacity,
const FL2_blockBuffer *block,
FL2_progressFn progress, void* opaque);
/*! FL2_endFrame() :
* Write the end marker to terminate the LZMA2 stream.
* Must be called after compressing with FL2_compressCCtxBlock() */
FL2LIB_API size_t FL2LIB_CALL FL2_endFrame(FL2_CCtx* ctx,
void* dst, size_t dstCapacity);
typedef int (FL2LIB_CALL *FL2_writerFn)(const void* src, size_t srcSize, void* opaque);
/*! FL2_compressCCtxBlock_toFn() :
* Same as FL2_compressCCtx except the caller is responsible for supplying an
* overlap section, and compressed data is written to a callback function.
* The FL2_p_overlapFraction parameter will not be used.
* Can be called multiple times. FL2_endFrame_toFn() must be called when finished. */
FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock_toFn(FL2_CCtx* ctx,
FL2_writerFn writeFn, void* opaque,
const FL2_blockBuffer *block,
FL2_progressFn progress);
/*! FL2_endFrame() :
* Write the end marker to a callback function to terminate the LZMA2 stream.
* Must be called after compressing with FL2_compressCCtxBlock_toFn() */
FL2LIB_API size_t FL2LIB_CALL FL2_endFrame_toFn(FL2_CCtx* ctx,
FL2_writerFn writeFn, void* opaque);
/*! FL2_dictSizeProp() :
/*! FL2_getCCtxDictProp() :
* Get the dictionary size property.
* Intended for use with the FL2_p_omitProperties parameter for creating a
* 7-zip compatible LZMA2 stream. */
FL2LIB_API unsigned char FL2LIB_CALL FL2_dictSizeProp(FL2_CCtx* ctx);
* 7-zip or XZ compatible LZMA2 stream. */
FL2LIB_API unsigned char FL2LIB_CALL FL2_getCCtxDictProp(FL2_CCtx* cctx);
/****************************
* Decompression
****************************/
/*= Decompression context
* When decompressing many times,
* it is recommended to allocate a context only once,
* and re-use it for each successive compression operation.
* This will make the workload friendlier for the system's memory.
* Use one context per thread for parallel execution. */
typedef struct CLzma2Dec_s FL2_DCtx;
* When decompressing many times, it is recommended to allocate a context only once,
* and re-use it for each successive decompression operation. This will make the workload
* friendlier for the system's memory.
* The context may not allocate the number of threads requested if the library is
* compiled for single-threaded compression or nbThreads > FL2_MAXTHREADS.
* Call FL2_getDCtxThreadCount to obtain the actual number allocated.
* At least nbThreads dictionary resets must exist in the stream to use all of the
* threads. Dictionary resets are inserted into the stream according to the
* FL2_p_resetInterval parameter used in the compression context. */
typedef struct FL2_DCtx_s FL2_DCtx;
FL2LIB_API FL2_DCtx* FL2LIB_CALL FL2_createDCtx(void);
FL2LIB_API FL2_DCtx* FL2LIB_CALL FL2_createDCtxMt(unsigned nbThreads);
FL2LIB_API size_t FL2LIB_CALL FL2_freeDCtx(FL2_DCtx* dctx);
FL2LIB_API unsigned FL2LIB_CALL FL2_getDCtxThreadCount(const FL2_DCtx* dctx);
/*! FL2_initDCtx() :
* Use only when a property byte is not present at input byte 0. No init is necessary otherwise.
* The caller must store the result from FL2_getCCtxDictProp() and pass it to this function. */
FL2LIB_API size_t FL2LIB_CALL FL2_initDCtx(FL2_DCtx* dctx, unsigned char prop);
/*! FL2_decompressDCtx() :
* Same as FL2_decompress(), requires an allocated FL2_DCtx (see FL2_createDCtx()) */
FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* ctx,
FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
@@ -232,90 +201,180 @@ FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* ctx,
* Streaming
****************************/
typedef struct FL2_inBuffer_s {
typedef struct {
const void* src; /**< start of input buffer */
size_t size; /**< size of input buffer */
size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
} FL2_inBuffer;
typedef struct FL2_outBuffer_s {
typedef struct {
void* dst; /**< start of output buffer */
size_t size; /**< size of output buffer */
size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
} FL2_outBuffer;
/*** Push/pull structs ***/
typedef struct {
void* dst; /**< start of available dict buffer */
unsigned long size; /**< size of dict remaining */
} FL2_dictBuffer;
typedef struct {
const void* src; /**< start of compressed data */
size_t size; /**< size of compressed data */
} FL2_cBuffer;
/*-***********************************************************************
* Streaming compression - HowTo
* Streaming compression
*
* A FL2_CStream object is required to track streaming operation.
* Use FL2_createCStream() and FL2_freeCStream() to create/release resources.
* FL2_CStream objects can be reused multiple times on consecutive compression operations.
* It is recommended to re-use FL2_CStream in situations where many streaming operations will be achieved consecutively,
* since it will play nicer with system's memory, by re-using already allocated memory.
* It is recommended to re-use FL2_CStream in situations where many streaming operations will be done
* consecutively, since it will reduce allocation and initialization time.
*
* Start a new compression by initializing FL2_CStream.
* Use FL2_initCStream() to start a new compression operation.
* Call FL2_createCStreamMt() with a nonzero dualBuffer parameter to use two input dictionary buffers.
* The stream will not block on FL2_compressStream() and continues to accept data while compression is
* underway, until both buffers are full. Useful when I/O is slow.
* To compress with a single thread with dual buffering, call FL2_createCStreamMt with nbThreads=1.
*
* Use FL2_initCStream() on the FL2_CStream object to start a new compression operation.
*
* Use FL2_compressStream() repetitively to consume input stream.
* The function will automatically update both `pos` fields.
* It will always consume the entire input unless an error occurs,
* The function will automatically update the `pos` field.
* It will always consume the entire input unless an error occurs or the dictionary buffer is filled,
* unlike the decompression function.
* @return : a size hint - remaining capacity to fill before compression occurs,
* or an error code, which can be tested using FL2_isError().
* Note : it's just a hint, any other value will work fine.
*
* At any moment, it's possible, but not recommended, to flush whatever data remains
* within internal buffer using FL2_flushStream().
* `output->pos` will be updated.
* Note 1 : this will reduce compression ratio because the algorithm is block-based.
* Note 2 : some content might still be left within internal buffers if `output->size` is too small.
* @return : nb of bytes still present within internal buffers (0 if they're empty)
* or an error code, which can be tested using FL2_isError().
* The radix match finder allows compressed data to be stored in its match table during encoding.
* Applications may call streaming compression functions with output == NULL. In this case,
* when the function returns 1, the compressed data must be read from the internal buffers.
* Call FL2_getNextCStreamBuffer() repeatedly until it returns 0.
* Each call returns buffer information in the FL2_cBuffer parameter. Applications will typically
* pass this to an I/O write function or downstream filter.
* Alternately, applications may pass an FL2_outBuffer object pointer to receive the output. In this
* case the return value is 1 if the buffer is full and more compressed data remains.
*
* FL2_endStream() instructs to finish a frame.
* It will perform a flush and write the LZMA2 termination byte (required).
* FL2_endStream() may not be able to flush full data if `output->size` is too small.
* In which case, call again FL2_endStream() to complete the flush.
* @return : 0 if stream fully completed and flushed,
* or >0 to indicate the nb of bytes still present within the internal buffers,
* or an error code, which can be tested using FL2_isError().
* FL2_endStream() instructs to finish a stream. It will perform a flush and write the LZMA2
* termination byte (required). Call FL2_endStream() repeatedly until it returns 0.
*
* Most functions may return a size_t error code, which can be tested using FL2_isError().
*
* *******************************************************************/
typedef struct FL2_CStream_s FL2_CStream;
typedef struct FL2_CCtx_s FL2_CStream;
/*===== FL2_CStream management functions =====*/
FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStream(void);
FL2LIB_API size_t FL2LIB_CALL FL2_freeCStream(FL2_CStream* fcs);
FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStreamMt(unsigned nbThreads, int dualBuffer);
FL2LIB_API void FL2LIB_CALL FL2_freeCStream(FL2_CStream * fcs);
/*===== Streaming compression functions =====*/
FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel);
FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer* output, FL2_inBuffer* input);
FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer* output);
FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer* output);
/*! FL2_initCStream() :
* Call this function before beginning a new compressed data stream. To keep the stream object's
* current parameters, specify zero for the compression level. The object is set to the default
* level upon creation. */
FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel);
/*! FL2_setCStreamTimeout() :
* Sets a timeout in milliseconds. Zero disables the timeout (default). If a nonzero timeout is set, functions
* FL2_compressStream(), FL2_updateDictionary(), FL2_getNextCStreamBuffer(), FL2_flushStream(), and
* FL2_endStream() may return a timeout code before compression of the current dictionary of data
* completes. FL2_isError() returns true for the timeout code, so check the code with FL2_isTimedOut() before
* testing for errors. With the exception of FL2_updateDictionary(), the above functions may be called again
* to wait for completion. A typical application for timeouts is to update the user on compression progress. */
FL2LIB_API size_t FL2LIB_CALL FL2_setCStreamTimeout(FL2_CStream * fcs, unsigned timeout);
/*! FL2_compressStream() :
* Reads data from input into the dictionary buffer. Compression will begin if the buffer fills up.
* A dual buffering stream will fill the second buffer while compression proceeds on the first.
* A call to FL2_compressStream() will wait for ongoing compression to complete if all dictionary space
* is filled. FL2_compressStream() must not be called with output == NULL unless the caller has read all
* compressed data from the CStream object.
* Returns 1 to indicate compressed data must be read (or output is full), or 0 otherwise. */
FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer *output, FL2_inBuffer* input);
/*** Push/pull functions ***/
/*! FL2_getDictionaryBuffer() :
* Returns a buffer in the FL2_outBuffer object, which the caller can directly read data into.
* Applications will normally pass this buffer to an I/O read function or upstream filter.
* Returns 0, or an error or timeout code. */
FL2LIB_API size_t FL2LIB_CALL FL2_getDictionaryBuffer(FL2_CStream* fcs, FL2_dictBuffer* dict);
/*! FL2_updateDictionary() :
* Informs the CStream how much data was added to the buffer. Compression begins if the dictionary
* was filled. Returns 1 to indicate compressed data must be read, 0 if not, or an error code. */
FL2LIB_API size_t FL2LIB_CALL FL2_updateDictionary(FL2_CStream* fcs, size_t addedSize);
/*! FL2_getNextCStreamBuffer() :
* Returns a buffer containing a slice of the compressed data. Call this function and process the data
* until the function returns zero. In most cases it will return one buffer per compression thread
* used; sometimes fewer, but never more than nbThreads. If asynchronous compression is in progress,
* this function will wait for completion before returning, or it will return the timeout code. */
FL2LIB_API size_t FL2LIB_CALL FL2_getNextCStreamBuffer(FL2_CStream* fcs, FL2_cBuffer* cbuf);
/******/
/*! FL2_getCStreamProgress() :
* Returns the number of bytes processed since the stream was initialized. This is a synthetic
* estimate because the match finder does not proceed sequentially through the data. If
* outputSize is not NULL, returns the number of bytes of compressed data generated. */
FL2LIB_API unsigned long long FL2LIB_CALL FL2_getCStreamProgress(const FL2_CStream * fcs, unsigned long long *outputSize);
/*! FL2_waitCStream() :
* Waits for compression to end. This function returns after the timeout set using
* FL2_setCStreamTimeout has elapsed. Unnecessary when no timeout is set.
* Returns 1 if compressed output is available, 0 if not, or the timeout code. */
FL2LIB_API size_t FL2LIB_CALL FL2_waitCStream(FL2_CStream * fcs);
/*! FL2_cancelCStream() :
* Cancels any compression operation underway. Useful only when dual buffering and/or timeouts
* are enabled. The stream will be returned to an uninitialized state. */
FL2LIB_API void FL2LIB_CALL FL2_cancelCStream(FL2_CStream *fcs);
/*! FL2_remainingOutputSize() :
* The amount of compressed data remaining to be read from the CStream object. */
FL2LIB_API size_t FL2LIB_CALL FL2_remainingOutputSize(const FL2_CStream* fcs);
/*! FL2_flushStream() :
* Compress all data remaining in the dictionary buffer(s). It may be necessary to call
* FL2_flushStream() more than once. If output == NULL the compressed data must be read from the
* CStream object after each call.
* Flushing is not normally useful and produces larger output.
* Returns 1 if input or output still exists in the CStream object, 0 if complete, or an error code. */
FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer *output);
/*! FL2_endStream() :
* Compress all data remaining in the dictionary buffer(s) and write the stream end marker. It may
* be necessary to call FL2_endStream() more than once. If output == NULL the compressed data must
* be read from the CStream object after each call.
* Returns 0 when compression is complete and all output has been flushed, 1 if not complete, or
* an error code. */
FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer *output);
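/* A sketch of the push/pull loop described above; fill() and emit() are
 * hypothetical I/O callbacks, and timeout/error paths are abbreviated. */
static size_t compress_loop(FL2_CStream* fcs,
                            size_t (*fill)(void* dst, size_t size),
                            void (*emit)(const void* src, size_t size))
{
    size_t res = FL2_initCStream(fcs, 0);         /* 0 keeps the current parameters */
    if (FL2_isError(res)) return res;
    size_t added;
    do {
        FL2_dictBuffer dict;
        res = FL2_getDictionaryBuffer(fcs, &dict);
        if (FL2_isError(res)) return res;
        added = fill(dict.dst, dict.size);
        res = FL2_updateDictionary(fcs, added);   /* 1 means output is ready */
        if (FL2_isError(res)) return res;
        if (res == 1) {
            FL2_cBuffer cbuf;
            while (FL2_getNextCStreamBuffer(fcs, &cbuf) != 0)
                emit(cbuf.src, cbuf.size);        /* drain each compressed slice */
        }
    } while (added != 0);
    while ((res = FL2_endStream(fcs, NULL)) != 0) {   /* flush + end marker */
        if (FL2_isError(res)) return res;
        FL2_cBuffer cbuf;
        while (FL2_getNextCStreamBuffer(fcs, &cbuf) != 0)
            emit(cbuf.src, cbuf.size);
    }
    return 0;
}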
/*-***************************************************************************
* Streaming decompression - HowTo
* Streaming decompression
*
* A FL2_DStream object is required to track streaming operations.
* Use FL2_createDStream() and FL2_freeDStream() to create/release resources.
* FL2_DStream objects can be re-used multiple times.
*
* Use FL2_initDStream() to start a new decompression operation.
* @return : recommended first input size
* @return : zero or an error code
*
* Use FL2_decompressStream() repetitively to consume your input.
* The function will update both `pos` fields.
* If `input.pos < input.size`, some input has not been consumed.
* It's up to the caller to present again remaining data.
* More data must be loaded if `input.pos + LZMA_REQUIRED_INPUT_MAX >= input.size`
* It's up to the caller to present again the remaining data.
* More data must be loaded if `input.pos + LZMA_REQUIRED_INPUT_MAX >= input.size`. In this case,
* move the remaining input (<= LZMA_REQUIRED_INPUT_MAX bytes) to the start of the buffer and
* load new data after it.
* If `output.pos < output.size`, decoder has flushed everything it could.
* @return : 0 when a frame is completely decoded and fully flushed,
* an error code, which can be tested using FL2_isError(),
* 1, which means there is still some decoding to do to complete current frame.
* @return : 0 when a stream is completely decoded and fully flushed,
* 1, which means there is still some decoding to do to complete the stream,
* or an error code, which can be tested using FL2_isError().
* *******************************************************************************/
#define LZMA_REQUIRED_INPUT_MAX 20
@@ -324,101 +383,187 @@ typedef struct FL2_DStream_s FL2_DStream;
/*===== FL2_DStream management functions =====*/
FL2LIB_API FL2_DStream* FL2LIB_CALL FL2_createDStream(void);
FL2LIB_API FL2_DStream* FL2LIB_CALL FL2_createDStreamMt(unsigned nbThreads);
FL2LIB_API size_t FL2LIB_CALL FL2_freeDStream(FL2_DStream* fds);
/*! FL2_setDStreamMemoryLimitMt() :
* Set a total size limit for multithreaded decoder input and output buffers. MT decoder memory
* usage is unknown until the input is parsed. If the limit is exceeded, the decoder switches to
* using a single thread.
* MT decoding memory usage is typically dictionary_size * 4 * nbThreads for the output
* buffers plus the size of the compressed input for that amount of output. */
FL2LIB_API void FL2LIB_CALL FL2_setDStreamMemoryLimitMt(FL2_DStream* fds, size_t limit);
/*! FL2_setDStreamTimeout() :
* Sets a timeout in milliseconds. Zero disables the timeout. If a nonzero timeout is set,
* FL2_decompressStream() may return a timeout code before decompression of the available data
* completes. FL2_isError() returns true for the timeout code, so check the code with FL2_isTimedOut()
* before testing for errors. After a timeout occurs, do not call FL2_decompressStream() again unless
* a call to FL2_waitDStream() returns 1. A typical application for timeouts is to update the user on
* decompression progress. */
FL2LIB_API size_t FL2LIB_CALL FL2_setDStreamTimeout(FL2_DStream * fds, unsigned timeout);
/*! FL2_waitDStream() :
* Waits for decompression to end after a timeout has occurred. This function returns after the
* timeout set using FL2_setDStreamTimeout() has elapsed, or when decompression of available input is
* complete. Unnecessary when no timeout is set.
* Returns 0 if the stream is complete, 1 if not complete, or an error code. */
FL2LIB_API size_t FL2LIB_CALL FL2_waitDStream(FL2_DStream * fds);
/*! FL2_cancelDStream() :
* Frees memory allocated for MT decoding. If a timeout is set and the caller is waiting
* for completion of MT decoding, decompression in progress will be canceled. */
FL2LIB_API void FL2LIB_CALL FL2_cancelDStream(FL2_DStream *fds);
/*! FL2_getDStreamProgress() :
* Returns the number of bytes decoded since the stream was initialized. */
FL2LIB_API unsigned long long FL2LIB_CALL FL2_getDStreamProgress(const FL2_DStream * fds);
/*===== Streaming decompression functions =====*/
/*! FL2_initDStream() :
* Call this function before decompressing a stream. FL2_initDStream_withProp()
* must be used for streams which do not include a property byte at position zero.
* The caller is responsible for storing and passing the property byte.
* Returns 0 if okay, or an error if the stream object is still in use from a
* previous call to FL2_decompressStream() (see timeout info above). */
FL2LIB_API size_t FL2LIB_CALL FL2_initDStream(FL2_DStream* fds);
FL2LIB_API size_t FL2LIB_CALL FL2_initDStream_withProp(FL2_DStream* fds, unsigned char prop);
/*! FL2_decompressStream() :
* Reads data from input and decompresses to output.
* Returns 1 if the stream is unfinished, 0 if the terminator was encountered (he'll be back)
* and all data was written to output, or an error code. Call this function repeatedly if
* necessary, removing data from output and/or loading data into input before each call.
* Note the requirement for LZMA_REQUIRED_INPUT_MAX bytes of input if the input data is
* incomplete (see intro above). */
FL2LIB_API size_t FL2LIB_CALL FL2_decompressStream(FL2_DStream* fds, FL2_outBuffer* output, FL2_inBuffer* input);
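/* A sketch of the loop this describes; fill() and emit() are hypothetical I/O
 * callbacks. Refilling only once the input is fully consumed sidesteps the
 * LZMA_REQUIRED_INPUT_MAX move described in the intro above. */
static size_t decompress_loop(FL2_DStream* fds,
                              size_t (*fill)(void* dst, size_t size),
                              void (*emit)(const void* src, size_t size))
{
    unsigned char inBuf[16 * 1024];
    unsigned char outBuf[64 * 1024];
    FL2_inBuffer in = { inBuf, 0, 0 };
    FL2_outBuffer out = { outBuf, sizeof outBuf, 0 };
    size_t res = FL2_initDStream(fds);
    if (FL2_isError(res)) return res;
    do {
        if (in.pos == in.size) {        /* refill once the input is consumed */
            in.size = fill(inBuf, sizeof inBuf);
            in.pos = 0;
        }
        out.pos = 0;
        res = FL2_decompressStream(fds, &out, &in);
        if (FL2_isError(res)) return res;
        emit(outBuf, out.pos);          /* drain whatever was produced */
    } while (res != 0);                 /* 0 once the end marker is reached */
    return 0;
}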
/*-***************************************************************************
* Compression parameters - HowTo
* Compression parameters
*
* Any function that takes a 'compressionLevel' parameter will replace any
* parameters affected by compression level that are already set.
* Call FL2_CCtx_setParameter with FL2_p_compressionLevel to set the level,
* then call FL2_CCtx_setParameter again with any other settings to change.
* Specify compressionLevel=0 when calling a compression function.
* To use a preset level and modify it, call FL2_CCtx_setParameter with
* FL2_p_compressionLevel to set the level, then call FL2_CCtx_setParameter again
* with any other settings to change.
* Specify a compressionLevel of 0 when calling a compression function to keep
* the current parameters.
* *******************************************************************************/
#define FL2_DICTLOG_MIN 20
#define FL2_DICTLOG_MAX_32 27
#define FL2_DICTLOG_MAX_64 30
#define FL2_DICTLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? FL2_DICTLOG_MAX_32 : FL2_DICTLOG_MAX_64))
#define FL2_DICTLOG_MIN 20
#define FL2_CHAINLOG_MAX 14
#define FL2_CHAINLOG_MIN 4
#define FL2_SEARCHLOG_MAX (FL2_CHAINLOG_MAX-1)
#define FL2_SEARCHLOG_MIN 0
#define FL2_FASTLENGTH_MIN 6 /* only used by optimizer */
#define FL2_FASTLENGTH_MAX 273 /* only used by optimizer */
#define FL2_DICTSIZE_MAX (1U << FL2_DICTLOG_MAX)
#define FL2_DICTSIZE_MIN (1U << FL2_DICTLOG_MIN)
#define FL2_BLOCK_OVERLAP_MIN 0
#define FL2_BLOCK_OVERLAP_MAX 14
#define FL2_BLOCK_LOG_MIN 12
#define FL2_BLOCK_LOG_MAX 32
#define FL2_RESET_INTERVAL_MIN 1
#define FL2_RESET_INTERVAL_MAX 16 /* small enough to fit FL2_DICTSIZE_MAX * FL2_RESET_INTERVAL_MAX in 32-bit size_t */
#define FL2_BUFFER_SIZE_LOG_MIN 0
#define FL2_BUFFER_SIZE_LOG_MAX 6
#define FL2_CHAINLOG_MIN 4
#define FL2_CHAINLOG_MAX 14
#define FL2_HYBRIDCYCLES_MIN 1
#define FL2_HYBRIDCYCLES_MAX 64
#define FL2_SEARCH_DEPTH_MIN 6
#define FL2_SEARCH_DEPTH_MAX 254
#define FL2_BUFFER_SIZE_LOG_MIN 6
#define FL2_BUFFER_SIZE_LOG_MAX 12
#define FL2_FASTLENGTH_MIN 6 /* only used by optimizer */
#define FL2_FASTLENGTH_MAX 273 /* only used by optimizer */
#define FL2_LC_MIN 0
#define FL2_LC_MAX 4
#define FL2_LP_MIN 0
#define FL2_LP_MAX 4
#define FL2_PB_MIN 0
#define FL2_PB_MAX 4
#define FL2_LCLP_MAX 4
typedef enum {
FL2_fast,
FL2_opt,
FL2_ultra
} FL2_strategy;
typedef struct {
size_t dictionarySize; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory per byte, slower */
unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */
unsigned chainLog; /* HC3 sliding window : larger == more compression, slower; hybrid mode only (ultra) */
unsigned cyclesLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */
unsigned searchDepth; /* maximum depth for resolving string matches : larger == more compression, slower */
unsigned fastLength; /* acceptable match size for parser : larger == more compression, slower; fast bytes parameter from 7-zip */
unsigned divideAndConquer; /* split long chains of 2-byte matches into shorter chains with a small overlap : faster, somewhat less compression; enabled by default */
unsigned bufferLog; /* buffer size for processing match chains is (dictionarySize >> (12 - bufferLog)) : affects compression when divideAndConquer enabled; */
/* when divideAndConquer disabled, affects speed in a hardware-dependent manner */
FL2_strategy strategy; /* encoder strategy : fast, optimized or ultra (hybrid) */
} FL2_compressionParameters;
typedef enum {
/* compression parameters */
FL2_p_compressionLevel, /* Update all compression parameters according to pre-defined cLevel table
* Default level is FL2_CLEVEL_DEFAULT==9.
* Setting FL2_p_highCompression to 1 switches to an alternate cLevel table.
* Special: value 0 means "do not change cLevel". */
* Default level is FL2_CLEVEL_DEFAULT==6.
* Setting FL2_p_highCompression to 1 switches to an alternate cLevel table. */
FL2_p_highCompression, /* Maximize compression ratio for a given dictionary size.
* Has 9 levels instead of 12, with dictionaryLog 20 - 28. */
FL2_p_7zLevel, /* For use by the 7-zip fork employing this library. 1 - 9 */
* Levels 1..10 = dictionaryLog 20..29 (1 Mb..512 Mb).
* Typically provides a poor speed/ratio tradeoff. */
FL2_p_dictionaryLog, /* Maximum allowed back-reference distance, expressed as power of 2.
* Must be clamped between FL2_DICTLOG_MIN and FL2_DICTLOG_MAX.
* Special: value 0 means "do not change dictionaryLog". */
* Default = 24 */
FL2_p_dictionarySize, /* Same as above but expressed as an absolute value.
* Must be clamped between FL2_DICTSIZE_MIN and FL2_DICTSIZE_MAX.
* Default = 16 Mb */
FL2_p_overlapFraction, /* The radix match finder is block-based, so some overlap is retained from
* each block to improve compression of the next. This value is expressed
* as n / 16 of the block size (dictionary size). Larger values are slower.
* Values above 2 mostly yield only a small improvement in compression. */
FL2_p_blockSize,
* Values above 2 mostly yield only a small improvement in compression.
* A large value for a small dictionary may worsen multithreaded compression.
* Default = 2 */
FL2_p_resetInterval, /* For multithreaded decompression. A dictionary reset will occur
* after each dictionarySize * resetInterval bytes of input.
* Default = 4 */
FL2_p_bufferLog, /* Buffering speeds up the matchfinder. Buffer size is
* 2 ^ (dictionaryLog - bufferLog). Lower number = slower, better compression,
* higher memory usage. */
FL2_p_chainLog, /* Size of the full-search table, as a power of 2.
* Resulting table size is (1 << (chainLog+2)).
* (dictionarySize >> (12 - bufferLog)) * 12 bytes. Higher number = slower,
* better compression, higher memory usage. A CPU with a large memory cache
* may make effective use of a larger buffer.
* Default = 4 */
FL2_p_hybridChainLog, /* Size of the hybrid mode HC3 hash chain, as a power of 2.
* Resulting table size is (1 << (chainLog+2)) bytes.
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy.
* Special: value 0 means "do not change chainLog". */
FL2_p_searchLog, /* Number of search attempts, as a power of 2, made by the HC3 match finder
* used only in hybrid mode.
* This parameter is only used by the hybrid "ultra" strategy.
* Default = 9 */
FL2_p_hybridCycles, /* Number of search attempts made by the HC3 match finder.
* Used only by the hybrid "ultra" strategy.
* More attempts result in slightly better and slower compression.
* This parameter is not used by the "fast" and "optimize" strategies.
* Special: value 0 means "do not change searchLog". */
FL2_p_literalCtxBits, /* lc value for LZMA2 encoder */
FL2_p_literalPosBits, /* lp value for LZMA2 encoder */
FL2_p_posBits, /* pb value for LZMA2 encoder */
* Default = 1 */
FL2_p_searchDepth, /* Match finder will resolve string matches up to this length. If a longer
* match exists further back in the input, it will not be found. */
* match exists further back in the input, it will not be found.
* Default = 42 */
FL2_p_fastLength, /* Only useful for strategies >= opt.
* Length of Match considered "good enough" to stop search.
* Length of match considered "good enough" to stop search.
* Larger values make compression stronger and slower.
* Special: value 0 means "do not change fastLength". */
* Default = 48 */
FL2_p_divideAndConquer, /* Split long chains of 2-byte matches into shorter chains with a small overlap
* during further processing. Allows buffering of all chains at length 2.
* Faster, less compression. Generally a good tradeoff. Enabled by default. */
FL2_p_strategy, /* 1 = fast; 2 = optimize, 3 = ultra (hybrid mode).
* for further processing. Allows buffering of all chains at length 2.
* Faster, less compression. Generally a good tradeoff.
* Default = enabled */
FL2_p_strategy, /* 1 = fast; 2 = optimized, 3 = ultra (hybrid mode).
* The higher the value of the selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "do not change strategy". */
* Default = ultra */
FL2_p_literalCtxBits, /* lc value for LZMA2 encoder
* Default = 3 */
FL2_p_literalPosBits, /* lp value for LZMA2 encoder
* Default = 0 */
FL2_p_posBits, /* pb value for LZMA2 encoder
* Default = 2 */
FL2_p_omitProperties, /* Omit the property byte at the start of the stream. For use within 7-zip */
/* or other containers which store the property byte elsewhere. */
/* A stream compressed under this setting cannot be decoded by this library. */
#ifndef NO_XXHASH
FL2_p_doXXHash, /* Calculate a 32-bit xxhash value from the input data and store it
* after the stream terminator. The value will be checked on decompression.
* 0 = do not calculate; 1 = calculate (default) */
#endif
FL2_p_omitProperties, /* Omit the property byte at the start of the stream. For use within 7-zip */
/* or other containers which store the property byte elsewhere. */
/* Cannot be decoded by this library. */
#ifdef RMF_REFERENCE
FL2_p_useReferenceMF /* Use the reference matchfinder for development purposes. SLOW. */
#endif
@@ -429,8 +574,32 @@ typedef enum {
* Set one compression parameter, selected by enum FL2_cParameter.
* @result : informational value (typically, the one being set, possibly corrected),
* or an error code (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, unsigned value);
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, unsigned value);
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, size_t value);
/*! FL2_CCtx_getParameter() :
* Get one compression parameter, selected by enum FL2_cParameter.
* @result : the parameter value, or the parameter_unsupported error code
* (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_getParameter(FL2_CCtx* cctx, FL2_cParameter param);
/*! FL2_CStream_setParameter() :
* Set one compression parameter, selected by enum FL2_cParameter.
* @result : informational value (typically, the one being set, possibly corrected),
* or an error code (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, size_t value);
/*! FL2_CStream_getParameter() :
* Get one compression parameter, selected by enum FL2_cParameter.
* @result : the parameter value, or the parameter_unsupported error code
* (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_getParameter(FL2_CStream* fcs, FL2_cParameter param);
/*! FL2_getLevelParameters() :
* Get all compression parameter values defined by the preset compressionLevel.
* @result : the values in a FL2_compressionParameters struct, or the parameter_outOfBound error code
* (which can be tested with FL2_isError()) if compressionLevel is invalid. */
FL2LIB_API size_t FL2LIB_CALL FL2_getLevelParameters(int compressionLevel, int high, FL2_compressionParameters *params);
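/* A sketch of priming a context from a preset level and then overriding
 * individual settings, per the parameter notes above. The values shown are
 * examples, not recommendations. */
static size_t compress_tuned(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    FL2_CCtx* const cctx = FL2_createCCtxMt(0);              /* 0: request all cores (assumption) */
    if (cctx == NULL)
        return (size_t)-1;                                   /* tests true with FL2_isError() */
    FL2_CCtx_setParameter(cctx, FL2_p_compressionLevel, 9);  /* preset first... */
    FL2_CCtx_setParameter(cctx, FL2_p_dictionaryLog, 26);    /* ...then overrides: 64 Mb dictionary */
    FL2_CCtx_setParameter(cctx, FL2_p_strategy, 3);          /* ultra (hybrid) */
    /* compressionLevel = 0 below keeps the parameters set above */
    size_t const cSize = FL2_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 0);
    FL2_freeCCtx(cctx);
    return cSize;
}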
/***************************************
* Context memory usage
@@ -441,12 +610,29 @@ FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cPa
* FL2_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
* To use FL2_estimateCCtxSize_usingCCtx, set the compression level and any other settings for the context,
* then call the function. Some allocation occurs when the context is created, but the large memory buffers
* used for string matching are allocated only when compression begins. */
* used for string matching are allocated only when compression is initialized. */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize(int compressionLevel, unsigned nbThreads); /*!< memory usage determined by level */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_byParams(const FL2_compressionParameters *params, unsigned nbThreads); /*!< memory usage determined by params */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_usingCCtx(const FL2_CCtx* cctx); /*!< memory usage determined by settings */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads);
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCCtx(const FL2_CStream* fcs);
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads, int dualBuffer); /*!< memory usage determined by level */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_byParams(const FL2_compressionParameters *params, unsigned nbThreads, int dualBuffer); /*!< memory usage determined by params */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCStream(const FL2_CStream* fcs); /*!< memory usage determined by settings */
/*! FL2_getDictSizeFromProp() :
* Get the dictionary size from the property byte for a stream. The property byte is the first byte
* in the stream, unless omitProperties was enabled, in which case the caller must store it. */
FL2LIB_API size_t FL2LIB_CALL FL2_getDictSizeFromProp(unsigned char prop);
/*! FL2_estimateDCtxSize() :
* The size of a DCtx does not include a dictionary buffer because the caller must supply one. */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateDCtxSize(unsigned nbThreads);
/*! FL2_estimateDStreamSize() :
* Estimate decompression memory use from the dictionary size and number of threads.
* For nbThreads == 0 the number of available cores will be used.
* Obtain dictSize by passing the property byte to FL2_getDictSizeFromProp. */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateDStreamSize(size_t dictSize, unsigned nbThreads); /*!< obtain dictSize from FL2_getDictSizeFromProp() */
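/* A sketch combining the helpers just above, assuming the property byte is the
 * first byte of the stream (i.e. FL2_p_omitProperties was not used) and that
 * FL2_getDictSizeFromProp() reports an invalid prop byte as an error code. */
static size_t dstream_budget(const unsigned char* stream, unsigned nbThreads)
{
    size_t const dictSize = FL2_getDictSizeFromProp(stream[0]);
    if (FL2_isError(dictSize))
        return dictSize;
    return FL2_estimateDStreamSize(dictSize, nbThreads);
}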
#endif /* FAST_LZMA2_H */

@@ -14,10 +14,8 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc, calloc, free */
#include <string.h> /* memset */
#include "fast-lzma2.h"
#include "fl2_error_private.h"
#include "fl2_errors.h"
#include "fl2_internal.h"
@@ -29,6 +27,9 @@ FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void) { return FL2_VERSION_NUM
FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void) { return FL2_VERSION_STRING; }
/*-****************************************
* Compression helpers
******************************************/
FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize)
{
return FL2_COMPRESSBOUND(srcSize);
@@ -37,21 +38,70 @@ FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize)
/*-****************************************
* FL2 Error Management
******************************************/
HINT_INLINE
unsigned IsError(size_t code)
{
return (code > FL2_ERROR(maxCode));
}
/*! FL2_isError() :
* tells if a return value is an error code */
FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code) { return ERR_isError(code); }
FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code)
{
return IsError(code);
}
/*! FL2_isTimedOut() :
* tells if a return value is the timeout code */
FL2LIB_API unsigned FL2LIB_CALL FL2_isTimedOut(size_t code)
{
return (code == FL2_ERROR(timedOut));
}
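/* Because the timeout code also tests true with FL2_isError(), callers must
 * check FL2_isTimedOut() first, as the CStream/DStream timeout notes require.
 * A sketch of that pattern (report_progress() is hypothetical): */
extern void report_progress(unsigned long long done);

static size_t compress_with_progress(FL2_CStream* fcs, FL2_outBuffer* out, FL2_inBuffer* in)
{
    size_t res = FL2_compressStream(fcs, out, in);
    while (FL2_isTimedOut(res)) {                  /* timed out, not an error */
        report_progress(FL2_getCStreamProgress(fcs, NULL));
        res = FL2_compressStream(fcs, out, in);    /* call again to keep waiting */
    }
    return res;    /* status, or a genuine error code */
}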
/*! FL2_getErrorName() :
* provides error code string from function result (useful for debugging) */
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code) { return ERR_getErrorName(code); }
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code)
{
return FL2_getErrorString(FL2_getErrorCode(code));
}
/*! FL2_getError() :
* convert a `size_t` function result into a proper FL2_errorCode enum */
FL2LIB_API FL2_ErrorCode FL2LIB_CALL FL2_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
FL2LIB_API FL2_ErrorCode FL2LIB_CALL FL2_getErrorCode(size_t code)
{
if (!IsError(code))
return (FL2_ErrorCode)0;
return (FL2_ErrorCode)(0 - code);
}
/*! FL2_getErrorString() :
* provides error code string from enum */
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorString(FL2_ErrorCode code) { return ERR_getFL2ErrorString(code); }
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorString(FL2_ErrorCode code)
{
static const char* const notErrorCode = "Unspecified error code";
switch (code)
{
case PREFIX(no_error): return "No error detected";
case PREFIX(GENERIC): return "Error (generic)";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(lclpMax_exceeded): return "Parameters lc+lp > 4";
case PREFIX(stage_wrong): return "Not possible at this stage of encoding";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(canceled): return "Processing was canceled by a call to FL2_cancelCStream() or FL2_cancelDStream()";
case PREFIX(buffer): return "Streaming progress halted due to buffer(s) full/empty";
case PREFIX(timedOut): return "Wait timed out. Timeouts should be handled before errors using FL2_isTimedOut()";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(maxCode):
default: return notErrorCode;
}
}
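The convention implemented above encodes each error as a small negative value cast to size_t, so FL2_getErrorCode() simply negates it back. A minimal caller-side sketch (the producing call is hypothetical):

#include <stdio.h>
#include "fast-lzma2.h"

static void reportIfError(size_t result)  /* result from any FL2 call */
{
    if (FL2_isError(result)) {
        FL2_ErrorCode const code = FL2_getErrorCode(result); /* (FL2_ErrorCode)(0 - result) */
        fprintf(stderr, "FL2 error: %s\n", FL2_getErrorString(code));
    }
}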
/*! g_debuglog_enable :
* turn on/off debug traces (global switch) */


(File diff suppressed because it is too large.)


@@ -20,8 +20,9 @@
#include "radix_internal.h"
#include "lzma2_enc.h"
#include "fast-lzma2.h"
#include "fl2threading.h"
#include "fl2pool.h"
#include "fl2_threading.h"
#include "fl2_pool.h"
#include "dict_buffer.h"
#ifndef NO_XXHASH
# include "xxhash.h"
#endif
@@ -30,19 +31,6 @@
extern "C" {
#endif
typedef struct {
unsigned dictionaryLog; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory, slower */
unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */
unsigned chainLog; /* fully searched segment : larger == more compression, slower, more memory; hybrid mode only (ultra) */
unsigned searchLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */
unsigned searchDepth; /* maximum depth for resolving string matches : larger == more compression, slower; >= 64 == more memory, slower */
unsigned fastLength; /* acceptable match size for parser, not less than searchDepth : larger == more compression, slower; fast bytes parameter from 7-zip */
unsigned divideAndConquer; /* split long chains of 2-byte matches into shorter chains with a small overlap : faster, somewhat less compression; enabled by default */
unsigned bufferLog; /* buffer size for processing match chains is (dictionaryLog - bufferLog) : when divideAndConquer enabled, affects compression; */
/* when divideAndConquer disabled, affects speed in a hardware-dependent manner */
FL2_strategy strategy; /* encoder strategy : fast, optimized or ultra (hybrid) */
} FL2_compressionParameters;
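A hedged sketch of filling this struct, mirroring the field comments above; the values are illustrative, not tuned defaults:

FL2_compressionParameters const params = {
    24,        /* dictionaryLog : 16 MiB dictionary */
    2,         /* overlapFraction : 2/16 block overlap */
    9,         /* chainLog : hybrid (ultra) mode only */
    5,         /* searchLog : hybrid (ultra) mode only */
    42,        /* searchDepth */
    48,        /* fastLength : not less than searchDepth */
    1,         /* divideAndConquer : enabled */
    4,         /* bufferLog */
    FL2_ultra  /* strategy */
};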
/*-*************************************
* Context memory management
***************************************/
@@ -60,38 +48,43 @@ typedef struct {
typedef struct {
FL2_CCtx* cctx;
FL2_lzmaEncoderCtx* enc;
LZMA2_ECtx* enc;
FL2_dataBlock block;
size_t cSize;
} FL2_job;
struct FL2_CCtx_s {
DICT_buffer buf;
FL2_CCtx_params params;
#ifndef FL2_SINGLETHREAD
FL2POOL_ctx* factory;
FL2POOL_ctx* compressThread;
#endif
FL2_dataBlock curBlock;
size_t asyncRes;
size_t threadCount;
size_t outThread;
size_t outPos;
size_t dictMax;
U64 block_total;
U64 streamTotal;
U64 streamCsize;
FL2_matchTable* matchTable;
#ifndef FL2_SINGLETHREAD
U32 timeout;
#endif
U32 rmfWeight;
U32 encWeight;
FL2_atomic progressIn;
FL2_atomic progressOut;
int canceled;
BYTE wroteProp;
BYTE endMarked;
BYTE loopCount;
BYTE lockParams;
unsigned jobCount;
FL2_job jobs[1];
};
struct FL2_CStream_s {
FL2_CCtx* cctx;
FL2_blockBuffer inBuff;
#ifndef NO_XXHASH
XXH32_state_t *xxh;
#endif
size_t thread_count;
size_t out_thread;
size_t out_pos;
size_t hash_pos;
BYTE end_marked;
BYTE wrote_prop;
};
#if defined (__cplusplus)
}
#endif


@@ -1,35 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* The purpose of this file is to have a single list of error strings embedded in binary */
#include "fl2_error_private.h"
const char* ERR_getFL2ErrorString(ERR_enum code)
{
static const char* const notErrorCode = "Unspecified error code";
switch( code )
{
case PREFIX(no_error): return "No error detected";
case PREFIX(GENERIC): return "Error (generic)";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(maxCode):
default: return notErrorCode;
}
}


@@ -1,75 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* Note : this module is expected to remain private, do not expose it */
#ifndef ERROR_H_MODULE
#define ERROR_H_MODULE
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t */
#include "fl2_errors.h" /* enum list */
/* ****************************************
* Compiler-specific
******************************************/
#if defined(__GNUC__)
# define ERR_STATIC static __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define ERR_STATIC static inline
#elif defined(_MSC_VER)
# define ERR_STATIC static __inline
#else
# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
/*-****************************************
* Customization (error_public.h)
******************************************/
typedef FL2_ErrorCode ERR_enum;
#define PREFIX(name) FL2_error_##name
/*-****************************************
* Error codes handling
******************************************/
#define FL2_ERROR(name) ((size_t)-PREFIX(name))
ERR_STATIC unsigned ERR_isError(size_t code) { return (code > FL2_ERROR(maxCode)); }
ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
/*-****************************************
* Error Strings
******************************************/
const char* ERR_getFL2ErrorString(ERR_enum code); /* error_private.c */
ERR_STATIC const char* ERR_getErrorName(size_t code)
{
return ERR_getFL2ErrorString(ERR_getErrorCode(code));
}
#if defined (__cplusplus)
}
#endif
#endif /* ERROR_H_MODULE */


@@ -35,13 +35,15 @@ typedef enum {
FL2_error_checksum_wrong = 4,
FL2_error_parameter_unsupported = 5,
FL2_error_parameter_outOfBound = 6,
FL2_error_stage_wrong = 7,
FL2_error_init_missing = 8,
FL2_error_memory_allocation = 9,
FL2_error_dstSize_tooSmall = 10,
FL2_error_srcSize_wrong = 11,
FL2_error_write_failed = 12,
FL2_error_lclpMax_exceeded = 7,
FL2_error_stage_wrong = 8,
FL2_error_init_missing = 9,
FL2_error_memory_allocation = 10,
FL2_error_dstSize_tooSmall = 11,
FL2_error_srcSize_wrong = 12,
FL2_error_canceled = 13,
FL2_error_buffer = 14,
FL2_error_timedOut = 15,
FL2_error_maxCode = 20 /* never EVER use this value directly, it can change in future versions! Use FL2_isError() instead */
} FL2_ErrorCode;


@@ -18,19 +18,30 @@
***************************************/
#include "mem.h"
#include "compiler.h"
#include "fl2_error_private.h"
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Error codes handling
******************************************/
#define PREFIX(name) FL2_error_##name
#define FL2_ERROR(name) ((size_t)-PREFIX(name))
/*-*************************************
* Stream properties
***************************************/
#define FL2_PROP_HASH_BIT 7
#define FL2_LZMA_PROP_MASK 0x3FU
#ifndef NO_XXHASH
# define XXHASH_SIZEOF sizeof(XXH32_canonical_t)
#endif
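A short sketch of the property byte layout these masks imply; bit 7 flags a stored hash and the low six bits carry the dictionary size code:

static unsigned propHasHash(unsigned char prop)
{
    return (prop >> FL2_PROP_HASH_BIT) & 1;
}
static unsigned char propDictCode(unsigned char prop)
{
    return prop & FL2_LZMA_PROP_MASK;  /* decoded by FL2_getDictSizeFromProp() */
}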
/*-*************************************
* Debug
***************************************/
@@ -77,8 +88,8 @@ extern int g_debuglog_enable;
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return FL2_ERROR(e); } /* check and send Error code */
#define CHECK_F(f) do { size_t const errcod = f; if (FL2_isError(errcod)) return errcod; } while(0) /* check and Forward error code */
#define CHECK_E(f, e) do { size_t const errcod = f; if (FL2_isError(errcod)) return FL2_ERROR(e); } while(0) /* check and send Error code */
MEM_STATIC U32 ZSTD_highbit32(U32 val)
{

C/fast-lzma2/fl2_pool.c (new file, 198 lines)

@@ -0,0 +1,198 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, calloc */
#include "fl2_pool.h"
#include "fl2_internal.h"
#ifndef FL2_SINGLETHREAD
#include "fl2_threading.h" /* pthread adaptation */
struct FL2POOL_ctx_s {
/* Keep track of the threads */
size_t numThreads;
/* All threads work on the same function and object during a job */
FL2POOL_function function;
void *opaque;
/* The number of threads working on jobs */
size_t numThreadsBusy;
/* Indicates the number of threads requested and the values to pass */
ptrdiff_t queueIndex;
ptrdiff_t queueEnd;
/* The mutex protects the queue */
FL2_pthread_mutex_t queueMutex;
/* Condition variable for pushers to wait on when the queue is full */
FL2_pthread_cond_t busyCond;
/* Condition variable for poppers to wait on when the queue is empty */
FL2_pthread_cond_t newJobsCond;
/* Indicates if the queue is shutting down */
int shutdown;
/* The thread handles. Slots beyond the first are calloc'd past the end of the struct */
FL2_pthread_t threads[1];
};
/* FL2POOL_thread() :
Work thread for the thread pool.
Waits for jobs and executes them.
@returns : NULL on failure else non-null.
*/
static void* FL2POOL_thread(void* opaque)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)opaque;
if (!ctx) { return NULL; }
FL2_pthread_mutex_lock(&ctx->queueMutex);
for (;;) {
/* While the mutex is locked, wait for a non-empty queue or until shutdown */
while (ctx->queueIndex >= ctx->queueEnd && !ctx->shutdown) {
FL2_pthread_cond_wait(&ctx->newJobsCond, &ctx->queueMutex);
}
/* empty => shutting down: so stop */
if (ctx->shutdown) {
FL2_pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
/* Pop a job off the queue */
size_t n = ctx->queueIndex;
++ctx->queueIndex;
++ctx->numThreadsBusy;
/* Unlock the mutex and run the job */
FL2_pthread_mutex_unlock(&ctx->queueMutex);
ctx->function(ctx->opaque, n);
FL2_pthread_mutex_lock(&ctx->queueMutex);
--ctx->numThreadsBusy;
/* Signal the master thread waiting for jobs to complete */
FL2_pthread_cond_signal(&ctx->busyCond);
} /* for (;;) */
/* Unreachable */
}
FL2POOL_ctx* FL2POOL_create(size_t numThreads)
{
FL2POOL_ctx* ctx;
/* Check the parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = calloc(1, sizeof(FL2POOL_ctx) + (numThreads - 1) * sizeof(FL2_pthread_t));
if (!ctx) { return NULL; }
/* Initialize the busy count and jobs range */
ctx->numThreadsBusy = 0;
ctx->queueIndex = 0;
ctx->queueEnd = 0;
(void)FL2_pthread_mutex_init(&ctx->queueMutex, NULL);
(void)FL2_pthread_cond_init(&ctx->busyCond, NULL);
(void)FL2_pthread_cond_init(&ctx->newJobsCond, NULL);
ctx->shutdown = 0;
ctx->numThreads = 0;
/* Initialize the threads */
{ size_t i;
for (i = 0; i < numThreads; ++i) {
if (FL2_pthread_create(&ctx->threads[i], NULL, &FL2POOL_thread, ctx)) {
ctx->numThreads = i;
FL2POOL_free(ctx);
return NULL;
} }
ctx->numThreads = numThreads;
}
return ctx;
}
/*! FL2POOL_join() :
Shut down the queue, wake any sleeping threads, and join all of the threads.
*/
static void FL2POOL_join(FL2POOL_ctx* ctx)
{
/* Shut down the queue */
FL2_pthread_mutex_lock(&ctx->queueMutex);
ctx->shutdown = 1;
/* Wake up sleeping threads */
FL2_pthread_cond_broadcast(&ctx->newJobsCond);
FL2_pthread_mutex_unlock(&ctx->queueMutex);
/* Join all of the threads */
for (size_t i = 0; i < ctx->numThreads; ++i)
FL2_pthread_join(ctx->threads[i], NULL);
}
void FL2POOL_free(FL2POOL_ctx *ctx)
{
if (!ctx) { return; }
FL2POOL_join(ctx);
FL2_pthread_mutex_destroy(&ctx->queueMutex);
FL2_pthread_cond_destroy(&ctx->busyCond);
FL2_pthread_cond_destroy(&ctx->newJobsCond);
free(ctx);
}
size_t FL2POOL_sizeof(FL2POOL_ctx *ctx)
{
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx) + ctx->numThreads * sizeof(FL2_pthread_t);
}
void FL2POOL_addRange(void* ctxVoid, FL2POOL_function function, void *opaque, ptrdiff_t first, ptrdiff_t end)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx)
return;
/* Callers always wait for jobs to complete before adding a new set */
assert(!ctx->numThreadsBusy);
FL2_pthread_mutex_lock(&ctx->queueMutex);
ctx->function = function;
ctx->opaque = opaque;
ctx->queueIndex = first;
ctx->queueEnd = end;
FL2_pthread_cond_broadcast(&ctx->newJobsCond);
FL2_pthread_mutex_unlock(&ctx->queueMutex);
}
void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, ptrdiff_t n)
{
FL2POOL_addRange(ctxVoid, function, opaque, n, n + 1);
}
int FL2POOL_waitAll(void *ctxVoid, unsigned timeout)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx || (!ctx->numThreadsBusy && ctx->queueIndex >= ctx->queueEnd) || ctx->shutdown) { return 0; }
FL2_pthread_mutex_lock(&ctx->queueMutex);
/* Need to test for ctx->queueIndex < ctx->queueEnd in case not all jobs have started */
if (timeout != 0) {
if ((ctx->numThreadsBusy || ctx->queueIndex < ctx->queueEnd) && !ctx->shutdown)
FL2_pthread_cond_timedwait(&ctx->busyCond, &ctx->queueMutex, timeout);
}
else {
while ((ctx->numThreadsBusy || ctx->queueIndex < ctx->queueEnd) && !ctx->shutdown)
FL2_pthread_cond_wait(&ctx->busyCond, &ctx->queueMutex);
}
FL2_pthread_mutex_unlock(&ctx->queueMutex);
return ctx->numThreadsBusy && !ctx->shutdown;
}
size_t FL2POOL_threadsBusy(void * ctx)
{
return ((FL2POOL_ctx*)ctx)->numThreadsBusy;
}
#endif /* FL2_SINGLETHREAD */
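A minimal sketch of driving this pool from caller code; the job function and result buffer are illustrative:

static void jobFn(void *opaque, ptrdiff_t n)
{
    ((int *)opaque)[n] = 1;   /* each worker handles one n from [first, end) */
}

static int runEightJobs(void)
{
    int done[8] = { 0 };
    FL2POOL_ctx *const pool = FL2POOL_create(4);
    if (pool == NULL)
        return 1;
    FL2POOL_addRange(pool, jobFn, done, 0, 8); /* queue jobs 0..7 */
    FL2POOL_waitAll(pool, 0);                  /* timeout 0 : block until idle */
    FL2POOL_free(pool);
    return 0;
}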


@@ -42,16 +42,20 @@ size_t FL2POOL_sizeof(FL2POOL_ctx *ctx);
/*! FL2POOL_function :
The function type that can be added to a thread pool.
*/
typedef void(*FL2POOL_function)(void *, size_t);
typedef void(*FL2POOL_function)(void *, ptrdiff_t);
/*! FL2POOL_add() :
Add the job `function(opaque)` to the thread pool.
FL2POOL_addRange adds multiple jobs whose ptrdiff_t parameter runs from first up to, but not including, end.
Possibly blocks until there is room in the queue.
Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
void FL2POOL_add(void *ctx, FL2POOL_function function, void *opaque, size_t n);
void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, ptrdiff_t n);
void FL2POOL_addRange(void *ctx, FL2POOL_function function, void *opaque, ptrdiff_t first, ptrdiff_t end);
void FL2POOL_waitAll(void *ctx);
int FL2POOL_waitAll(void *ctx, unsigned timeout);
size_t FL2POOL_threadsBusy(void *ctx);
#if defined (__cplusplus)
}


@@ -17,6 +17,10 @@
/* create fake symbol to avoid empty translation unit warning */
int g_ZSTD_threading_useles_symbol;
#include "fast-lzma2.h"
#include "fl2_threading.h"
#include "util.h"
#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)
/**
@@ -28,19 +32,18 @@ int g_ZSTD_threading_useles_symbol;
/* === Dependencies === */
#include <process.h>
#include <errno.h>
#include "fl2threading.h"
/* === Implementation === */
static unsigned __stdcall worker(void *arg)
{
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
FL2_pthread_t* const thread = (FL2_pthread_t*) arg;
thread->arg = thread->start_routine(thread->arg);
return 0;
}
int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
int FL2_pthread_create(FL2_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg)
{
(void)unused;
@@ -54,7 +57,7 @@ int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
return 0;
}
int FL2_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
int FL2_pthread_join(FL2_pthread_t thread, void **value_ptr)
{
DWORD result;
@@ -73,3 +76,20 @@ int FL2_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
}
#endif /* FL2_SINGLETHREAD */
unsigned FL2_checkNbThreads(unsigned nbThreads)
{
#ifndef FL2_SINGLETHREAD
if (nbThreads == 0) {
nbThreads = UTIL_countPhysicalCores();
nbThreads += !nbThreads;
}
if (nbThreads > FL2_MAXTHREADS) {
nbThreads = FL2_MAXTHREADS;
}
#else
nbThreads = 1;
#endif
return nbThreads;
}


@@ -0,0 +1,178 @@
/**
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
*/
#ifndef THREADING_H_938743
#define THREADING_H_938743
#include "mem.h"
#ifndef FL2_XZ_BUILD
# ifdef _WIN32
# define MYTHREAD_VISTA
# else
# define MYTHREAD_POSIX /* posix assumed ; need a better detection method */
# endif
#elif defined(HAVE_CONFIG_H)
# include <config.h>
#endif
#if defined (__cplusplus)
extern "C" {
#endif
unsigned FL2_checkNbThreads(unsigned nbThreads);
#if !defined(FL2_SINGLETHREAD) && defined(MYTHREAD_VISTA)
/**
* Windows minimalist Pthread Wrapper, based on :
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
*/
#ifdef WINVER
# undef WINVER
#endif
#define WINVER 0x0600
#ifdef _WIN32_WINNT
# undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0600
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <synchapi.h>
/* mutex */
#define FL2_pthread_mutex_t CRITICAL_SECTION
#define FL2_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
#define FL2_pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define FL2_pthread_mutex_lock(a) EnterCriticalSection((a))
#define FL2_pthread_mutex_unlock(a) LeaveCriticalSection((a))
/* condition variable */
#define FL2_pthread_cond_t CONDITION_VARIABLE
#define FL2_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
#define FL2_pthread_cond_destroy(a) /* No delete */
#define FL2_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define FL2_pthread_cond_timedwait(a, b, c) SleepConditionVariableCS((a), (b), (c))
#define FL2_pthread_cond_signal(a) WakeConditionVariable((a))
#define FL2_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
/* FL2_pthread_create() and FL2_pthread_join() */
typedef struct {
HANDLE handle;
void* (*start_routine)(void*);
void* arg;
} FL2_pthread_t;
int FL2_pthread_create(FL2_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg);
int FL2_pthread_join(FL2_pthread_t thread, void** value_ptr);
/**
* add here more wrappers as required
*/
#elif !defined(FL2_SINGLETHREAD) && defined(MYTHREAD_POSIX)
/* === POSIX Systems === */
# include <sys/time.h>
# include <pthread.h>
#define FL2_pthread_mutex_t pthread_mutex_t
#define FL2_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define FL2_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
#define FL2_pthread_mutex_lock(a) pthread_mutex_lock((a))
#define FL2_pthread_mutex_unlock(a) pthread_mutex_unlock((a))
#define FL2_pthread_cond_t pthread_cond_t
#define FL2_pthread_cond_init(a, b) pthread_cond_init((a), (b))
#define FL2_pthread_cond_destroy(a) pthread_cond_destroy((a))
#define FL2_pthread_cond_wait(a, b) pthread_cond_wait((a), (b))
#define FL2_pthread_cond_signal(a) pthread_cond_signal((a))
#define FL2_pthread_cond_broadcast(a) pthread_cond_broadcast((a))
#define FL2_pthread_t pthread_t
#define FL2_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define FL2_pthread_join(a, b) pthread_join((a),(b))
/* Timed wait functions from XZ by Lasse Collin
*/
/* Sets condtime to the absolute time that is timeout_ms milliseconds
* in the future.
*/
static inline void
mythread_condtime_set(struct timespec *condtime, U32 timeout_ms)
{
condtime->tv_sec = timeout_ms / 1000;
condtime->tv_nsec = (timeout_ms % 1000) * 1000000;
struct timeval now;
gettimeofday(&now, NULL);
condtime->tv_sec += now.tv_sec;
condtime->tv_nsec += now.tv_usec * 1000L;
/* tv_nsec must stay in the range [0, 999_999_999]. */
if (condtime->tv_nsec >= 1000000000L) {
condtime->tv_nsec -= 1000000000L;
++condtime->tv_sec;
}
}
/* Waits on a condition or until a timeout expires. The wait result is
 * discarded, so the caller must re-check its predicate after waking.
 */
static inline void
FL2_pthread_cond_timedwait(FL2_pthread_cond_t *cond, FL2_pthread_mutex_t *mutex,
U32 timeout_ms)
{
struct timespec condtime;
mythread_condtime_set(&condtime, timeout_ms);
pthread_cond_timedwait(cond, mutex, &condtime);
}
#elif defined(FL2_SINGLETHREAD)
/* No multithreading support */
typedef int FL2_pthread_mutex_t;
#define FL2_pthread_mutex_init(a, b) ((void)a, 0)
#define FL2_pthread_mutex_destroy(a)
#define FL2_pthread_mutex_lock(a)
#define FL2_pthread_mutex_unlock(a)
typedef int FL2_pthread_cond_t;
#define FL2_pthread_cond_init(a, b) ((void)a, 0)
#define FL2_pthread_cond_destroy(a)
#define FL2_pthread_cond_wait(a, b)
#define FL2_pthread_cond_signal(a)
#define FL2_pthread_cond_broadcast(a)
/* do not use FL2_pthread_t */
#else
# error FL2_SINGLETHREAD not defined but no threading support found
#endif /* FL2_SINGLETHREAD */
#if defined (__cplusplus)
}
#endif
#endif /* THREADING_H_938743 */
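A small portability sketch using the wrappers above; it compiles against either the Vista or POSIX backend (the flag protocol is illustrative):

static FL2_pthread_mutex_t lock;
static FL2_pthread_cond_t ready;
static int flag = 0;

static void initSync(void)
{
    FL2_pthread_mutex_init(&lock, NULL);
    FL2_pthread_cond_init(&ready, NULL);
}

static void waitForFlag(void)
{
    FL2_pthread_mutex_lock(&lock);
    while (!flag)   /* re-check the predicate after every wakeup */
        FL2_pthread_cond_wait(&ready, &lock);
    FL2_pthread_mutex_unlock(&lock);
}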


@@ -1,201 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, calloc */
#include "fl2pool.h"
#include "fl2_internal.h"
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
#endif
#ifndef FL2_SINGLETHREAD
#include "fl2threading.h" /* pthread adaptation */
/* A job is a function and an opaque argument */
typedef struct FL2POOL_job_s {
FL2POOL_function function;
void *opaque;
size_t n;
} FL2POOL_job;
struct FL2POOL_ctx_s {
/* Keep track of the threads */
ZSTD_pthread_t *threads;
size_t numThreads;
/* The queue is a single job */
FL2POOL_job queue;
/* The number of threads working on jobs */
size_t numThreadsBusy;
/* Indicates if the queue is empty */
int queueEmpty;
/* The mutex protects the queue */
ZSTD_pthread_mutex_t queueMutex;
/* Condition variable for pushers to wait on when the queue is full */
ZSTD_pthread_cond_t queuePushCond;
/* Condition variables for poppers to wait on when the queue is empty */
ZSTD_pthread_cond_t queuePopCond;
/* Indicates if the queue is shutting down */
int shutdown;
};
/* FL2POOL_thread() :
Work thread for the thread pool.
Waits for jobs and executes them.
@returns : NULL on failure else non-null.
*/
static void* FL2POOL_thread(void* opaque) {
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)opaque;
if (!ctx) { return NULL; }
for (;;) {
/* Lock the mutex and wait for a non-empty queue or until shutdown */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
while (ctx->queueEmpty && !ctx->shutdown) {
ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
}
/* empty => shutting down: so stop */
if (ctx->queueEmpty) {
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
/* Pop a job off the queue */
{ FL2POOL_job const job = ctx->queue;
ctx->queueEmpty = 1;
/* Unlock the mutex, signal a pusher, and run the job */
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
job.function(job.opaque, job.n);
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
}
} /* for (;;) */
/* Unreachable */
}
FL2POOL_ctx* FL2POOL_create(size_t numThreads) {
FL2POOL_ctx* ctx;
/* Check the parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (FL2POOL_ctx*)calloc(1, sizeof(FL2POOL_ctx));
if (!ctx) { return NULL; }
/* Initialize the job queue.
* It needs one extra space since one space is wasted to differentiate empty
* and full queues.
*/
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
(void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t));
ctx->numThreads = 0;
/* Check for errors */
if (!ctx->threads) { FL2POOL_free(ctx); return NULL; }
/* Initialize the threads */
{ size_t i;
for (i = 0; i < numThreads; ++i) {
if (FL2_pthread_create(&ctx->threads[i], NULL, &FL2POOL_thread, ctx)) {
ctx->numThreads = i;
FL2POOL_free(ctx);
return NULL;
} }
ctx->numThreads = numThreads;
}
return ctx;
}
/*! FL2POOL_join() :
Shutdown the queue, wake any sleeping threads, and join all of the threads.
*/
static void FL2POOL_join(FL2POOL_ctx* ctx) {
/* Shut down the queue */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->shutdown = 1;
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
/* Wake up sleeping threads */
ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
/* Join all of the threads */
{ size_t i;
for (i = 0; i < ctx->numThreads; ++i) {
FL2_pthread_join(ctx->threads[i], NULL);
} }
}
void FL2POOL_free(FL2POOL_ctx *ctx) {
if (!ctx) { return; }
FL2POOL_join(ctx);
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
free(ctx->threads);
free(ctx);
}
size_t FL2POOL_sizeof(FL2POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx)
+ ctx->numThreads * sizeof(ZSTD_pthread_t);
}
void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, size_t n) {
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx)
return;
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
{ FL2POOL_job const job = {function, opaque, n};
/* Wait until there is space in the queue for the new job */
while (!ctx->queueEmpty && !ctx->shutdown) {
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
}
/* The queue is still going => there is space */
if (!ctx->shutdown) {
ctx->numThreadsBusy++;
ctx->queueEmpty = 0;
ctx->queue = job;
}
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePopCond);
}
void FL2POOL_waitAll(void *ctxVoid)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx) { return; }
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
while (ctx->numThreadsBusy && !ctx->shutdown) {
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}
#endif /* FL2_SINGLETHREAD */


@@ -1,120 +0,0 @@
/**
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
*/
#ifndef THREADING_H_938743
#define THREADING_H_938743
#if defined (__cplusplus)
extern "C" {
#endif
#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)
/**
* Windows minimalist Pthread Wrapper, based on :
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
*/
#ifdef WINVER
# undef WINVER
#endif
#define WINVER 0x0600
#ifdef _WIN32_WINNT
# undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0600
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
/* mutex */
#define ZSTD_pthread_mutex_t CRITICAL_SECTION
#define ZSTD_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
/* condition variable */
#define ZSTD_pthread_cond_t CONDITION_VARIABLE
#define ZSTD_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
#define ZSTD_pthread_cond_destroy(a) /* No delete */
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
/* FL2_pthread_create() and FL2_pthread_join() */
typedef struct {
HANDLE handle;
void* (*start_routine)(void*);
void* arg;
} ZSTD_pthread_t;
int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg);
int FL2_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
/**
* add here more wrappers as required
*/
#elif !defined(FL2_SINGLETHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>
#define ZSTD_pthread_mutex_t pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a))
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a))
#define ZSTD_pthread_cond_t pthread_cond_t
#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b))
#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a))
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b))
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a))
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a))
#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))
#else /* FL2_SINGLETHREAD defined */
/* No multithreading support */
typedef int ZSTD_pthread_mutex_t;
#define ZSTD_pthread_mutex_init(a, b) ((void)a, 0)
#define ZSTD_pthread_mutex_destroy(a)
#define ZSTD_pthread_mutex_lock(a)
#define ZSTD_pthread_mutex_unlock(a)
typedef int ZSTD_pthread_cond_t;
#define ZSTD_pthread_cond_init(a, b) ((void)a, 0)
#define ZSTD_pthread_cond_destroy(a)
#define ZSTD_pthread_cond_wait(a, b)
#define ZSTD_pthread_cond_signal(a)
#define ZSTD_pthread_cond_broadcast(a)
/* do not use ZSTD_pthread_t */
#endif /* FL2_SINGLETHREAD */
#if defined (__cplusplus)
}
#endif
#endif /* THREADING_H_938743 */


(File diff suppressed because it is too large.)


@@ -10,6 +10,7 @@ Public domain
#include "mem.h"
#include "data_block.h"
#include "radix_mf.h"
#include "atomic.h"
#if defined (__cplusplus)
extern "C" {
@@ -19,14 +20,10 @@ extern "C" {
#define LZMA2_END_MARKER '\0'
#define LZMA_MIN_DICT_BITS 12
#define ENC_MIN_BYTES_PER_THREAD 0x20000
typedef struct FL2_lzmaEncoderCtx_s FL2_lzmaEncoderCtx;
typedef enum {
FL2_fast,
FL2_opt,
FL2_ultra
} FL2_strategy;
typedef struct LZMA2_ECtx_s LZMA2_ECtx;
typedef struct
{
@@ -37,25 +34,28 @@ typedef struct
unsigned match_cycles;
FL2_strategy strategy;
unsigned second_dict_bits;
unsigned random_filter;
unsigned reset_interval;
} FL2_lzma2Parameters;
FL2_lzmaEncoderCtx* FL2_lzma2Create();
LZMA2_ECtx* LZMA2_createECtx(void);
void FL2_lzma2Free(FL2_lzmaEncoderCtx* enc);
void LZMA2_freeECtx(LZMA2_ECtx *const enc);
int FL2_lzma2HashAlloc(FL2_lzmaEncoderCtx* enc, const FL2_lzma2Parameters* options);
int LZMA2_hashAlloc(LZMA2_ECtx *const enc, const FL2_lzma2Parameters* const options);
size_t FL2_lzma2Encode(FL2_lzmaEncoderCtx* enc,
FL2_matchTable* tbl,
const FL2_dataBlock block,
const FL2_lzma2Parameters* options,
FL2_progressFn progress, void* opaque, size_t base, U32 weight);
size_t LZMA2_encode(LZMA2_ECtx *const enc,
FL2_matchTable* const tbl,
FL2_dataBlock const block,
const FL2_lzma2Parameters* const options,
int stream_prop,
FL2_atomic *const progress_in,
FL2_atomic *const progress_out,
int *const canceled);
BYTE FL2_getDictSizeProp(size_t dictionary_size);
BYTE LZMA2_getDictSizeProp(size_t const dictionary_size);
size_t FL2_lzma2MemoryUsage(unsigned chain_log, FL2_strategy strategy, unsigned thread_count);
size_t LZMA2_encMemoryUsage(unsigned const chain_log, FL2_strategy const strategy, unsigned const thread_count);
#if defined (__cplusplus)
}


@@ -28,9 +28,6 @@ extern "C" {
#if defined(_MSC_VER) /* Visual Studio */
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
# pragma warning(disable : 4389) /* disable: C4389: '==' : signed/unsigned mismatch */
#endif
#endif
#if defined(__GNUC__)
# define MEM_STATIC static __inline __attribute__((unused))
@@ -42,6 +39,10 @@ extern "C" {
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
#endif
/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
@@ -60,11 +61,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
typedef uint64_t U64;
typedef int64_t S64;
#else
# include <limits.h>
#if CHAR_BIT != 8
# error "this implementation requires char to be exactly 8-bit type"
#endif
typedef unsigned char BYTE;
#if USHRT_MAX != 65535
# error "this implementation requires short to be exactly 16-bit type"
#endif
typedef unsigned short U16;
typedef signed short S16;
#if UINT_MAX != 4294967295
# error "this implementation requires int to be exactly 32-bit type"
#endif
typedef unsigned int U32;
typedef signed int S32;
/* note : there are no limits defined for long long type in C90.
* limits exist in C99, however, in such case, <stdint.h> is preferred */
typedef unsigned long long U64;
typedef signed long long S64;
#endif
@@ -189,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_ulong(in);
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#else
return ((in << 24) & 0xff000000 ) |
@@ -203,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_uint64(in);
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
return __builtin_bswap64(in);
#else
return ((in << 56) & 0xff00000000000000ULL) |
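For reference, the portable fallback in both swap functions is plain shift-and-mask; a worked check (sketch) for the 32-bit case:

#include <assert.h>

static void swap32Check(void)
{
    U32 const in  = 0x11223344u;
    U32 const out = ((in << 24) & 0xff000000) |
                    ((in <<  8) & 0x00ff0000) |
                    ((in >>  8) & 0x0000ff00) |
                    ((in >> 24) & 0x000000ff);
    assert(out == 0x44332211u);       /* bytes reversed */
    assert(MEM_swap32(in) == out);    /* matches whichever path is compiled */
}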


@@ -22,8 +22,8 @@ extern "C" {
****************************************/
#if defined(_MSC_VER)
# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
# if (_MSC_VER <= 1800) /* 1800 == Visual Studio 2013 */
# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */
# if (_MSC_VER <= 1800) /* (1800 = Visual Studio 2013) */
# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */
# endif
#endif
@@ -65,38 +65,55 @@ extern "C" {
/* ************************************************************
* Detect POSIX version
* PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows
* PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX
* PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION
* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
* Value of PLATFORM_POSIX_VERSION can be forced on command line
***************************************************************/
#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
|| defined(__midipix__) || defined(__VMS))
# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.12001 (SUSv3) conformant */ \
#ifndef PLATFORM_POSIX_VERSION
# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
|| defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
/* exception rule : force posix version to 200112L,
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
# define PLATFORM_POSIX_VERSION 200112L
# else
/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
* note : there is no simple way to know in advance if <unistd.h> is present or not on target system,
* Posix specification mandates its presence and its content, but target system must respect this spec.
* It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like
* otherwise it will block preprocessing stage.
* The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
*/
# elif !defined(_WIN32) \
&& (defined(__unix__) || defined(__unix) \
|| defined(__midipix__) || defined(__VMS) || defined(__HAIKU__))
# if defined(__linux__) || defined(__linux)
# ifndef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200112L /* use feature test macro */
# define _POSIX_C_SOURCE 200112L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
# endif
# endif
# include <unistd.h> /* declares _POSIX_VERSION */
# if defined(_POSIX_VERSION) /* POSIX compliant */
# define PLATFORM_POSIX_VERSION _POSIX_VERSION
# else
# define PLATFORM_POSIX_VERSION 1
# endif
# else /* non-unix target platform (like Windows) */
# define PLATFORM_POSIX_VERSION 0
# endif
# endif
#endif
#if !defined(PLATFORM_POSIX_VERSION)
# define PLATFORM_POSIX_VERSION -1
#endif
#endif /* PLATFORM_POSIX_VERSION */
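An illustrative downstream guard (the helper macro is hypothetical); with the updated values, 0 means non-Unix and anything >= 200112L is full POSIX.1-2001:

#if PLATFORM_POSIX_VERSION >= 200112L
#  include <unistd.h>
#  define UTIL_HAS_POSIX_2001 1
#else
#  define UTIL_HAS_POSIX_2001 0
#endif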
/*-*********************************************
* Detect if isatty() and fileno() are available
************************************************/
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \
|| (PLATFORM_POSIX_VERSION >= 200112L) \
|| defined(__DJGPP__) \
|| defined(__MSYS__)
# include <unistd.h> /* isatty */
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
@@ -106,8 +123,7 @@ extern "C" {
# include <io.h> /* _isatty */
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <stdio.h> /* FILE */
static __inline int IS_CONSOLE(FILE* stdStream)
{
static __inline int IS_CONSOLE(FILE* stdStream) {
DWORD dummy;
return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
}
@@ -117,7 +133,7 @@ static __inline int IS_CONSOLE(FILE* stdStream)
/******************************
* OS-specific Includes
* OS-specific IO behaviors
******************************/
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
# include <fcntl.h> /* _O_BINARY */
@@ -125,7 +141,7 @@ static __inline int IS_CONSOLE(FILE* stdStream)
# if !defined(__DJGPP__)
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <winioctl.h> /* FSCTL_SET_SPARSE */
# define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
# define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); }
# else
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
@@ -146,6 +162,34 @@ static __inline int IS_CONSOLE(FILE* stdStream)
#endif
#ifndef ZSTD_START_SYMBOLLIST_FRAME
# ifdef __linux__
# define ZSTD_START_SYMBOLLIST_FRAME 2
# elif defined __APPLE__
# define ZSTD_START_SYMBOLLIST_FRAME 4
# else
# define ZSTD_START_SYMBOLLIST_FRAME 0
# endif
#endif
#ifndef ZSTD_SETPRIORITY_SUPPORT
/* mandates presence of <sys/resource.h> and support for setpriority() : http://man7.org/linux/man-pages/man2/setpriority.2.html */
# define ZSTD_SETPRIORITY_SUPPORT (PLATFORM_POSIX_VERSION >= 200112L)
#endif
#ifndef ZSTD_NANOSLEEP_SUPPORT
/* mandates support of nanosleep() within <time.h> : http://man7.org/linux/man-pages/man2/nanosleep.2.html */
# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) \
|| (PLATFORM_POSIX_VERSION >= 200112L)
# define ZSTD_NANOSLEEP_SUPPORT 1
# else
# define ZSTD_NANOSLEEP_SUPPORT 0
# endif
#endif
#if defined (__cplusplus)
}
#endif


@@ -9,7 +9,7 @@
*/
#include "mem.h" /* U32, U64 */
#include "fl2threading.h"
#include "fl2_threading.h"
#include "fl2_internal.h"
#include "radix_internal.h"
@@ -52,10 +52,9 @@ void RMF_bitpackLimitLengths(FL2_matchTable* const tbl, size_t const index)
SetNull(index - 1);
for (U32 length = 2; length < RADIX_MAX_LENGTH && length <= index; ++length) {
U32 const link = tbl->table[index - length];
if (link != RADIX_NULL_LINK) {
if (link != RADIX_NULL_LINK)
tbl->table[index - length] = (MIN(length, link >> RADIX_LINK_BITS) << RADIX_LINK_BITS) | (link & RADIX_LINK_MASK);
}
}
}
#include "radix_engine.h"


@@ -9,80 +9,82 @@
*/
#include <stdio.h>
#include "count.h"
#define MAX_READ_BEYOND_DEPTH 2
/* If a repeating byte is found, fill that section of the table with matches of distance 1 */
static size_t HandleRepeat(FL2_matchTable* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t const block_size, ptrdiff_t i, size_t const radix_16)
static size_t RMF_handleRepeat(RMF_builder* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t i, U32 depth)
{
ptrdiff_t const rpt_index = i - (MAX_REPEAT / 2 - 2);
ptrdiff_t rpt_end;
/* Set the head to the first byte of the repeat and adjust the count */
tbl->list_heads[radix_16].head = (U32)(rpt_index - 1);
tbl->list_heads[radix_16].count -= MAX_REPEAT / 2 - 2;
/* Find the end */
i += ZSTD_count(data_block + i + 2, data_block + i + 1, data_block + block_size);
rpt_end = i;
/* Normally the last 2 bytes, but may be 4 if depth == 4 */
ptrdiff_t const last_2 = i + MAX_REPEAT / 2 - 1;
/* Find the start */
i += (4 - (i & 3)) & 3;
U32 u = *(U32*)(data_block + i);
while (i != 0 && *(U32*)(data_block + i - 4) == u)
i -= 4;
while (i != 0 && data_block[i - 1] == (BYTE)u)
--i;
ptrdiff_t const rpt_index = i;
/* No point if it's in the overlap region */
if (i >= (ptrdiff_t)start) {
U32 len = 2;
if (last_2 >= (ptrdiff_t)start) {
U32 len = depth;
/* Set matches at distance 1 and available length */
for (; i >= rpt_index && len <= RADIX_MAX_LENGTH; --i) {
for (i = last_2; i > rpt_index && len <= RADIX_MAX_LENGTH; --i) {
SetMatchLinkAndLength(i, (U32)(i - 1), len);
++len;
}
/* Set matches at distance 1 and max length */
for (; i >= rpt_index; --i) {
for (; i > rpt_index; --i)
SetMatchLinkAndLength(i, (U32)(i - 1), RADIX_MAX_LENGTH);
}
}
return rpt_end;
return rpt_index;
}
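A standalone sketch of the distance-1 idea above, separated from the table macros: inside a run of one repeated byte, every position matches its predecessor, with usable length growing toward the start of the run (recordMatch() is a stub standing in for SetMatchLinkAndLength()):

#include <stddef.h>

static void recordMatch(size_t pos, size_t link, unsigned len)
{
    (void)pos; (void)link; (void)len;  /* real code writes the match table */
}

static void markByteRun(size_t runStart, size_t runEnd, unsigned maxLen)
{
    unsigned len = 2;
    for (size_t i = runEnd; i > runStart; --i) {
        recordMatch(i, i - 1, len < maxLen ? len : maxLen);
        ++len;
    }
}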
/* If a 2-byte repeat is found, fill that section of the table with matches of distance 2 */
static size_t HandleRepeat2(FL2_matchTable* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t const block_size, ptrdiff_t i, size_t const radix_16)
static size_t RMF_handleRepeat2(RMF_builder* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t i, U32 depth)
{
size_t radix_16_rev;
ptrdiff_t const rpt_index = i - (MAX_REPEAT - 3);
ptrdiff_t rpt_end;
/* Normally the last 2 bytes, but may be 4 if depth == 4 */
ptrdiff_t const last_2 = i + MAX_REPEAT * 2U - 4;
/* Set the head to the first byte of the repeat and adjust the count */
tbl->list_heads[radix_16].head = (U32)(rpt_index - 1);
tbl->list_heads[radix_16].count -= MAX_REPEAT / 2 - 2;
radix_16_rev = ((radix_16 >> 8) | (radix_16 << 8)) & 0xFFFF;
tbl->list_heads[radix_16_rev].head = (U32)(rpt_index - 2);
tbl->list_heads[radix_16_rev].count -= MAX_REPEAT / 2 - 1;
/* Find the end */
i += ZSTD_count(data_block + i + 2, data_block + i, data_block + block_size);
rpt_end = i;
/* Find the start */
ptrdiff_t realign = i & 1;
i += (4 - (i & 3)) & 3;
U32 u = *(U32*)(data_block + i);
while (i != 0 && *(U32*)(data_block + i - 4) == u)
i -= 4;
while (i != 0 && data_block[i - 1] == data_block[i + 1])
--i;
i += (i & 1) ^ realign;
ptrdiff_t const rpt_index = i;
/* No point if it's in the overlap region */
if (i >= (ptrdiff_t)start) {
U32 len = 2;
U32 len = depth + (data_block[last_2 + depth] == data_block[last_2]);
/* Set matches at distance 2 and available length */
for (; i >= rpt_index && len <= RADIX_MAX_LENGTH; --i) {
for (i = last_2; i > rpt_index && len <= RADIX_MAX_LENGTH; i -= 2) {
SetMatchLinkAndLength(i, (U32)(i - 2), len);
++len;
len += 2;
}
/* Set matches at distance 2 and max length */
for (; i >= rpt_index; --i) {
for (; i > rpt_index; i -= 2)
SetMatchLinkAndLength(i, (U32)(i - 2), RADIX_MAX_LENGTH);
}
}
return rpt_end;
return rpt_index;
}
/* Initialization for the reference algorithm */
#ifdef RMF_REFERENCE
static void RadixInitReference(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
static void RMF_initReference(FL2_matchTable* const tbl, const void* const data, size_t const end)
{
const BYTE* const data_block = (const BYTE*)data;
ptrdiff_t const block_size = end - 1;
size_t st_index = 0;
for (ptrdiff_t i = 0; i < block_size; ++i)
{
size_t radix_16 = ((size_t)data_block[i] << 8) | data_block[i + 1];
U32 prev = tbl->list_heads[radix_16].head;
size_t const radix_16 = ((size_t)data_block[i] << 8) | data_block[i + 1];
U32 const prev = tbl->list_heads[radix_16].head;
if (prev != RADIX_NULL_LINK) {
SetMatchLinkAndLength(i, prev, 2U);
tbl->list_heads[radix_16].head = (U32)i;
@@ -98,7 +100,6 @@ static void RadixInitReference(FL2_matchTable* const tbl, const void* const data
SetNull(end - 1);
tbl->end_index = (U32)st_index;
tbl->st_index = ATOMIC_INITIAL_VALUE;
(void)start;
}
#endif
@@ -108,48 +109,43 @@ RMF_bitpackInit
#else
RMF_structuredInit
#endif
(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
(FL2_matchTable* const tbl, const void* const data, size_t const end)
{
const BYTE* const data_block = (const BYTE*)data;
size_t st_index = 0;
size_t radix_16;
ptrdiff_t const block_size = end - 2;
ptrdiff_t rpt_total = 0;
U32 count = 0;
if (end <= 2) {
for (size_t i = 0; i < end; ++i) {
for (size_t i = 0; i < end; ++i)
SetNull(i);
}
tbl->end_index = 0;
return 0;
}
#ifdef RMF_REFERENCE
if (tbl->params.use_ref_mf) {
RadixInitReference(tbl, data, start, end);
RMF_initReference(tbl, data, end);
return 0;
}
#endif
SetNull(0);
const BYTE* const data_block = (const BYTE*)data;
size_t st_index = 0;
/* Initial 2-byte radix value */
radix_16 = ((size_t)data_block[0] << 8) | data_block[1];
size_t radix_16 = ((size_t)data_block[0] << 8) | data_block[1];
tbl->stack[st_index++] = (U32)radix_16;
tbl->list_heads[radix_16].head = 0;
tbl->list_heads[radix_16].count = 1;
radix_16 = ((size_t)((BYTE)radix_16) << 8) | data_block[2];
ptrdiff_t rpt_total = 0;
ptrdiff_t i = 1;
ptrdiff_t const block_size = end - 2;
for (; i < block_size; ++i) {
/* Pre-load the next value for speed increase */
/* Pre-load the next value for speed increase on some hardware. Execution can continue while memory read is pending */
size_t const next_radix = ((size_t)((BYTE)radix_16) << 8) | data_block[i + 2];
U32 const prev = tbl->list_heads[radix_16].head;
if (prev != RADIX_NULL_LINK) {
S32 dist = (S32)i - prev;
/* Check for repeat */
if (dist > 2) {
count = 0;
/* Link this position to the previous occurance */
/* Link this position to the previous occurrence */
InitMatchLink(i, prev);
/* Set the previous to this position */
tbl->list_heads[radix_16].head = (U32)i;
@@ -157,33 +153,6 @@ RMF_structuredInit
radix_16 = next_radix;
}
else {
count += 3 - dist;
/* Do the usual if the repeat is too short */
if (count < MAX_REPEAT - 2) {
InitMatchLink(i, prev);
tbl->list_heads[radix_16].head = (U32)i;
++tbl->list_heads[radix_16].count;
radix_16 = next_radix;
}
else {
ptrdiff_t const prev_i = i;
/* Eliminate the repeat from the linked list to save time */
if (dist == 1) {
i = HandleRepeat(tbl, data_block, start, end, i, radix_16);
rpt_total += i - prev_i + MAX_REPEAT / 2U - 1;
}
else {
i = HandleRepeat2(tbl, data_block, start, end, i, radix_16);
rpt_total += i - prev_i + MAX_REPEAT - 2;
}
if (i < block_size)
radix_16 = ((size_t)data_block[i + 1] << 8) | data_block[i + 2];
count = 0;
}
}
}
else {
count = 0;
SetNull(i);
tbl->list_heads[radix_16].head = (U32)i;
tbl->list_heads[radix_16].count = 1;
@@ -192,65 +161,100 @@ RMF_structuredInit
}
}
/* Handle the last value */
if (i <= block_size && tbl->list_heads[radix_16].head != RADIX_NULL_LINK) {
if (tbl->list_heads[radix_16].head != RADIX_NULL_LINK)
SetMatchLinkAndLength(block_size, tbl->list_heads[radix_16].head, 2);
}
else {
else
SetNull(block_size);
}
/* Never a match at the last byte */
SetNull(end - 1);
tbl->end_index = (U32)st_index;
tbl->st_index = ATOMIC_INITIAL_VALUE;
return rpt_total;
}
#if defined(_MSC_VER)
# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */
#endif
/* Copy the list into a buffer and recurse it there. This decreases cache misses and allows */
/* data characters to be loaded every fourth pass and stored for use in the next 4 passes */
static void RecurseListsBuffered(RMF_builder* const tbl,
static void RMF_recurseListsBuffered(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
BYTE depth,
BYTE const max_depth,
U32 depth,
U32 const max_depth,
U32 orig_list_count,
size_t const stack_base)
{
if (orig_list_count < 2 || tbl->match_buffer_limit < 2)
return;
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE* data_src = data_block + depth;
size_t start = 0;
if (orig_list_count < 2 || tbl->match_buffer_limit < 2)
return;
do {
size_t count = start;
U32 list_count = (U32)(start + orig_list_count);
U32 overlap;
if (list_count > tbl->match_buffer_limit) {
if (list_count > tbl->match_buffer_limit)
list_count = (U32)tbl->match_buffer_limit;
}
size_t count = start;
size_t prev_link = (size_t)-1;
size_t rpt = 0;
size_t rpt_tail = link;
for (; count < list_count; ++count) {
/* Pre-load next link */
size_t const next_link = GetMatchLink(link);
size_t dist = prev_link - link;
if (dist > 2) {
/* Get 4 data characters for later. This doesn't block on a cache miss. */
tbl->match_buffer[count].src.u32 = MEM_read32(data_src + link);
/* Record the actual location of this suffix */
tbl->match_buffer[count].from = (U32)link;
/* Initialize the next link */
tbl->match_buffer[count].next = (U32)(count + 1) | ((U32)depth << 24);
tbl->match_buffer[count].next = (U32)(count + 1) | (depth << 24);
rpt = 0;
prev_link = link;
rpt_tail = link;
link = next_link;
}
else {
rpt += 3 - dist;
/* Do the usual if the repeat is too short */
if (rpt < MAX_REPEAT - 2) {
/* Get 4 data characters for later. This doesn't block on a cache miss. */
tbl->match_buffer[count].src.u32 = MEM_read32(data_src + link);
/* Record the actual location of this suffix */
tbl->match_buffer[count].from = (U32)link;
/* Initialize the next link */
tbl->match_buffer[count].next = (U32)(count + 1) | (depth << 24);
prev_link = link;
link = next_link;
}
else {
/* Eliminate the repeat from the linked list to save time */
if (dist == 1) {
link = RMF_handleRepeat(tbl, data_block, block_start, link, depth);
count -= MAX_REPEAT / 2;
orig_list_count -= (U32)(rpt_tail - link);
}
else {
link = RMF_handleRepeat2(tbl, data_block, block_start, link, depth);
count -= MAX_REPEAT - 1;
orig_list_count -= (U32)(rpt_tail - link) >> 1;
}
rpt = 0;
list_count = (U32)(start + orig_list_count);
if (list_count > tbl->match_buffer_limit)
list_count = (U32)tbl->match_buffer_limit;
}
}
}
count = list_count;
/* Make the last element circular so pre-loading doesn't read past the end. */
tbl->match_buffer[count - 1].next = (U32)(count - 1) | ((U32)depth << 24);
overlap = 0;
tbl->match_buffer[count - 1].next = (U32)(count - 1) | (depth << 24);
U32 overlap = 0;
if (list_count < (U32)(start + orig_list_count)) {
overlap = list_count >> MATCH_BUFFER_OVERLAP;
overlap += !overlap;
@@ -259,23 +263,33 @@ static void RecurseListsBuffered(RMF_builder* const tbl,
orig_list_count -= (U32)(list_count - start);
/* Copy everything back, except the last link which never changes, and any extra overlap */
count -= overlap + (overlap == 0);
#ifdef RMF_BITPACK
if (max_depth > RADIX_MAX_LENGTH) for (size_t index = 0; index < count; ++index) {
size_t const from = tbl->match_buffer[index].from;
if (from < block_start)
return;
U32 length = tbl->match_buffer[index].next >> 24;
length = (length > RADIX_MAX_LENGTH) ? RADIX_MAX_LENGTH : length;
size_t const next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length);
}
else
#endif
for (size_t index = 0; index < count; ++index) {
size_t const from = tbl->match_buffer[index].from;
if (from < block_start)
return;
{ U32 length = tbl->match_buffer[index].next >> 24;
size_t next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
U32 const length = tbl->match_buffer[index].next >> 24;
size_t const next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length);
}
}
start = 0;
if (overlap) {
size_t dest = 0;
for (size_t src = list_count - overlap; src < list_count; ++src) {
tbl->match_buffer[dest].from = tbl->match_buffer[src].from;
tbl->match_buffer[dest].src.u32 = MEM_read32(data_src + tbl->match_buffer[src].from);
tbl->match_buffer[dest].next = (U32)(dest + 1) | ((U32)depth << 24);
tbl->match_buffer[dest].next = (U32)(dest + 1) | (depth << 24);
++dest;
}
start = dest;
@@ -283,30 +297,23 @@ static void RecurseListsBuffered(RMF_builder* const tbl,
} while (orig_list_count != 0);
}
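The buffer entries above pack two fields into 'next': the low 24 bits index the next buffer slot and the high 8 bits carry the current depth. A sketch of the packing (the mask value is an assumption standing in for BUFFER_LINK_MASK):

#define BUF_LINK_MASK 0xFFFFFFu   /* assumed value of BUFFER_LINK_MASK */

static U32 packNext(U32 nextIndex, U32 depth) { return nextIndex | (depth << 24); }
static U32 linkOf(U32 next)  { return next & BUF_LINK_MASK; }
static U32 depthOf(U32 next) { return next >> 24; }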
/* Parse the list with bounds checks on data reads. Stop at the point where bound checks are not required. */
/* Parse the list with an upper bound check on data reads. Stop at the point where bound checks are not required. */
/* Buffering is used so that parsing can continue below the bound to find a few matches without altering the main table. */
static void RecurseListsBound(RMF_builder* const tbl,
static void RMF_recurseListsBound(RMF_builder* const tbl,
const BYTE* const data_block,
ptrdiff_t const block_size,
RMF_tableHead* const list_head,
U32 const max_depth)
U32 max_depth)
{
U32 list_count = list_head->count;
if (list_count < 2)
return;
ptrdiff_t link = list_head->head;
ptrdiff_t const bounded_size = max_depth + MAX_READ_BEYOND_DEPTH;
ptrdiff_t const bounded_start = block_size - MIN(block_size, bounded_size);
/* Create an offset data buffer pointer for reading the next bytes */
size_t count = 0;
size_t extra_count = (max_depth >> 4) + 4;
ptrdiff_t limit;
const BYTE* data_src;
U32 depth;
size_t index;
size_t st_index;
RMF_listTail* tails_8;
if (list_count < 2)
return;
list_count = MIN((U32)bounded_size, list_count);
list_count = MIN(list_count, (U32)tbl->match_buffer_size);
@@ -314,10 +321,9 @@ static void RecurseListsBound(RMF_builder* const tbl,
ptrdiff_t next_link = GetMatchLink(link);
if (link >= bounded_start) {
--list_head->count;
if (next_link < bounded_start) {
if (next_link < bounded_start)
list_head->head = (U32)next_link;
}
}
else {
--extra_count;
}
@@ -328,18 +334,20 @@ static void RecurseListsBound(RMF_builder* const tbl,
link = next_link;
}
list_count = (U32)count;
limit = block_size - 2;
data_src = data_block + 2;
depth = 3;
index = 0;
st_index = 0;
tails_8 = tbl->tails_8;
ptrdiff_t limit = block_size - 2;
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE* data_src = data_block + 2;
U32 depth = 3;
size_t index = 0;
size_t st_index = 0;
RMF_listTail* const tails_8 = tbl->tails_8;
do {
link = tbl->match_buffer[index].from;
if (link < limit) {
size_t const radix_8 = data_src[link];
/* Seen this char before? */
const U32 prev = tails_8[radix_8].prev_index;
U32 const prev = tails_8[radix_8].prev_index;
tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tails_8[radix_8].list_count;
/* Link the previous occurrence to this one and record the new length */
@@ -353,7 +361,6 @@ static void RecurseListsBound(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tails_8[radix_8].prev_index = (U32)index;
}
++index;
} while (index < list_count);
@@ -368,10 +375,9 @@ static void RecurseListsBound(RMF_builder* const tbl,
/* Pop an item off the stack */
--st_index;
list_count = tbl->stack[st_index].count;
if (list_count < 2) {
/* Nothing to match with */
if (list_count < 2) /* Nothing to match with */
continue;
}
index = tbl->stack[st_index].head;
depth = (tbl->match_buffer[index].next >> 24);
if (depth >= max_depth)
@@ -390,9 +396,10 @@ static void RecurseListsBound(RMF_builder* const tbl,
if (link < limit) {
size_t const radix_8 = data_src[link];
U32 const prev = tails_8[radix_8].prev_index;
tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tails_8[radix_8].list_count = 1;
@@ -400,7 +407,6 @@ static void RecurseListsBound(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tails_8[radix_8].prev_index = (U32)index;
}
index = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
} while (--list_count != 0);
@@ -413,20 +419,20 @@ static void RecurseListsBound(RMF_builder* const tbl,
--count;
for (index = 0; index < count; ++index) {
ptrdiff_t const from = tbl->match_buffer[index].from;
size_t next;
U32 length;
if (from < bounded_start)
break;
length = tbl->match_buffer[index].next >> 24;
U32 length = tbl->match_buffer[index].next >> 24;
length = MIN(length, (U32)(block_size - from));
next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
length = MIN(length, RADIX_MAX_LENGTH);
size_t const next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length);
}
}
/* Compare each string with all others to find the best match */
static void BruteForce(RMF_builder* const tbl,
static void RMF_bruteForce(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
@@ -445,6 +451,7 @@ static void BruteForce(RMF_builder* const tbl,
link = GetMatchLink(link);
buffer[i] = link;
} while (++i < list_count);
i = 0;
do {
size_t longest = 0;
@@ -454,34 +461,37 @@ static void BruteForce(RMF_builder* const tbl,
do {
const BYTE* data_2 = data_src + buffer[j];
size_t len_test = 0;
while (data[len_test] == data_2[len_test] && len_test < limit) {
while (data[len_test] == data_2[len_test] && len_test < limit)
++len_test;
}
if (len_test > longest) {
longest_index = j;
longest = len_test;
if (len_test >= limit) {
if (len_test >= limit)
break;
}
}
} while (++j < list_count);
if (longest > 0) {
SetMatchLinkAndLength(buffer[i],
(U32)buffer[longest_index],
depth + (U32)longest);
}
if (longest > 0)
SetMatchLinkAndLength(buffer[i], (U32)buffer[longest_index], depth + (U32)longest);
++i;
/* Test with block_start to avoid wasting time matching strings in the overlap region with each other */
} while (i < list_count - 1 && buffer[i] >= block_start);
}
static void RecurseLists16(RMF_builder* const tbl,
/* RMF_recurseLists16() :
* Match strings at depth 2 using a 16-bit radix to lengthen to depth 4
*/
static void RMF_recurseLists16(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
U32 count,
U32 const max_depth)
{
/* Offset data pointer. This method is only called at depth 2 */
U32 const table_max_depth = MIN(max_depth, RADIX_MAX_LENGTH);
/* Offset data pointer. This function is only called at depth 2 */
const BYTE* const data_src = data_block + 2;
/* Load radix values from the data chars */
size_t next_radix_8 = data_src[link];
@@ -489,7 +499,6 @@ static void RecurseLists16(RMF_builder* const tbl,
size_t reset_list[RADIX8_TABLE_SIZE];
size_t reset_count = 0;
size_t st_index = 0;
U32 prev;
/* Last one is done separately */
--count;
do
@@ -504,7 +513,8 @@ static void RecurseLists16(RMF_builder* const tbl,
next_radix_8 = data_src[next_link];
next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8);
prev = tbl->tails_8[radix_8].prev_index;
U32 prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)link;
if (prev != RADIX_NULL_LINK) {
/* Link the previous occurrence to this one at length 3. */
/* This will be overwritten if a 4 is found. */
@@ -513,9 +523,9 @@ static void RecurseLists16(RMF_builder* const tbl,
else {
reset_list[reset_count++] = radix_8;
}
tbl->tails_8[radix_8].prev_index = (U32)link;
prev = tbl->tails_16[radix_16].prev_index;
tbl->tails_16[radix_16].prev_index = (U32)link;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_16[radix_16].list_count;
/* Link at length 4, overwriting the 3 */
@@ -524,35 +534,35 @@ static void RecurseLists16(RMF_builder* const tbl,
else {
tbl->tails_16[radix_16].list_count = 1;
tbl->stack[st_index].head = (U32)link;
/* Store a reference to this table location to retrieve the count at the end */
tbl->stack[st_index].count = (U32)radix_16;
++st_index;
}
tbl->tails_16[radix_16].prev_index = (U32)link;
link = next_link;
} while (--count > 0);
/* Do the last location */
prev = tbl->tails_8[next_radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
U32 prev = tbl->tails_8[next_radix_8].prev_index;
if (prev != RADIX_NULL_LINK)
SetMatchLinkAndLength(prev, (U32)link, 3);
}
prev = tbl->tails_16[next_radix_16].prev_index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_16[next_radix_16].list_count;
SetMatchLinkAndLength(prev, (U32)link, 4);
}
for (size_t i = 0; i < reset_count; ++i) {
for (size_t i = 0; i < reset_count; ++i)
tbl->tails_8[reset_list[i]].prev_index = RADIX_NULL_LINK;
}
for (size_t i = 0; i < st_index; ++i) {
tbl->tails_16[tbl->stack[i].count].prev_index = RADIX_NULL_LINK;
tbl->stack[i].count = tbl->tails_16[tbl->stack[i].count].list_count;
}
while (st_index > 0) {
U32 list_count;
U32 depth;
while (st_index > 0) {
--st_index;
list_count = tbl->stack[st_index].count;
U32 const list_count = tbl->stack[st_index].count;
if (list_count < 2) {
/* Nothing to do */
continue;
@@ -567,19 +577,19 @@ static void RecurseLists16(RMF_builder* const tbl,
continue;
}
/* The current depth */
depth = GetMatchLength(link);
U32 const depth = GetMatchLength(link);
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) {
/* Quicker to use brute force, each string compared with all previous strings */
BruteForce(tbl, data_block,
RMF_bruteForce(tbl, data_block,
block_start,
link,
list_count,
depth,
max_depth);
table_max_depth);
continue;
}
/* Send to the buffer at depth 4 */
RecurseListsBuffered(tbl,
RMF_recurseListsBuffered(tbl,
data_block,
block_start,
link,
@@ -591,7 +601,10 @@ static void RecurseLists16(RMF_builder* const tbl,
}
#if 0
static void RecurseListsUnbuf16(RMF_builder* const tbl,
/* Unbuffered complete processing to max_depth.
* This may be faster on CPUs without a large memory cache.
*/
static void RMF_recurseListsUnbuf16(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
@@ -607,7 +620,6 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
size_t reset_list[RADIX8_TABLE_SIZE];
size_t reset_count = 0;
size_t st_index = 0;
U32 prev;
/* Last one is done separately */
--count;
do
@@ -620,7 +632,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
size_t radix_16 = next_radix_16;
next_radix_8 = data_src[next_link];
next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8);
prev = tails_8[radix_8].prev_index;
U32 prev = tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
/* Link the previous occurrence to this one at length 3. */
/* This will be overwritten if a 4 is found. */
@@ -646,7 +658,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
link = next_link;
} while (--count > 0);
/* Do the last location */
prev = tails_8[next_radix_8].prev_index;
U32 prev = tails_8[next_radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
SetMatchLinkAndLength(prev, (U32)link, 3);
}
@@ -683,7 +695,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
U32 depth = GetMatchLength(link);
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) {
/* Quicker to use brute force, each string compared with all previous strings */
BruteForce(tbl, data_block,
RMF_bruteForce(tbl, data_block,
block_start,
link,
list_count,
@@ -800,7 +812,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
#ifdef RMF_REFERENCE
/* Simple, slow, complete parsing for reference */
static void RecurseListsReference(RMF_builder* const tbl,
static void RMF_recurseListsReference(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_size,
size_t link,
@@ -836,12 +848,8 @@ static void RecurseListsReference(RMF_builder* const tbl,
}
memset(tbl->tails_8, 0xFF, sizeof(tbl->tails_8));
while (st_index > 0) {
U32 list_count;
U32 depth;
size_t prev_st_index;
--st_index;
list_count = tbl->stack[st_index].count;
U32 list_count = tbl->stack[st_index].count;
if (list_count < 2) {
/* Nothing to do */
continue;
@@ -854,14 +862,14 @@ static void RecurseListsReference(RMF_builder* const tbl,
}
link = tbl->stack[st_index].head;
/* The current depth */
depth = GetMatchLength(link);
U32 depth = GetMatchLength(link);
if (depth >= max_depth)
continue;
data_src = data_block + depth;
limit = block_size - depth;
/* Next depth for 1 extra char */
++depth;
prev_st_index = st_index;
size_t prev_st_index = st_index;
do {
if (link < limit) {
size_t const radix_8 = data_src[link];
@@ -890,21 +898,29 @@ static void RecurseListsReference(RMF_builder* const tbl,
#endif /* RMF_REFERENCE */
/* Atomically take a list from the head table */
static ptrdiff_t RMF_getNextList(FL2_matchTable* const tbl, unsigned const multi_thread)
static ptrdiff_t RMF_getNextList_mt(FL2_matchTable* const tbl)
{
if (tbl->st_index < tbl->end_index) {
long index = multi_thread ? FL2_atomic_increment(tbl->st_index) : FL2_nonAtomic_increment(tbl->st_index);
if (index < tbl->end_index) {
long index = FL2_atomic_increment(tbl->st_index);
if (index < tbl->end_index)
return index;
}
}
return -1;
}
#define UPDATE_INTERVAL 0x40000U
/* Non-atomically take a list from the head table */
static ptrdiff_t RMF_getNextList_st(FL2_matchTable* const tbl)
{
if (tbl->st_index < tbl->end_index) {
long index = FL2_nonAtomic_increment(tbl->st_index);
if (index < tbl->end_index)
return index;
}
return -1;
}
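/* A minimal standalone sketch of the same claim-an-index pattern, assuming
* FL2_atomic_increment() behaves like C11 atomic_fetch_add(p, 1); the real
* primitives live in atomic.h. Illustration only. */
#if 0
#include <stdatomic.h>
static long claim_next_index(atomic_long *next, long end)
{
    /* Each caller atomically takes the current value and advances it */
    long index = atomic_fetch_add(next, 1);
    return (index < end) ? index : -1; /* -1 : table exhausted */
}
#endif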
/* Iterate the head table concurrently with other threads, and recurse each list until max_depth is reached */
int
void
#ifdef RMF_BITPACK
RMF_bitpackBuildTable
#else
@@ -913,69 +929,58 @@ RMF_structuredBuildTable
(FL2_matchTable* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done)
FL2_dataBlock const block)
{
if (!block.end)
return 0;
U64 const enc_size = block.end - block.start;
if (block.end == 0)
return;
unsigned const best = !tbl->params.divide_and_conquer;
unsigned const max_depth = MIN(tbl->params.depth, RADIX_MAX_LENGTH) & ~1;
size_t const bounded_start = block.end - max_depth - MAX_READ_BEYOND_DEPTH;
ptrdiff_t next_progress = 0;
size_t update = UPDATE_INTERVAL;
size_t total = init_done;
unsigned const max_depth = MIN(tbl->params.depth, STRUCTURED_MAX_LENGTH) & ~1;
size_t bounded_start = max_depth + MAX_READ_BEYOND_DEPTH;
bounded_start = block.end - MIN(block.end, bounded_start);
ptrdiff_t next_progress = (job == 0) ? 0 : RADIX16_TABLE_SIZE;
ptrdiff_t(*getNextList)(FL2_matchTable* const tbl)
= multi_thread ? RMF_getNextList_mt : RMF_getNextList_st;
for (;;)
{
/* Get the next to process */
ptrdiff_t index = RMF_getNextList(tbl, multi_thread);
RMF_tableHead list_head;
ptrdiff_t index = getNextList(tbl);
if (index < 0) {
if (index < 0)
break;
}
if (progress) {
while (next_progress < index) {
total += tbl->list_heads[tbl->stack[next_progress]].count;
/* initial value of next_progress ensures only thread 0 executes this */
tbl->progress += tbl->list_heads[tbl->stack[next_progress]].count;
++next_progress;
}
if (total >= update) {
if (progress((size_t)((total * enc_size / block.end * weight) >> 4), opaque)) {
FL2_atomic_add(tbl->st_index, RADIX16_TABLE_SIZE);
return 1;
}
update = total + UPDATE_INTERVAL;
}
}
index = tbl->stack[index];
list_head = tbl->list_heads[index];
RMF_tableHead list_head = tbl->list_heads[index];
tbl->list_heads[index].head = RADIX_NULL_LINK;
if (list_head.count < 2 || list_head.head < block.start) {
if (list_head.count < 2 || list_head.head < block.start)
continue;
}
#ifdef RMF_REFERENCE
if (tbl->params.use_ref_mf) {
RecurseListsReference(tbl->builders[job], block.data, block.end, list_head.head, list_head.count, max_depth);
RMF_recurseListsReference(tbl->builders[job], block.data, block.end, list_head.head, list_head.count, max_depth);
continue;
}
#endif
if (list_head.head >= bounded_start) {
RecurseListsBound(tbl->builders[job], block.data, block.end, &list_head, (BYTE)max_depth);
if (list_head.count < 2 || list_head.head < block.start) {
RMF_recurseListsBound(tbl->builders[job], block.data, block.end, &list_head, max_depth);
if (list_head.count < 2 || list_head.head < block.start)
continue;
}
}
if (best && list_head.count > tbl->builders[job]->match_buffer_limit)
{
/* Not worth buffering or too long */
RecurseLists16(tbl->builders[job], block.data, block.start, list_head.head, list_head.count, max_depth);
RMF_recurseLists16(tbl->builders[job], block.data, block.start, list_head.head, list_head.count, max_depth);
}
else {
RecurseListsBuffered(tbl->builders[job], block.data, block.start, list_head.head, 2, (BYTE)max_depth, list_head.count, 0);
RMF_recurseListsBuffered(tbl->builders[job], block.data, block.start, list_head.head, 2, (BYTE)max_depth, list_head.count, 0);
}
}
return 0;
}
int
@@ -984,28 +989,24 @@ RMF_bitpackIntegrityCheck
#else
RMF_structuredIntegrityCheck
#endif
(const FL2_matchTable* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth)
(const FL2_matchTable* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned max_depth)
{
max_depth &= ~1;
int err = 0;
for (index += !index; index < end; ++index) {
U32 link;
U32 length;
U32 len_test;
U32 limit;
if (IsNull(index))
continue;
link = GetMatchLink(index);
U32 const link = GetMatchLink(index);
if (link >= index) {
printf("Forward link at %X to %u\r\n", (U32)index, link);
err = 1;
continue;
}
length = GetMatchLength(index);
U32 const length = GetMatchLength(index);
if (index && length < RADIX_MAX_LENGTH && link - 1 == GetMatchLink(index - 1) && length + 1 == GetMatchLength(index - 1))
continue;
len_test = 0;
limit = MIN((U32)(end - index), RADIX_MAX_LENGTH);
U32 len_test = 0;
U32 const limit = MIN((U32)(end - index), RADIX_MAX_LENGTH);
for (; len_test < limit && data[link + len_test] == data[index + len_test]; ++len_test) {
}
if (len_test < length) {
@@ -1013,63 +1014,8 @@ RMF_structuredIntegrityCheck
err = 1;
}
if (length < max_depth && len_test > length)
/* These occur occasionally due to splitting of chains in the buffer when long repeats are present */
printf("Shortened match at %X: %u of %u\r\n", (U32)index, length, len_test);
}
return err;
}
static size_t ExtendMatch(const FL2_matchTable* const tbl,
const BYTE* const data,
ptrdiff_t const start_index,
ptrdiff_t const limit,
U32 const link,
size_t const length)
{
ptrdiff_t end_index = start_index + length;
ptrdiff_t const dist = start_index - link;
while (end_index < limit && end_index - (ptrdiff_t)GetMatchLink(end_index) == dist) {
end_index += GetMatchLength(end_index);
}
if (end_index >= limit) {
return limit - start_index;
}
while (end_index < limit && data[end_index - dist] == data[end_index]) {
++end_index;
}
return end_index - start_index;
}
size_t
#ifdef RMF_BITPACK
RMF_bitpackGetMatch
#else
RMF_structuredGetMatch
#endif
(const FL2_matchTable* const tbl,
const BYTE* const data,
size_t const index,
size_t const limit,
unsigned const max_depth,
size_t* const offset_ptr)
{
size_t length;
size_t dist;
U32 link;
if (IsNull(index))
return 0;
link = GetMatchLink(index);
length = GetMatchLength(index);
if (length < 2)
return 0;
dist = index - link;
*offset_ptr = dist;
if (length > limit - index)
return limit - index;
if (length == max_depth
|| length == RADIX_MAX_LENGTH /* from HandleRepeat */)
{
length = ExtendMatch(tbl, data, index, limit, link, length);
}
return length;
}

C/fast-lzma2/radix_get.h (new file, 210 additions)
@@ -0,0 +1,210 @@
/*
* Copyright (c) 2018, Conor McCarthy
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#ifndef FL2_RADIX_GET_H_
#define FL2_RADIX_GET_H_
#if defined (__cplusplus)
extern "C" {
#endif
typedef struct
{
U32 length;
U32 dist;
} RMF_match;
static size_t RMF_bitpackExtendMatch(const BYTE* const data,
const U32* const table,
ptrdiff_t const start_index,
ptrdiff_t limit,
U32 const link,
size_t const length)
{
ptrdiff_t end_index = start_index + length;
ptrdiff_t const dist = start_index - link;
if (limit > start_index + (ptrdiff_t)kMatchLenMax)
limit = start_index + kMatchLenMax;
while (end_index < limit && end_index - (ptrdiff_t)(table[end_index] & RADIX_LINK_MASK) == dist)
end_index += table[end_index] >> RADIX_LINK_BITS;
if (end_index >= limit) {
DEBUGLOG(7, "RMF_bitpackExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(limit - start_index));
return limit - start_index;
}
while (end_index < limit && data[end_index - dist] == data[end_index])
++end_index;
DEBUGLOG(7, "RMF_bitpackExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(end_index - start_index));
return end_index - start_index;
}
#define GetMatchLink(table, index) ((const RMF_unit*)(table))[(index) >> UNIT_BITS].links[(index) & UNIT_MASK]
#define GetMatchLength(table, index) ((const RMF_unit*)(table))[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK]
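/* The structured-table accessors above imply a unit holding four 32-bit links
* and four 8-bit lengths (UNIT_BITS == 2); the real RMF_unit is defined
* elsewhere in the library, so this declaration is a sketch for illustration: */
#if 0
typedef struct {
    U32 links[1 << UNIT_BITS];    /* match links, full 32-bit range */
    BYTE lengths[1 << UNIT_BITS]; /* match lengths, max STRUCTURED_MAX_LENGTH */
} RMF_unit_sketch;
/* GetMatchLink(table, i) then reads unit i >> UNIT_BITS, slot i & UNIT_MASK,
* keeping a link and its length in the same cache line. */
#endif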
static size_t RMF_structuredExtendMatch(const BYTE* const data,
const U32* const table,
ptrdiff_t const start_index,
ptrdiff_t limit,
U32 const link,
size_t const length)
{
ptrdiff_t end_index = start_index + length;
ptrdiff_t const dist = start_index - link;
if (limit > start_index + (ptrdiff_t)kMatchLenMax)
limit = start_index + kMatchLenMax;
while (end_index < limit && end_index - (ptrdiff_t)GetMatchLink(table, end_index) == dist)
end_index += GetMatchLength(table, end_index);
if (end_index >= limit) {
DEBUGLOG(7, "RMF_structuredExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(limit - start_index));
return limit - start_index;
}
while (end_index < limit && data[end_index - dist] == data[end_index])
++end_index;
DEBUGLOG(7, "RMF_structuredExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(end_index - start_index));
return end_index - start_index;
}
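/* Worked example of the chained extension in the two helpers above (all values
* assumed): with dist == 8, if the entries at end_index each link back by the
* same distance 8 with stored length 40, the first loop adds 40 + 40 + 40
* without touching the input bytes; only the final partial run falls through
* to the byte-by-byte compare, and kMatchLenMax caps the total. */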
FORCE_INLINE_TEMPLATE
RMF_match RMF_getMatch(FL2_dataBlock block,
FL2_matchTable* tbl,
unsigned max_depth,
int structTbl,
size_t index)
{
if (structTbl)
{
U32 const link = GetMatchLink(tbl->table, index);
RMF_match match;
match.length = 0;
if (link == RADIX_NULL_LINK)
return match;
size_t const length = GetMatchLength(tbl->table, index);
size_t const dist = index - link - 1;
if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == STRUCTURED_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_structuredExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;
match.dist = (U32)dist;
return match;
}
else {
U32 link = tbl->table[index];
RMF_match match;
match.length = 0;
if (link == RADIX_NULL_LINK)
return match;
size_t const length = link >> RADIX_LINK_BITS;
link &= RADIX_LINK_MASK;
size_t const dist = index - link - 1;
if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == BITPACK_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_bitpackExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;
match.dist = (U32)dist;
return match;
}
}
FORCE_INLINE_TEMPLATE
RMF_match RMF_getNextMatch(FL2_dataBlock block,
FL2_matchTable* tbl,
unsigned max_depth,
int structTbl,
size_t index)
{
if (structTbl)
{
U32 const link = GetMatchLink(tbl->table, index);
RMF_match match;
match.length = 0;
if (link == RADIX_NULL_LINK)
return match;
size_t const length = GetMatchLength(tbl->table, index);
size_t const dist = index - link - 1;
/* same distance, one byte shorter */
if (link - 1 == GetMatchLink(tbl->table, index - 1))
return match;
if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == STRUCTURED_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_structuredExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;
match.dist = (U32)dist;
return match;
}
else {
U32 link = tbl->table[index];
RMF_match match;
match.length = 0;
if (link == RADIX_NULL_LINK)
return match;
size_t const length = link >> RADIX_LINK_BITS;
link &= RADIX_LINK_MASK;
size_t const dist = index - link - 1;
/* same distance, one byte shorter */
if (link - 1 == (tbl->table[index - 1] & RADIX_LINK_MASK))
return match;
if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == BITPACK_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_bitpackExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;
match.dist = (U32)dist;
return match;
}
}
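/* Hypothetical caller sketch (not part of this header): an encoder queries the
* finished table at each position. structTbl is passed as a constant so the
* branch folds away when the FORCE_INLINE_TEMPLATE expansion is instantiated.
* Note match.dist is stored minus one (dist = index - link - 1 above). */
#if 0
static void RMF_scanBlock_sketch(FL2_dataBlock block, FL2_matchTable* tbl, unsigned max_depth)
{
    for (size_t index = block.start; index < block.end; ++index) {
        RMF_match match = RMF_getMatch(block, tbl, max_depth, tbl->is_struct, index);
        if (match.length >= 2) {
            /* encode a match of match.length bytes at distance match.dist + 1 */
        }
    }
}
#endif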
#if defined (__cplusplus)
}
#endif
#endif /* FL2_RADIX_GET_H_ */

@@ -14,6 +14,10 @@
#include "atomic.h"
#include "radix_mf.h"
#if defined(FL2_XZ_BUILD) && defined(TUKLIB_FAST_UNALIGNED_ACCESS)
# define MEM_read32(a) (*(const U32*)(a))
#endif
#if defined (__cplusplus)
extern "C" {
#endif
@@ -21,26 +25,27 @@ extern "C" {
#define DICTIONARY_LOG_MIN 12U
#define DICTIONARY_LOG_MAX_64 30U
#define DICTIONARY_LOG_MAX_32 27U
#define DEFAULT_BUFFER_LOG 8U
#define DEFAULT_BLOCK_OVERLAP 2U
#define DEFAULT_SEARCH_DEPTH 32U
#define DEFAULT_DIVIDEANDCONQUER 1
#define MAX_REPEAT 32
#define RADIX16_TABLE_SIZE (1UL << 16)
#define RADIX8_TABLE_SIZE (1UL << 8)
#define DICTIONARY_SIZE_MIN ((size_t)1 << DICTIONARY_LOG_MIN)
#define DICTIONARY_SIZE_MAX_64 ((size_t)1 << DICTIONARY_LOG_MAX_64)
#define DICTIONARY_SIZE_MAX_32 ((size_t)1 << DICTIONARY_LOG_MAX_32)
#define MAX_REPEAT 24
#define RADIX16_TABLE_SIZE ((size_t)1 << 16)
#define RADIX8_TABLE_SIZE ((size_t)1 << 8)
#define STACK_SIZE (RADIX16_TABLE_SIZE * 3)
#define MAX_BRUTE_FORCE_LIST_SIZE 5
#define BUFFER_LINK_MASK 0xFFFFFFU
#define MATCH_BUFFER_OVERLAP 6
#define BITPACK_MAX_LENGTH 63UL
#define STRUCTURED_MAX_LENGTH 255UL
#define BITPACK_MAX_LENGTH 63U
#define STRUCTURED_MAX_LENGTH 255U
#define RADIX_LINK_BITS 26
#define RADIX_LINK_MASK ((1UL << RADIX_LINK_BITS) - 1)
#define RADIX_NULL_LINK 0xFFFFFFFFUL
#define RADIX_LINK_MASK ((1U << RADIX_LINK_BITS) - 1)
#define RADIX_NULL_LINK 0xFFFFFFFFU
#define UNIT_BITS 2
#define UNIT_MASK ((1UL << UNIT_BITS) - 1)
#define UNIT_MASK ((1U << UNIT_BITS) - 1)
#define RADIX_CANCEL_INDEX (long)(RADIX16_TABLE_SIZE + FL2_MAXTHREADS + 2)
typedef struct
{
@@ -88,9 +93,10 @@ struct FL2_matchTable_s
{
FL2_atomic st_index;
long end_index;
int isStruct;
int allocStruct;
int is_struct;
int alloc_struct;
unsigned thread_count;
size_t progress;
RMF_parameters params;
RMF_builder** builders;
U32 stack[RADIX16_TABLE_SIZE];
@@ -98,27 +104,25 @@ struct FL2_matchTable_s
U32 table[1];
};
size_t RMF_bitpackInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const start, size_t const end);
size_t RMF_structuredInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const start, size_t const end);
int RMF_bitpackBuildTable(struct FL2_matchTable_s* const tbl,
size_t RMF_bitpackInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const end);
size_t RMF_structuredInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const end);
void RMF_bitpackBuildTable(struct FL2_matchTable_s* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done);
int RMF_structuredBuildTable(struct FL2_matchTable_s* const tbl,
FL2_dataBlock const block);
void RMF_structuredBuildTable(struct FL2_matchTable_s* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done);
FL2_dataBlock const block);
void RMF_recurseListChunk(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
BYTE const depth,
BYTE const max_depth,
U32 const depth,
U32 const max_depth,
U32 const list_count,
size_t const stack_base);
int RMF_bitpackIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth);
int RMF_structuredIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth);
int RMF_bitpackIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned max_depth);
int RMF_structuredIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned max_depth);
void RMF_bitpackLimitLengths(struct FL2_matchTable_s* const tbl, size_t const index);
void RMF_structuredLimitLengths(struct FL2_matchTable_s* const tbl, size_t const index);
BYTE* RMF_bitpackAsOutputBuffer(struct FL2_matchTable_s* const tbl, size_t const index);

@@ -11,21 +11,20 @@
#include <stddef.h> /* size_t, ptrdiff_t */
#include <stdlib.h> /* malloc, free */
#include "fast-lzma2.h"
#include "fl2_errors.h"
#include "mem.h" /* U32, U64, MEM_64bits */
#include "fl2_internal.h"
#include "radix_internal.h"
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" /* warning: 'rpt_head_next' may be used uninitialized in this function */
# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" /* warning: 'rpt_head_next' may be used uninitialized in this function */
#elif defined(_MSC_VER)
# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */
# pragma warning(disable : 4701) /* warning: 'rpt_head_next' may be used uninitialized in this function */
#endif
#define MIN_MATCH_BUFFER_SIZE 256U /* minimum buffer size; must be at least FL2_SEARCH_DEPTH_MAX + 2 for the bounded build */
#define MAX_MATCH_BUFFER_SIZE (1UL << 24) /* max buffer size constrained by 24-bit link values */
#define REPEAT_CHECK_TABLE ((1 << 1) | (1 << 2) | (1 << 4) | (1 << 8) | (1 << 16) | (1ULL << 32))
static void RMF_initTailTable(RMF_builder* const tbl)
{
for (size_t i = 0; i < RADIX8_TABLE_SIZE; i += 2) {
@@ -43,89 +42,107 @@ static RMF_builder* RMF_createBuilder(size_t match_buffer_size)
match_buffer_size = MIN(match_buffer_size, MAX_MATCH_BUFFER_SIZE);
match_buffer_size = MAX(match_buffer_size, MIN_MATCH_BUFFER_SIZE);
{ RMF_builder* const builder = (RMF_builder*)malloc(
RMF_builder* const builder = malloc(
sizeof(RMF_builder) + (match_buffer_size - 1) * sizeof(RMF_buildMatch));
if (builder == NULL)
return NULL;
builder->match_buffer_size = match_buffer_size;
builder->match_buffer_limit = match_buffer_size;
RMF_initTailTable(builder);
return builder;
}
}
static void RMF_freeBuilderTable(RMF_builder** const builders, unsigned const size)
{
if (builders == NULL)
return;
for (unsigned i = 0; i < size; ++i) {
for (unsigned i = 0; i < size; ++i)
free(builders[i]);
}
free(builders);
}
static RMF_builder** RMF_createBuilderTable(U32* const matchTable, size_t const match_buffer_size, unsigned const max_len, unsigned const size)
/* RMF_createBuilderTable() :
* Create one match table builder object per thread.
* max_len : maximum match length supported by the table structure
* size : number of threads
*/
static RMF_builder** RMF_createBuilderTable(U32* const match_table, size_t const match_buffer_size, unsigned const max_len, unsigned const size)
{
RMF_builder** builders = (RMF_builder**)malloc(size * sizeof(RMF_builder*));
DEBUGLOG(3, "RMF_createBuilderTable : match_buffer_size %u, builders %u", (U32)match_buffer_size, size);
RMF_builder** const builders = malloc(size * sizeof(RMF_builder*));
if (builders == NULL)
return NULL;
for (unsigned i = 0; i < size; ++i)
builders[i] = NULL;
for (unsigned i = 0; i < size; ++i) {
builders[i] = RMF_createBuilder(match_buffer_size);
if (builders[i] == NULL) {
RMF_freeBuilderTable(builders, i);
return NULL;
}
builders[i]->table = matchTable;
builders[i]->table = match_table;
builders[i]->max_len = max_len;
}
return builders;
}
static int RMF_isStruct(unsigned dictionary_log, unsigned depth)
static int RMF_isStruct(size_t const dictionary_size)
{
return dictionary_log > RADIX_LINK_BITS || depth > BITPACK_MAX_LENGTH;
return dictionary_size > ((size_t)1 << RADIX_LINK_BITS);
}
static int RMF_isStructParam(const RMF_parameters* const params)
{
return RMF_isStruct(params->dictionary_log, params->depth);
}
/** RMF_clampCParams() :
* make CParam values within valid range.
* @return : valid CParams */
/* RMF_clampParams() :
* Clamp parameter values into their valid ranges.
* Return : valid RMF_parameters */
static RMF_parameters RMF_clampParams(RMF_parameters params)
{
# define CLAMP(val,min,max) { \
if (val<(min)) val=(min); \
else if (val>(max)) val=(max); \
}
CLAMP(params.dictionary_log, DICTIONARY_LOG_MIN, MEM_64bits() ? DICTIONARY_LOG_MAX_64 : DICTIONARY_LOG_MAX_32);
CLAMP(params.match_buffer_log, FL2_BUFFER_SIZE_LOG_MIN, FL2_BUFFER_SIZE_LOG_MAX);
CLAMP(params.overlap_fraction, FL2_BLOCK_OVERLAP_MIN, FL2_BLOCK_OVERLAP_MAX);
CLAMP(params.dictionary_size, DICTIONARY_SIZE_MIN, MEM_64bits() ? DICTIONARY_SIZE_MAX_64 : DICTIONARY_SIZE_MAX_32);
CLAMP(params.match_buffer_log, RMF_BUFFER_LOG_MIN, RMF_BUFFER_LOG_MAX);
if (params.overlap_fraction > FL2_BLOCK_OVERLAP_MAX)
params.overlap_fraction = FL2_BLOCK_OVERLAP_MAX;
CLAMP(params.depth, FL2_SEARCH_DEPTH_MIN, FL2_SEARCH_DEPTH_MAX);
return params;
# undef CLAMP
}
/* RMF_applyParameters_internal() :
* Set parameters to those specified.
* Create a builder table if none exists. Free an existing one if incompatible.
* Set match_buffer_limit and max supported match length.
* Returns an error if dictionary won't fit.
*/
static size_t RMF_applyParameters_internal(FL2_matchTable* const tbl, const RMF_parameters* const params)
{
int const isStruct = RMF_isStructParam(params);
unsigned const dictionary_log = tbl->params.dictionary_log;
int const is_struct = RMF_isStruct(params->dictionary_size);
size_t const dictionary_size = tbl->params.dictionary_size;
/* dictionary is allocated with the struct and is immutable */
if (params->dictionary_log > tbl->params.dictionary_log
|| (params->dictionary_log == tbl->params.dictionary_log && isStruct > tbl->allocStruct))
if (params->dictionary_size > tbl->params.dictionary_size
|| (params->dictionary_size == tbl->params.dictionary_size && is_struct > tbl->alloc_struct))
return FL2_ERROR(parameter_unsupported);
{ size_t const match_buffer_size = (size_t)1 << (params->dictionary_log - params->match_buffer_log);
size_t const match_buffer_size = params->dictionary_size >> params->match_buffer_log;
tbl->params = *params;
tbl->params.dictionary_log = dictionary_log;
tbl->isStruct = isStruct;
tbl->params.dictionary_size = dictionary_size;
tbl->is_struct = is_struct;
if (tbl->builders == NULL
|| match_buffer_size > tbl->builders[0]->match_buffer_size)
{
RMF_freeBuilderTable(tbl->builders, tbl->thread_count);
tbl->builders = RMF_createBuilderTable(tbl->table, match_buffer_size, tbl->isStruct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH, tbl->thread_count);
tbl->builders = RMF_createBuilderTable(tbl->table, match_buffer_size, tbl->is_struct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH, tbl->thread_count);
if (tbl->builders == NULL) {
return FL2_ERROR(memory_allocation);
}
@@ -133,56 +150,67 @@ static size_t RMF_applyParameters_internal(FL2_matchTable* const tbl, const RMF_
else {
for (unsigned i = 0; i < tbl->thread_count; ++i) {
tbl->builders[i]->match_buffer_limit = match_buffer_size;
tbl->builders[i]->max_len = tbl->isStruct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH;
}
tbl->builders[i]->max_len = tbl->is_struct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH;
}
}
return 0;
}
/* RMF_reduceDict() :
* Reduce dictionary and match buffer size if the total input size is known and < dictionary_size.
*/
static void RMF_reduceDict(RMF_parameters* const params, size_t const dict_reduce)
{
if (dict_reduce)
while (params->dictionary_log > DICTIONARY_LOG_MIN && (size_t)1 << (params->dictionary_log - 1) >= dict_reduce) {
--params->dictionary_log;
params->match_buffer_log = MAX(params->match_buffer_log - 1, FL2_BUFFER_SIZE_LOG_MIN);
if (dict_reduce) {
for (size_t dict_size = params->dictionary_size; dict_size > DICTIONARY_SIZE_MIN && (dict_size >> 1) >= dict_reduce; dict_size >>= 1) {
/* Use unchanged match buffer size for reduced dict */
params->match_buffer_log = MAX(params->match_buffer_log - 1, RMF_BUFFER_LOG_MIN);
}
params->dictionary_size = MIN(params->dictionary_size, MAX(dict_reduce, DICTIONARY_SIZE_MIN));
}
}
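/* Worked example (values assumed): dictionary_size = 64 MiB, dict_reduce = 1 MiB,
* match_buffer_log = 8. The loop halves 64 MiB down to 1 MiB (six steps),
* decrementing match_buffer_log once per step so dict_size >> match_buffer_log
* stays constant until the RMF_BUFFER_LOG_MIN floor; dictionary_size is then
* clamped to MAX(dict_reduce, DICTIONARY_SIZE_MIN) = 1 MiB. */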
FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const dict_reduce, unsigned const thread_count)
static void RMF_initListHeads(FL2_matchTable* const tbl)
{
int isStruct;
size_t dictionary_size;
size_t table_bytes;
FL2_matchTable* tbl;
RMF_parameters params = RMF_clampParams(*p);
RMF_reduceDict(&params, dict_reduce);
isStruct = RMF_isStructParam(&params);
dictionary_size = (size_t)1 << params.dictionary_log;
DEBUGLOG(3, "RMF_createMatchTable : isStruct %d, dict %u", isStruct, (U32)dictionary_size);
table_bytes = isStruct ? ((dictionary_size + 3U) / 4U) * sizeof(RMF_unit)
: dictionary_size * sizeof(U32);
tbl = (FL2_matchTable*)malloc(
sizeof(FL2_matchTable) + table_bytes - sizeof(U32));
if (!tbl) return NULL;
tbl->isStruct = isStruct;
tbl->allocStruct = isStruct;
tbl->thread_count = thread_count + !thread_count;
tbl->params = params;
tbl->builders = NULL;
RMF_applyParameters_internal(tbl, &params);
for (size_t i = 0; i < RADIX16_TABLE_SIZE; i += 2) {
tbl->list_heads[i].head = RADIX_NULL_LINK;
tbl->list_heads[i].count = 0;
tbl->list_heads[i + 1].head = RADIX_NULL_LINK;
tbl->list_heads[i + 1].count = 0;
}
}
/* RMF_createMatchTable() :
* Create a match table. Reduce the dict size to input size if possible.
* A thread_count of 0 will be raised to 1.
*/
FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const dict_reduce, unsigned const thread_count)
{
RMF_parameters params = RMF_clampParams(*p);
RMF_reduceDict(&params, dict_reduce);
int const is_struct = RMF_isStruct(params.dictionary_size);
size_t dictionary_size = params.dictionary_size;
DEBUGLOG(3, "RMF_createMatchTable : is_struct %d, dict %u", is_struct, (U32)dictionary_size);
size_t const table_bytes = is_struct ? ((dictionary_size + 3U) / 4U) * sizeof(RMF_unit)
: dictionary_size * sizeof(U32);
FL2_matchTable* const tbl = malloc(sizeof(FL2_matchTable) + table_bytes - sizeof(U32));
if (!tbl) return NULL;
tbl->is_struct = is_struct;
tbl->alloc_struct = is_struct;
tbl->thread_count = thread_count + !thread_count;
tbl->params = params;
tbl->builders = NULL;
RMF_applyParameters_internal(tbl, &params);
RMF_initListHeads(tbl);
RMF_initProgress(tbl);
return tbl;
}
@@ -190,7 +218,9 @@ void RMF_freeMatchTable(FL2_matchTable* const tbl)
{
if (tbl == NULL)
return;
DEBUGLOG(3, "RMF_freeMatchTable");
RMF_freeBuilderTable(tbl->builders, tbl->thread_count);
free(tbl);
}
@@ -199,8 +229,8 @@ BYTE RMF_compatibleParameters(const FL2_matchTable* const tbl, const RMF_paramet
{
RMF_parameters params = RMF_clampParams(*p);
RMF_reduceDict(&params, dict_reduce);
return tbl->params.dictionary_log > params.dictionary_log
|| (tbl->params.dictionary_log == params.dictionary_log && tbl->allocStruct >= RMF_isStructParam(&params));
return tbl->params.dictionary_size > params.dictionary_size
|| (tbl->params.dictionary_size == params.dictionary_size && tbl->alloc_struct >= RMF_isStruct(params.dictionary_size));
}
size_t RMF_applyParameters(FL2_matchTable* const tbl, const RMF_parameters* const p, size_t const dict_reduce)
@@ -215,18 +245,25 @@ size_t RMF_threadCount(const FL2_matchTable* const tbl)
return tbl->thread_count;
}
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
void RMF_initProgress(FL2_matchTable * const tbl)
{
DEBUGLOG(5, "RMF_initTable : start %u, size %u", (U32)start, (U32)end);
if (tbl->isStruct) {
return RMF_structuredInit(tbl, data, start, end);
}
else {
return RMF_bitpackInit(tbl, data, start, end);
}
if (tbl != NULL)
tbl->progress = 0;
}
static void HandleRepeat(RMF_buildMatch* const match_buffer,
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const end)
{
DEBUGLOG(5, "RMF_initTable : size %u", (U32)end);
tbl->st_index = ATOMIC_INITIAL_VALUE;
if (tbl->is_struct)
return RMF_structuredInit(tbl, data, end);
else
return RMF_bitpackInit(tbl, data, end);
}
static void RMF_handleRepeat(RMF_buildMatch* const match_buffer,
const BYTE* const data_block,
size_t const next,
U32 count,
@@ -235,20 +272,22 @@ static void HandleRepeat(RMF_buildMatch* const match_buffer,
U32 const max_len)
{
size_t index = next;
size_t next_i;
U32 length = depth + rpt_len;
const BYTE* const data = data_block + match_buffer[index].from;
const BYTE* const data_2 = data - rpt_len;
while (data[length] == data_2[length] && length < max_len)
++length;
for (; length <= max_len && count; --count) {
next_i = match_buffer[index].next & 0xFFFFFF;
size_t next_i = match_buffer[index].next & 0xFFFFFF;
match_buffer[index].next = (U32)next_i | (length << 24);
length += rpt_len;
index = next_i;
}
for (; count; --count) {
next_i = match_buffer[index].next & 0xFFFFFF;
size_t next_i = match_buffer[index].next & 0xFFFFFF;
match_buffer[index].next = (U32)next_i | (max_len << 24);
index = next_i;
}
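/* The two loops above hand out lengths that grow by rpt_len per chain entry
* (positions deeper inside the repeat can match further); once max_len is
* reached, the second loop saturates the rest of the chain at max_len. */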
@@ -261,27 +300,29 @@ typedef struct
union src_data_u src;
} BruteForceMatch;
static void BruteForceBuffered(RMF_builder* const tbl,
static void RMF_bruteForceBuffered(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t index,
size_t list_count,
size_t const list_count,
size_t const slot,
size_t const depth,
size_t const max_depth)
{
BruteForceMatch buffer[MAX_BRUTE_FORCE_LIST_SIZE + 1];
const BYTE* data_src = data_block + depth;
size_t limit = max_depth - depth;
const BYTE* start = data_src + block_start;
const BYTE* const data_src = data_block + depth;
size_t const limit = max_depth - depth;
const BYTE* const start = data_src + block_start;
size_t i = 0;
for (;;) {
/* Load all locations from the match buffer */
buffer[i].index = index;
buffer[i].data_src = data_src + tbl->match_buffer[index].from;
buffer[i].src.u32 = tbl->match_buffer[index].src.u32;
if (++i >= list_count) {
if (++i >= list_count)
break;
}
index = tbl->match_buffer[index].next & 0xFFFFFF;
}
i = 0;
@@ -289,28 +330,29 @@ static void BruteForceBuffered(RMF_builder* const tbl,
size_t longest = 0;
size_t j = i + 1;
size_t longest_index = j;
const BYTE* data = buffer[i].data_src;
const BYTE* const data = buffer[i].data_src;
do {
/* Begin with the remaining chars pulled from the match buffer */
size_t len_test = slot;
while (len_test < 4 && buffer[i].src.chars[len_test] == buffer[j].src.chars[len_test] && len_test - slot < limit) {
while (len_test < 4 && buffer[i].src.chars[len_test] == buffer[j].src.chars[len_test] && len_test - slot < limit)
++len_test;
}
len_test -= slot;
if (len_test) {
/* Complete the match length count in the raw input buffer */
const BYTE* data_2 = buffer[j].data_src;
while (data[len_test] == data_2[len_test] && len_test < limit) {
while (data[len_test] == data_2[len_test] && len_test < limit)
++len_test;
}
}
if (len_test > longest) {
longest_index = j;
longest = len_test;
if (len_test >= limit) {
if (len_test >= limit)
break;
}
}
} while (++j < list_count);
if (longest > 0) {
/* If the existing match was extended, store the new link and length info in the match buffer */
index = buffer[i].index;
tbl->match_buffer[index].next = (U32)(buffer[longest_index].index | ((depth + longest) << 24));
}
@@ -318,17 +360,19 @@ static void BruteForceBuffered(RMF_builder* const tbl,
} while (i < list_count - 1 && buffer[i].data_src >= start);
}
/* Lengthen and divide buffered chains into smaller chains, save them on a stack and process in turn.
* The match finder spends most of its time here.
*/
FORCE_INLINE_TEMPLATE
void RMF_recurseListChunk_generic(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
BYTE depth,
BYTE const max_depth,
U32 depth,
U32 const max_depth,
U32 list_count,
size_t const stack_base)
{
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE base_depth = depth;
U32 const base_depth = depth;
size_t st_index = stack_base;
size_t index = 0;
++depth;
@@ -338,10 +382,11 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
size_t const radix_8 = tbl->match_buffer[index].src.chars[0];
/* Seen this char before? */
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
/* Link the previous occurrence to this one and record the new length */
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -351,7 +396,6 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
++index;
} while (index < list_count);
@@ -361,7 +405,7 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
}
/* Convert radix values on the stack to counts and reset any used tail slots */
@@ -370,11 +414,6 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[j].count = (U32)tbl->tails_8[tbl->stack[j].count].list_count;
}
while (st_index > stack_base) {
const BYTE* data_src;
size_t link;
size_t slot;
U32 test;
/* Pop an item off the stack */
--st_index;
list_count = tbl->stack[st_index].count;
@@ -383,7 +422,7 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
continue;
}
index = tbl->stack[st_index].head;
link = tbl->match_buffer[index].from;
size_t link = tbl->match_buffer[index].from;
if (link < block_start) {
/* Chain starts in the overlap region which is already encoded */
continue;
@@ -396,10 +435,11 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
continue;
}
depth = tbl->match_buffer[index].next >> 24;
slot = (depth - base_depth) & 3;
/* Index into the 4-byte pre-loaded input char cache */
size_t slot = (depth - base_depth) & 3;
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) {
/* Quicker to use brute force, each string compared with all previous strings */
BruteForceBuffered(tbl,
RMF_bruteForceBuffered(tbl,
data_block,
block_start,
index,
@@ -409,35 +449,41 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
max_depth);
continue;
}
/* check for repeats at depth 4,8,16,32 etc */
test = max_depth != 6 && ((depth & 3) == 0) && ((REPEAT_CHECK_TABLE >> ((depth >> 2) & 31)) & 1) && (max_depth >= depth + (depth >> 1));
/* check for repeats at depth 4,8,16,32 etc unless depth is near max_depth */
U32 const test = max_depth != 6 && ((depth & 3) == 0)
&& (depth & (depth - 1)) == 0
&& (max_depth >= depth + (depth >> 1));
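/* (depth & (depth - 1)) == 0 is the usual power-of-two test; combined with
* (depth & 3) == 0 it fires only at depths 4, 8, 16, 32..., the same schedule
* the old REPEAT_CHECK_TABLE bit lookup encoded. */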
++depth;
/* Update the offset data buffer pointer */
data_src = data_block + depth;
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE* const data_src = data_block + depth;
/* Last pass is done separately */
if (!test && depth < max_depth) {
size_t const prev_st_index = st_index;
/* Last element done separately */
--list_count;
/* slot is the char cache index. If 3 then chars need to be loaded. */
/* If slot is 3 then chars need to be loaded. */
if (slot == 3 && max_depth != 6) do {
size_t const radix_8 = tbl->match_buffer[index].src.chars[3];
size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
/* Pre-load the next link and data bytes to avoid waiting for RAM access */
/* Pre-load the next link and data bytes. On some hardware, execution can run
* ahead while the data is fetched, provided only move operations touch it. */
tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link);
size_t const next_link = tbl->match_buffer[next_index].from;
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev!=RADIX_NULL_LINK) {
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
/* This char has occurred before in the chain. Link the previous (> index) occurrence with this one */
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
/* First occurrence in the chain */
tbl->tails_8[radix_8].list_count = 1;
tbl->stack[st_index].head = (U32)index;
/* Save the char as a reference to load the count at the end */
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
} while (--list_count != 0);
@@ -447,9 +493,10 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
/* Pre-load the next link to avoid waiting for RAM access */
size_t const next_link = tbl->match_buffer[next_index].from;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -457,20 +504,18 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
} while (--list_count != 0);
{ size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
if (slot == 3) {
if (slot == 3)
tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link);
}
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
}
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
for (size_t j = prev_st_index; j < st_index; ++j) {
tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK;
@@ -490,14 +535,15 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
size_t const next_link = tbl->match_buffer[next_index].from;
if ((link - next_link) > rpt_depth) {
if (rpt > 0) {
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
}
if (rpt > 0)
RMF_handleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
rpt = -1;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -505,23 +551,23 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
}
else {
U32 const dist = (U32)(link - next_link);
if (rpt < 0 || dist != rpt_dist) {
if (rpt > 0) {
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
}
if (rpt > 0)
RMF_handleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
rpt = 0;
rpt_head_next = next_index;
rpt_dist = dist;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -529,7 +575,6 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
}
else {
++rpt;
@@ -538,19 +583,18 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
link = next_link;
}
} while (--list_count != 0);
if (rpt > 0) {
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
}
{ size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
if (rpt > 0)
RMF_handleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
if (slot == 3) {
tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link);
}
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
}
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
for (size_t j = prev_st_index; j < st_index; ++j) {
tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK;
@@ -558,7 +602,7 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
}
}
else {
size_t prev_st_index = st_index;
size_t const prev_st_index = st_index;
/* The last pass at max_depth */
do {
size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
@@ -567,14 +611,14 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
/* The last element in tbl->match_buffer is circular so this is never an access violation. */
size_t const next_link = tbl->match_buffer[next_index].from;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
} while (--list_count != 0);
@@ -589,84 +633,81 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
void RMF_recurseListChunk(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
BYTE const depth,
BYTE const max_depth,
U32 const depth,
U32 const max_depth,
U32 const list_count,
size_t const stack_base)
{
if (max_depth > 6) {
if (list_count < 2)
return;
/* Template-like inline functions */
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE)
RMF_bruteForceBuffered(tbl, data_block, block_start, 0, list_count, 0, depth, max_depth);
else if (max_depth > 6)
RMF_recurseListChunk_generic(tbl, data_block, block_start, depth, max_depth, list_count, stack_base);
}
else {
else
RMF_recurseListChunk_generic(tbl, data_block, block_start, depth, 6, list_count, stack_base);
}
}
/* Iterate the head table concurrently with other threads, and recurse each list until max_depth is reached */
int RMF_buildTable(FL2_matchTable* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done)
FL2_dataBlock const block)
{
DEBUGLOG(5, "RMF_buildTable : thread %u", (U32)job);
if (tbl->isStruct) {
return RMF_structuredBuildTable(tbl, job, multi_thread, block, progress, opaque, weight, init_done);
}
else {
return RMF_bitpackBuildTable(tbl, job, multi_thread, block, progress, opaque, weight, init_done);
if (tbl->is_struct)
RMF_structuredBuildTable(tbl, job, multi_thread, block);
else
RMF_bitpackBuildTable(tbl, job, multi_thread, block);
if (job == 0 && tbl->st_index >= RADIX_CANCEL_INDEX) {
RMF_initListHeads(tbl);
return 1;
}
return 0;
}
void RMF_cancelBuild(FL2_matchTable * const tbl)
{
if(tbl != NULL)
FL2_atomic_add(tbl->st_index, RADIX_CANCEL_INDEX - ATOMIC_INITIAL_VALUE);
}
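/* Cancellation works through the same counter the workers iterate: pushing
* st_index past RADIX_CANCEL_INDEX makes every later FL2_atomic_increment()
* return a value >= end_index, so RMF_getNextList_mt() yields -1 and each
* thread drops out of its loop; thread 0 then detects the cancel in
* RMF_buildTable() above and re-initializes the list heads. */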
void RMF_resetIncompleteBuild(FL2_matchTable * const tbl)
{
RMF_initListHeads(tbl);
}
int RMF_integrityCheck(const FL2_matchTable* const tbl, const BYTE* const data, size_t const index, size_t const end, unsigned const max_depth)
{
if (tbl->isStruct) {
if (tbl->is_struct)
return RMF_structuredIntegrityCheck(tbl, data, index, end, max_depth);
}
else {
else
return RMF_bitpackIntegrityCheck(tbl, data, index, end, max_depth);
}
}
size_t RMF_getMatch(FL2_matchTable* const tbl,
const BYTE* const data,
size_t const index,
size_t const limit,
unsigned max_depth,
size_t* const offset_ptr)
{
if (tbl->isStruct) {
return RMF_structuredGetMatch(tbl, data, index, limit, max_depth, offset_ptr);
}
else {
return RMF_bitpackGetMatch(tbl, data, index, limit, max_depth, offset_ptr);
}
}
void RMF_limitLengths(FL2_matchTable* const tbl, size_t const index)
{
if (tbl->isStruct) {
if (tbl->is_struct)
RMF_structuredLimitLengths(tbl, index);
}
else {
else
RMF_bitpackLimitLengths(tbl, index);
}
}
BYTE* RMF_getTableAsOutputBuffer(FL2_matchTable* const tbl, size_t const index)
{
if (tbl->isStruct) {
if (tbl->is_struct)
return RMF_structuredAsOutputBuffer(tbl, index);
}
else {
else
return RMF_bitpackAsOutputBuffer(tbl, index);
}
}
size_t RMF_memoryUsage(unsigned const dict_log, unsigned const buffer_log, unsigned const depth, unsigned thread_count)
size_t RMF_memoryUsage(size_t const dict_size, unsigned const buffer_log, unsigned const thread_count)
{
size_t size = (size_t)(4U + RMF_isStruct(dict_log, depth)) << dict_log;
U32 buf_size = (U32)1 << (dict_log - buffer_log);
size_t size = (size_t)(4U + RMF_isStruct(dict_size)) * dict_size;
size_t const buf_size = dict_size >> buffer_log;
size += ((buf_size - 1) * sizeof(RMF_buildMatch) + sizeof(RMF_builder)) * thread_count;
return size;
}

@@ -20,16 +20,19 @@ extern "C" {
typedef struct FL2_matchTable_s FL2_matchTable;
#define OVERLAP_FROM_DICT_LOG(d, o) (((size_t)1 << ((d) - 4)) * (o))
#define OVERLAP_FROM_DICT_SIZE(d, o) (((d) >> 4) * (o))
#define RMF_MIN_BYTES_PER_THREAD 1024
#define RMF_BUFFER_LOG_BASE 12
#define RMF_BUFFER_LOG_MIN 6
#define RMF_BUFFER_LOG_MAX 12
typedef struct
{
unsigned dictionary_log;
size_t dictionary_size;
unsigned match_buffer_log;
unsigned overlap_fraction;
unsigned block_size_log;
unsigned divide_and_conquer;
unsigned depth;
#ifdef RMF_REFERENCE
@@ -42,16 +45,18 @@ void RMF_freeMatchTable(FL2_matchTable* const tbl);
BYTE RMF_compatibleParameters(const FL2_matchTable* const tbl, const RMF_parameters* const params, size_t const dict_reduce);
size_t RMF_applyParameters(FL2_matchTable* const tbl, const RMF_parameters* const params, size_t const dict_reduce);
size_t RMF_threadCount(const FL2_matchTable * const tbl);
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end);
void RMF_initProgress(FL2_matchTable * const tbl);
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const end);
int RMF_buildTable(FL2_matchTable* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done);
FL2_dataBlock const block);
void RMF_cancelBuild(FL2_matchTable* const tbl);
void RMF_resetIncompleteBuild(FL2_matchTable* const tbl);
int RMF_integrityCheck(const FL2_matchTable* const tbl, const BYTE* const data, size_t const index, size_t const end, unsigned const max_depth);
void RMF_limitLengths(FL2_matchTable* const tbl, size_t const index);
BYTE* RMF_getTableAsOutputBuffer(FL2_matchTable* const tbl, size_t const index);
size_t RMF_memoryUsage(unsigned const dict_log, unsigned const buffer_log, unsigned const depth, unsigned thread_count);
size_t RMF_memoryUsage(size_t const dict_size, unsigned const buffer_log, unsigned const thread_count);
#if defined (__cplusplus)
}

View File

@@ -9,7 +9,7 @@
*/
#include "mem.h" /* U32, U64 */
#include "fl2threading.h"
#include "fl2_threading.h"
#include "fl2_internal.h"
#include "radix_internal.h"
@@ -34,7 +34,7 @@ typedef struct FL2_matchTable_s FL2_matchTable;
#define SetMatchLength(index, link, length) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK] = (BYTE)(length)
#define SetMatchLinkAndLength(index, link, length) { size_t i_ = (index) >> UNIT_BITS, u_ = (index) & UNIT_MASK; ((RMF_unit*)tbl->table)[i_].links[u_] = (U32)(link); ((RMF_unit*)tbl->table)[i_].lengths[u_] = (BYTE)(length); }
#define SetMatchLinkAndLength(index, link, length) do { size_t i_ = (index) >> UNIT_BITS, u_ = (index) & UNIT_MASK; ((RMF_unit*)tbl->table)[i_].links[u_] = (U32)(link); ((RMF_unit*)tbl->table)[i_].lengths[u_] = (BYTE)(length); } while(0)
#define SetNull(index) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] = RADIX_NULL_LINK
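The switch to do { ... } while(0) is the usual multi-statement-macro fix: the expansion now behaves as a single statement in unbraced contexts, e.g.:
if (length >= 2)
    SetMatchLinkAndLength(index, link, length);   /* safe: expands to one statement */
else
    SetNull(index);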

View File

@@ -7,84 +7,194 @@
#include "fl2_internal.h"
#include "mem.h"
#include "platform.h"
#include "range_enc.h"
const unsigned price_table[kBitModelTotal >> kNumMoveReducingBits] = {
128, 103, 91, 84, 78, 73, 69, 66,
63, 61, 58, 56, 54, 52, 51, 49,
48, 46, 45, 44, 43, 42, 41, 40,
39, 38, 37, 36, 35, 34, 34, 33,
32, 31, 31, 30, 29, 29, 28, 28,
27, 26, 26, 25, 25, 24, 24, 23,
23, 22, 22, 22, 21, 21, 20, 20,
19, 19, 19, 18, 18, 17, 17, 17,
16, 16, 16, 15, 15, 15, 14, 14,
14, 13, 13, 13, 12, 12, 12, 11,
11, 11, 11, 10, 10, 10, 10, 9,
9, 9, 9, 8, 8, 8, 8, 7,
7, 7, 7, 6, 6, 6, 6, 5,
5, 5, 5, 5, 4, 4, 4, 4,
3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 1, 1, 1, 1, 1
};
/* The first and last elements of these tables are never used */
BYTE price_table[2][kPriceTableSize] = { {
0, 193, 182, 166, 154, 145, 137, 131,
125, 120, 115, 111, 107, 103, 100, 97,
94, 91, 89, 86, 84, 82, 80, 78,
76, 74, 72, 71, 69, 67, 66, 64,
63, 61, 60, 59, 57, 56, 55, 54,
53, 52, 50, 49, 48, 47, 46, 45,
44, 43, 42, 42, 41, 40, 39, 38,
37, 36, 36, 35, 34, 33, 33, 32,
31, 30, 30, 29, 28, 28, 27, 26,
26, 25, 25, 24, 23, 23, 22, 21,
21, 20, 20, 19, 19, 18, 18, 17,
17, 16, 16, 15, 15, 14, 14, 13,
13, 12, 12, 11, 11, 10, 10, 9,
9, 8, 8, 8, 7, 7, 6, 6,
5, 5, 5, 4, 4, 3, 3, 3,
2, 2, 2, 1, 1, 0, 0, 0
}, {
0, 0, 0, 1, 1, 2, 2, 2,
3, 3, 3, 4, 4, 5, 5, 5,
6, 6, 7, 7, 8, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12, 13,
13, 13, 14, 14, 15, 15, 16, 17,
17, 18, 18, 19, 19, 20, 20, 21,
21, 22, 23, 23, 24, 24, 25, 26,
26, 27, 28, 28, 29, 30, 30, 31,
32, 33, 33, 34, 35, 36, 36, 37,
38, 39, 40, 41, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 53,
54, 55, 56, 57, 59, 60, 61, 63,
64, 66, 67, 69, 70, 72, 74, 76,
78, 80, 82, 84, 86, 89, 91, 94,
97, 100, 103, 107, 111, 115, 119, 125,
130, 137, 145, 154, 165, 181, 192, 0
} };
void SetOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size)
#if 0
#include <stdio.h>
/* Generates price_table */
void RC_printPriceTable()
{
static const unsigned test_size = 0x4000;
const unsigned test_div = test_size >> 8;
BYTE buf[0x3062];
unsigned table0[kPriceTableSize];
unsigned table1[kPriceTableSize];
unsigned count[kPriceTableSize];
memset(table0, 0, sizeof(table0));
memset(table1, 0, sizeof(table1));
memset(count, 0, sizeof(count));
for (Probability i = 31; i <= kBitModelTotal - 31; ++i) {
RangeEncoder rc;
RC_reset(&rc);
RC_setOutputBuffer(&rc, buf, sizeof(buf));
for (unsigned j = 0; j < test_size; ++j) {
Probability prob = i;
RC_encodeBit0(&rc, &prob);
}
RC_flush(&rc);
table0[i >> kNumMoveReducingBits] += (unsigned)rc.out_index - 5;
RC_reset(&rc);
RC_setOutputBuffer(&rc, buf, sizeof(buf));
for (unsigned j = 0; j < test_size; ++j) {
Probability prob = i;
RC_encodeBit1(&rc, &prob);
}
RC_flush(&rc);
table1[i >> kNumMoveReducingBits] += (unsigned)rc.out_index - 5;
++count[i >> kNumMoveReducingBits];
}
for (int i = 0; i < kPriceTableSize; ++i) if (count[i]) {
table0[i] = (table0[i] / count[i]) / test_div;
table1[i] = (table1[i] / count[i]) / test_div;
}
fputs("const BYTE price_table[2][kPriceTableSize] = {\r\n", stdout);
for (int i = 0; i < kPriceTableSize;) {
for (int j = 0; j < 8; ++j, ++i)
printf("%4d,", table0[i]);
fputs("\r\n", stdout);
}
fputs("}, {\r\n", stdout);
for (int i = 0; i < kPriceTableSize;) {
for (int j = 0; j < 8; ++j, ++i)
printf("%4d,", table1[i]);
fputs("\r\n", stdout);
}
fputs("} };\r\n", stdout);
}
#endif
void RC_setOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size)
{
rc->out_buffer = out_buffer;
rc->chunk_size = chunk_size;
rc->out_index = 0;
}
void RangeEncReset(RangeEncoder* const rc)
void RC_reset(RangeEncoder* const rc)
{
rc->low = 0;
rc->range = (U32)-1;
rc->cache_size = 1;
rc->cache_size = 0;
rc->cache = 0;
}
#ifdef __64BIT__
void FORCE_NOINLINE RC_shiftLow(RangeEncoder* const rc)
{
    U64 low = rc->low;
    rc->low = (U32)(low << 8);
    if (low < 0xFF000000 || low > 0xFFFFFFFF) {
        BYTE high = (BYTE)(low >> 32);
        rc->out_buffer[rc->out_index++] = rc->cache + high;
        rc->cache = (BYTE)(low >> 24);
        if (rc->cache_size != 0) {
            high += 0xFF;
            do {
                rc->out_buffer[rc->out_index++] = high;
            } while (--rc->cache_size != 0);
        }
    }
    else {
        rc->cache_size++;
    }
}
#else
void FORCE_NOINLINE RC_shiftLow(RangeEncoder* const rc)
{
U32 low = (U32)rc->low;
unsigned high = (unsigned)(rc->low >> 32);
rc->low = low << 8;
if (low < (U32)0xFF000000 || high != 0) {
rc->out_buffer[rc->out_index++] = rc->cache + (BYTE)high;
rc->cache = (BYTE)(low >> 24);
if (rc->cache_size != 0) {
high += 0xFF;
do {
rc->out_buffer[rc->out_index++] = (BYTE)high;
} while (--rc->cache_size != 0);
}
}
else {
rc->cache_size++;
}
}
#endif
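A trace of the carry branch, for orientation (values are illustrative): with cache = 0x7F, cache_size = 2 and an addition into low that produced high = 1, the encoder emits 0x7F + 1 = 0x80 followed by two bytes of (0xFF + 1) & 0xFF = 0x00, so the carry ripples through the run of pending 0xFF bytes exactly as base-256 addition requires.
/* cache = 0x7F, cache_size = 2, carry (high) = 1
 * output: 0x80, 0x00, 0x00  -- carry propagated through the pending 0xFF run */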
void RC_encodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
{
assert(bit_count > 1);
--bit_count;
unsigned bit = symbol >> bit_count;
RC_encodeBit(rc, &probs[1], bit);
size_t tree_index = 1;
do {
        --bit_count;
        tree_index = (tree_index << 1) | bit;
        bit = (symbol >> bit_count) & 1;
        RC_encodeBit(rc, &probs[tree_index], bit);
} while (bit_count != 0);
}
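Usage sketch (rc is assumed already reset and given an output buffer; the 6-bit width and probability array are illustrative):
Probability probs[1 << 6];
for (size_t i = 0; i < (size_t)(1 << 6); ++i)
    probs[i] = kProbInitValue;
RC_encodeBitTree(&rc, probs, 6, 37);   /* emits the six bits of 37, MSB first */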
void EncodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
void RC_encodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
{
    assert(bit_count != 0);
    unsigned bit = symbol & 1;
    RC_encodeBit(rc, &probs[1], bit);
    unsigned tree_index = 1;
    while (--bit_count != 0) {
        tree_index = (tree_index << 1) + bit;
        symbol >>= 1;
        bit = symbol & 1;
        RC_encodeBit(rc, &probs[tree_index], bit);
    }
}
void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count)
void FORCE_NOINLINE RC_encodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count)
{
assert(bit_count > 0);
do {
@@ -93,7 +203,7 @@ void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count)
rc->low += rc->range & -((int)(value >> bit_count) & 1);
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
} while (bit_count != 0);
}

View File

@@ -28,9 +28,13 @@ typedef U16 Probability;
#define kNumMoveBits 5U
#define kProbInitValue (kBitModelTotal >> 1U)
#define kNumMoveReducingBits 4U
#define kNumBitPriceShiftBits 4U
#define kNumBitPriceShiftBits 5U
#define kPriceTableSize (kBitModelTotal >> kNumMoveReducingBits)
extern const unsigned price_table[kBitModelTotal >> kNumMoveReducingBits];
extern BYTE price_table[2][kPriceTableSize];
#if 0
void RC_printPriceTable();
#endif
typedef struct
{
@@ -43,22 +47,20 @@ typedef struct
BYTE cache;
} RangeEncoder;
void RangeEncReset(RangeEncoder* const rc);
void RC_reset(RangeEncoder* const rc);
void SetOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size);
void RC_setOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size);
void RangeEncReset(RangeEncoder* const rc);
void FORCE_NOINLINE RC_shiftLow(RangeEncoder* const rc);
void ShiftLow(RangeEncoder* const rc);
void RC_encodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);
void EncodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);
void RC_encodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);
void EncodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);
void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count);
void FORCE_NOINLINE RC_encodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count);
HINT_INLINE
void EncodeBit0(RangeEncoder* const rc, Probability *const rprob)
void RC_encodeBit0(RangeEncoder* const rc, Probability *const rprob)
{
unsigned prob = *rprob;
rc->range = (rc->range >> kNumBitModelTotalBits) * prob;
@@ -66,12 +68,12 @@ void EncodeBit0(RangeEncoder* const rc, Probability *const rprob)
*rprob = (Probability)prob;
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
}
HINT_INLINE
void EncodeBit1(RangeEncoder* const rc, Probability *const rprob)
void RC_encodeBit1(RangeEncoder* const rc, Probability *const rprob)
{
unsigned prob = *rprob;
U32 new_bound = (rc->range >> kNumBitModelTotalBits) * prob;
@@ -81,16 +83,16 @@ void EncodeBit1(RangeEncoder* const rc, Probability *const rprob)
*rprob = (Probability)prob;
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
}
HINT_INLINE
void EncodeBit(RangeEncoder* const rc, Probability *const rprob, unsigned const bit)
void RC_encodeBit(RangeEncoder* const rc, Probability *const rprob, unsigned const bit)
{
unsigned prob = *rprob;
if (bit != 0) {
U32 new_bound = (rc->range >> kNumBitModelTotalBits) * prob;
U32 const new_bound = (rc->range >> kNumBitModelTotalBits) * prob;
rc->low += new_bound;
rc->range -= new_bound;
prob -= prob >> kNumMoveBits;
@@ -102,52 +104,56 @@ void EncodeBit(RangeEncoder* const rc, Probability *const rprob, unsigned const
*rprob = (Probability)prob;
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
}
#define GET_PRICE(rc, prob, symbol) \
price_table[((prob) ^ ((-(int)(symbol)) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
#define GET_PRICE(prob, symbol) \
price_table[symbol][(prob) >> kNumMoveReducingBits]
#define GET_PRICE_0(rc, prob) price_table[(prob) >> kNumMoveReducingBits]
#define GET_PRICE_0(prob) price_table[0][(prob) >> kNumMoveReducingBits]
#define GET_PRICE_1(rc, prob) price_table[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
#define GET_PRICE_1(prob) price_table[1][(prob) >> kNumMoveReducingBits]
#define kMinLitPrice 8U
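Prices are bit costs scaled by 1 << kNumBitPriceShiftBits (now 32), so a freshly initialized model prices either bit at about one bit; a quick check against the tables above:
Probability const prob = kProbInitValue;   /* kBitModelTotal / 2 */
unsigned const p0 = GET_PRICE_0(prob);     /* index 64 => 31 */
unsigned const p1 = GET_PRICE_1(prob);     /* index 64 => 32 */
/* ~32 units == 1 bit, since kNumBitPriceShiftBits == 5 */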
HINT_INLINE
unsigned GetTreePrice(RangeEncoder* const rc, const Probability* const prob_table, unsigned const bit_count, size_t symbol)
unsigned RC_getTreePrice(const Probability* const prob_table, unsigned bit_count, size_t symbol)
{
unsigned price = 0;
symbol |= ((size_t)1 << bit_count);
    do {
        size_t const next_symbol = symbol >> 1;
        unsigned prob = prob_table[next_symbol];
        size_t bit = symbol & 1;
        price += GET_PRICE(prob, bit);
        symbol = next_symbol;
    } while (symbol != 1);
return price;
}
HINT_INLINE
unsigned GetReverseTreePrice(RangeEncoder* const rc, const Probability* const prob_table, unsigned const bit_count, size_t symbol)
unsigned RC_getReverseTreePrice(const Probability* const prob_table, unsigned bit_count, size_t symbol)
{
    unsigned prob = prob_table[1];
    size_t bit = symbol & 1;
    unsigned price = GET_PRICE(prob, bit);
    size_t m = 1;
    while (--bit_count != 0) {
        m = (m << 1) | bit;
        symbol >>= 1;
        prob = prob_table[m];
        bit = symbol & 1;
        price += GET_PRICE(prob, bit);
}
return price;
}
HINT_INLINE
void Flush(RangeEncoder* const rc)
void RC_flush(RangeEncoder* const rc)
{
for (int i = 0; i < 5; ++i)
ShiftLow(rc);
RC_shiftLow(rc);
}
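A minimal end-to-end sketch of the encoder lifecycle (buffer size illustrative); RC_flush pushes out the final bytes still held in low/cache:
BYTE out[256];
RangeEncoder rc;
RC_reset(&rc);
RC_setOutputBuffer(&rc, out, sizeof(out));
Probability prob = kProbInitValue;
RC_encodeBit(&rc, &prob, 1);
RC_flush(&rc);
/* rc.out_index == number of bytes written to out[] */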
#if defined (__cplusplus)

707
C/fast-lzma2/util.c Normal file
View File

@@ -0,0 +1,707 @@
/*
* Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#if defined (__cplusplus)
extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
#include "util.h" /* note : ensure that platform.h is included first ! */
#include <errno.h>
#include <assert.h>
int UTIL_fileExist(const char* filename)
{
stat_t statbuf;
#if defined(_MSC_VER)
int const stat_error = _stat64(filename, &statbuf);
#else
int const stat_error = stat(filename, &statbuf);
#endif
return !stat_error;
}
int UTIL_isRegularFile(const char* infilename)
{
stat_t statbuf;
return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */
}
int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
{
int r;
#if defined(_MSC_VER)
r = _stat64(infilename, statbuf);
if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */
#else
r = stat(infilename, statbuf);
if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */
#endif
return 1;
}
int UTIL_setFileStat(const char *filename, stat_t *statbuf)
{
int res = 0;
struct utimbuf timebuf;
if (!UTIL_isRegularFile(filename))
return -1;
timebuf.actime = time(NULL);
timebuf.modtime = statbuf->st_mtime;
res += utime(filename, &timebuf); /* set access and modification times */
#if !defined(_WIN32)
res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */
#endif
res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */
errno = 0;
return -res; /* number of errors is returned */
}
U32 UTIL_isDirectory(const char* infilename)
{
int r;
stat_t statbuf;
#if defined(_MSC_VER)
r = _stat64(infilename, &statbuf);
if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
#else
r = stat(infilename, &statbuf);
if (!r && S_ISDIR(statbuf.st_mode)) return 1;
#endif
return 0;
}
U32 UTIL_isLink(const char* infilename)
{
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
#ifndef __STRICT_ANSI__
#if defined(_BSD_SOURCE) \
|| (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \
|| (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \
|| (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \
|| (defined(__APPLE__) && defined(__MACH__)) \
|| defined(__OpenBSD__) \
|| defined(__FreeBSD__)
int r;
stat_t statbuf;
r = lstat(infilename, &statbuf);
if (!r && S_ISLNK(statbuf.st_mode)) return 1;
#endif
#endif
(void)infilename;
return 0;
}
U64 UTIL_getFileSize(const char* infilename)
{
if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN;
{ int r;
#if defined(_MSC_VER)
struct __stat64 statbuf;
r = _stat64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#elif defined(__MINGW32__) && defined (__MSVCRT__)
struct _stati64 statbuf;
r = _stati64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN;
#endif
return (U64)statbuf.st_size;
}
}
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles)
{
U64 total = 0;
int error = 0;
unsigned n;
for (n=0; n<nbFiles; n++) {
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
error |= (size == UTIL_FILESIZE_UNKNOWN);
total += size;
}
return error ? UTIL_FILESIZE_UNKNOWN : total;
}
#ifdef _WIN32
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
char* path;
int dirLength, fnameLength, pathLength, nbFiles = 0;
WIN32_FIND_DATAA cFile;
HANDLE hFile;
dirLength = (int)strlen(dirName);
path = (char*) malloc(dirLength + 3);
if (!path) return 0;
memcpy(path, dirName, dirLength);
path[dirLength] = '\\';
path[dirLength+1] = '*';
path[dirLength+2] = 0;
hFile=FindFirstFileA(path, &cFile);
if (hFile == INVALID_HANDLE_VALUE) {
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
return 0;
}
free(path);
do {
fnameLength = (int)strlen(cFile.cFileName);
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { FindClose(hFile); return 0; }
memcpy(path, dirName, dirLength);
path[dirLength] = '\\';
memcpy(path+dirLength+1, cFile.cFileName, fnameLength);
pathLength = dirLength+1+fnameLength;
path[pathLength] = 0;
if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
if ( strcmp (cFile.cFileName, "..") == 0
|| strcmp (cFile.cFileName, ".") == 0 )
continue;
/* Recursively call "UTIL_prepareFileList" with the new path. */
nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks);
if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
} else if ( (cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL)
|| (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE)
|| (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED) ) {
if (*bufStart + *pos + pathLength >= *bufEnd) {
ptrdiff_t const newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
*bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
*bufEnd = *bufStart + newListSize;
}
if (*bufStart + *pos + pathLength < *bufEnd) {
memcpy(*bufStart + *pos, path, pathLength+1 /* include final \0 */);
*pos += pathLength + 1;
nbFiles++;
}
}
free(path);
} while (FindNextFileA(hFile, &cFile));
FindClose(hFile);
return nbFiles;
}
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
DIR *dir;
struct dirent *entry;
char* path;
int dirLength, fnameLength, pathLength, nbFiles = 0;
if (!(dir = opendir(dirName))) {
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
return 0;
}
dirLength = (int)strlen(dirName);
errno = 0;
while ((entry = readdir(dir)) != NULL) {
if (strcmp (entry->d_name, "..") == 0 ||
strcmp (entry->d_name, ".") == 0) continue;
fnameLength = (int)strlen(entry->d_name);
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { closedir(dir); return 0; }
memcpy(path, dirName, dirLength);
path[dirLength] = '/';
memcpy(path+dirLength+1, entry->d_name, fnameLength);
pathLength = dirLength+1+fnameLength;
path[pathLength] = 0;
if (!followLinks && UTIL_isLink(path)) {
UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path);
continue;
}
if (UTIL_isDirectory(path)) {
nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */
if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
} else {
if (*bufStart + *pos + pathLength >= *bufEnd) {
ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
*bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
*bufEnd = *bufStart + newListSize;
if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
}
if (*bufStart + *pos + pathLength < *bufEnd) {
memcpy(*bufStart + *pos, path, pathLength + 1); /* with final \0 */
*pos += pathLength + 1;
nbFiles++;
}
}
free(path);
errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
}
if (errno != 0) {
UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
free(*bufStart);
*bufStart = NULL;
}
closedir(dir);
return nbFiles;
}
#else
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
(void)bufStart; (void)bufEnd; (void)pos; (void)followLinks;
UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
return 0;
}
#endif /* #ifdef _WIN32 */
/*
* UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
* and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
 * When the list is no longer needed, free the structures with UTIL_freeFileList (params: return value, allocatedBuffer).
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
*/
const char**
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
char** allocatedBuffer, unsigned* allocatedNamesNb,
int followLinks)
{
size_t pos;
unsigned i, nbFiles;
char* buf = (char*)malloc(LIST_SIZE_INCREASE);
char* bufend = buf + LIST_SIZE_INCREASE;
const char** fileTable;
if (!buf) return NULL;
for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
if (!UTIL_isDirectory(inputNames[i])) {
size_t const len = strlen(inputNames[i]);
if (buf + pos + len >= bufend) {
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
buf = (char*)UTIL_realloc(buf, newListSize);
bufend = buf + newListSize;
if (!buf) return NULL;
}
if (buf + pos + len < bufend) {
memcpy(buf+pos, inputNames[i], len+1); /* with final \0 */
pos += len + 1;
nbFiles++;
}
} else {
nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
if (buf == NULL) return NULL;
} }
if (nbFiles == 0) { free(buf); return NULL; }
fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
if (!fileTable) { free(buf); return NULL; }
for (i=0, pos=0; i<nbFiles; i++) {
fileTable[i] = buf + pos;
pos += strlen(fileTable[i]) + 1;
}
if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
*allocatedBuffer = buf;
*allocatedNamesNb = nbFiles;
return fileTable;
}
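Usage sketch (input names are illustrative): directories are expanded recursively, plain files pass through, and both returned pointers must be released together.
char* namesBuf = NULL;
unsigned nbFiles = 0;
const char* inputs[] = { "srcdir", "notes.txt" };
const char** files = UTIL_createFileList(inputs, 2, &namesBuf, &nbFiles, 0 /* followLinks */);
if (files != NULL) {
    for (unsigned n = 0; n < nbFiles; n++)
        UTIL_DISPLAY("%s\n", files[n]);
    UTIL_freeFileList(files, namesBuf);
}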
/*-****************************************
* Console log
******************************************/
int g_utilDisplayLevel;
/*-****************************************
* Time functions
******************************************/
#if defined(_WIN32) /* Windows */
UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static LARGE_INTEGER ticksPerSecond;
static int init = 0;
if (!init) {
if (!QueryPerformanceFrequency(&ticksPerSecond))
UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
init = 1;
}
return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static LARGE_INTEGER ticksPerSecond;
static int init = 0;
if (!init) {
if (!QueryPerformanceFrequency(&ticksPerSecond))
UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
init = 1;
}
return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
#elif defined(__APPLE__) && defined(__MACH__)
UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static mach_timebase_info_data_t rate;
static int init = 0;
if (!init) {
mach_timebase_info(&rate);
init = 1;
}
return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
}
U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static mach_timebase_info_data_t rate;
static int init = 0;
if (!init) {
mach_timebase_info(&rate);
init = 1;
}
return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
}
#elif (PLATFORM_POSIX_VERSION >= 200112L) \
&& (defined(__UCLIBC__) \
|| (defined(__GLIBC__) \
&& ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
|| (__GLIBC__ > 2))))
UTIL_time_t UTIL_getTime(void)
{
UTIL_time_t time;
if (clock_gettime(CLOCK_MONOTONIC, &time))
UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n"); /* we could also exit() */
return time;
}
UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t diff;
if (end.tv_nsec < begin.tv_nsec) {
diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
} else {
diff.tv_sec = end.tv_sec - begin.tv_sec;
diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
}
return diff;
}
U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
U64 micro = 0;
micro += 1000000ULL * diff.tv_sec;
micro += diff.tv_nsec / 1000ULL;
return micro;
}
U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
U64 nano = 0;
nano += 1000000000ULL * diff.tv_sec;
nano += diff.tv_nsec;
return nano;
}
#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
UTIL_time_t UTIL_getTime(void) { return clock(); }
U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
#endif
/* returns time span in microseconds */
U64 UTIL_clockSpanMicro(UTIL_time_t clockStart )
{
UTIL_time_t const clockEnd = UTIL_getTime();
return UTIL_getSpanTimeMicro(clockStart, clockEnd);
}
/* returns time span in microseconds */
U64 UTIL_clockSpanNano(UTIL_time_t clockStart )
{
UTIL_time_t const clockEnd = UTIL_getTime();
return UTIL_getSpanTimeNano(clockStart, clockEnd);
}
void UTIL_waitForNextTick(void)
{
UTIL_time_t const clockStart = UTIL_getTime();
UTIL_time_t clockEnd;
do {
clockEnd = UTIL_getTime();
} while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
}
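Typical measurement pattern with the helpers above:
UTIL_time_t const start = UTIL_getTime();
/* ... section being timed ... */
{   U64 const elapsed_us = UTIL_clockSpanMicro(start);
    UTIL_DISPLAY("elapsed: %llu us\n", (unsigned long long)elapsed_us);
}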
/* count the number of physical cores */
#if defined(_WIN32) || defined(WIN32)
#include <windows.h>
typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0;
if (numPhysicalCores != 0) return numPhysicalCores;
{ LPFN_GLPI glpi;
BOOL done = FALSE;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL;
DWORD returnLength = 0;
size_t byteOffset = 0;
glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
"GetLogicalProcessorInformation");
if (glpi == NULL) {
goto failed;
}
while(!done) {
DWORD rc = glpi(buffer, &returnLength);
if (FALSE == rc) {
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
if (buffer)
free(buffer);
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength);
if (buffer == NULL) {
perror("zstd");
exit(1);
}
} else {
/* some other error */
goto failed;
}
} else {
done = TRUE;
}
}
ptr = buffer;
while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {
if (ptr->Relationship == RelationProcessorCore) {
numPhysicalCores++;
}
ptr++;
byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
}
free(buffer);
return numPhysicalCores;
}
failed:
/* try to fall back on GetSystemInfo */
{ SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
numPhysicalCores = sysinfo.dwNumberOfProcessors;
if (numPhysicalCores == 0) numPhysicalCores = 1; /* just in case */
}
return numPhysicalCores;
}
#elif defined(__APPLE__)
#include <sys/sysctl.h>
/* Use apple-provided syscall
* see: man 3 sysctl */
int UTIL_countPhysicalCores(void)
{
static S32 numPhysicalCores = 0; /* apple specifies int32_t */
if (numPhysicalCores != 0) return numPhysicalCores;
{ size_t size = sizeof(S32);
int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0);
if (ret != 0) {
if (errno == ENOENT) {
/* entry not present, fall back on 1 */
numPhysicalCores = 1;
} else {
perror("zstd: can't get number of physical cpus");
exit(1);
}
}
return numPhysicalCores;
}
}
#elif defined(__linux__)
/* parse /proc/cpuinfo
* siblings / cpu cores should give hyperthreading ratio
* otherwise fall back on sysconf */
int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0;
if (numPhysicalCores != 0) return numPhysicalCores;
numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
if (numPhysicalCores == -1) {
/* value not queryable, fall back on 1 */
return numPhysicalCores = 1;
}
/* try to determine if there's hyperthreading */
{ FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
#define BUF_SIZE 80
char buff[BUF_SIZE];
int siblings = 0;
int cpu_cores = 0;
int ratio = 1;
if (cpuinfo == NULL) {
/* fall back on the sysconf value */
return numPhysicalCores;
}
/* assume the cpu cores/siblings values will be constant across all
* present processors */
while (!feof(cpuinfo)) {
if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) {
if (strncmp(buff, "siblings", 8) == 0) {
const char* const sep = strchr(buff, ':');
if (sep == NULL) {
/* formatting was broken? */
goto failed;
}
siblings = atoi(sep + 1);
}
if (strncmp(buff, "cpu cores", 9) == 0) {
const char* const sep = strchr(buff, ':');
if (sep == NULL) {
/* formatting was broken? */
goto failed;
}
cpu_cores = atoi(sep + 1);
}
} else if (ferror(cpuinfo)) {
/* fall back on the sysconf value */
goto failed;
}
}
if (siblings && cpu_cores) {
ratio = siblings / cpu_cores;
}
failed:
fclose(cpuinfo);
return numPhysicalCores = numPhysicalCores / ratio;
}
}
#elif defined(__FreeBSD__)
#include <sys/param.h>
#include <sys/sysctl.h>
/* Use physical core sysctl when available
* see: man 4 smp, man 3 sysctl */
int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0; /* freebsd sysctl is native int sized */
if (numPhysicalCores != 0) return numPhysicalCores;
#if __FreeBSD_version >= 1300008
{ size_t size = sizeof(numPhysicalCores);
int ret = sysctlbyname("kern.smp.cores", &numPhysicalCores, &size, NULL, 0);
if (ret == 0) return numPhysicalCores;
if (errno != ENOENT) {
perror("zstd: can't get number of physical cpus");
exit(1);
}
/* sysctl not present, fall through to older sysconf method */
}
#endif
numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
if (numPhysicalCores == -1) {
/* value not queryable, fall back on 1 */
numPhysicalCores = 1;
}
return numPhysicalCores;
}
#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
/* Use POSIX sysconf
* see: man 3 sysconf */
int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0;
if (numPhysicalCores != 0) return numPhysicalCores;
numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
if (numPhysicalCores == -1) {
/* value not queryable, fall back on 1 */
return numPhysicalCores = 1;
}
return numPhysicalCores;
}
#else
int UTIL_countPhysicalCores(void)
{
/* assume 1 */
return 1;
}
#endif
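Every branch above caches its result and returns at least 1 in practice, so the value can size a worker pool directly (sketch):
int const nb_threads = UTIL_countPhysicalCores();   /* cached after the first call */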
#if defined (__cplusplus)
}
#endif

View File

@@ -16,17 +16,15 @@ extern "C" {
#endif
/*-****************************************
* Dependencies
******************************************/
#include "platform.h" /* PLATFORM_POSIX_VERSION */
#include <stdlib.h> /* malloc */
#include "platform.h" /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
#include <stdlib.h> /* malloc, realloc, free */
#include <stddef.h> /* size_t, ptrdiff_t */
#include <stdio.h> /* fprintf */
#include <string.h> /* strncmp */
#include <sys/types.h> /* stat, utime */
#include <sys/stat.h> /* stat */
#include <sys/stat.h> /* stat, chmod */
#if defined(_MSC_VER)
# include <sys/utime.h> /* utime */
# include <io.h> /* _chmod */
@@ -34,13 +32,12 @@ extern "C" {
# include <unistd.h> /* chown, stat */
# include <utime.h> /* utime */
#endif
#include <time.h> /* time */
#include <errno.h>
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
#include "mem.h" /* U32, U64 */
/* ************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
/*-************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
# define UTIL_fseek _fseeki64
@@ -53,37 +50,38 @@ extern "C" {
#endif
/*-****************************************
* Sleep functions: Windows - Posix - others
******************************************/
/*-*************************************************
* Sleep & priority functions: Windows - Posix - others
***************************************************/
#if defined(_WIN32)
# include <windows.h>
# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
# define UTIL_sleep(s) Sleep(1000*s)
# define UTIL_sleepMilli(milli) Sleep(milli)
#elif PLATFORM_POSIX_VERSION > 0 /* Unix-like operating system */
#  include <unistd.h>   /* sleep */
#  define UTIL_sleep(s) sleep(s)
#  if ZSTD_NANOSLEEP_SUPPORT   /* necessarily defined in platform.h */
#    define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); }
#  else
#    define UTIL_sleepMilli(milli) /* disabled */
#  endif
#  if ZSTD_SETPRIORITY_SUPPORT
#    include <sys/resource.h> /* setpriority */
#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
#  else
#    define SET_REALTIME_PRIORITY /* disabled */
#  endif
#else  /* unknown non-unix operating system */
#  define UTIL_sleep(s) /* disabled */
#  define UTIL_sleepMilli(milli) /* disabled */
#  define SET_REALTIME_PRIORITY /* disabled */
#endif
/* *************************************
/*-*************************************
* Constants
***************************************/
#define LIST_SIZE_INCREASE (8*1024)
@@ -101,8 +99,6 @@ extern "C" {
# define UTIL_STATIC static inline
#elif defined(_MSC_VER)
# define UTIL_STATIC static __inline
# pragma warning(disable : 4996) /* disable: C4996: 'strncpy': This function or variable may be unsafe. */
# pragma warning(disable : 4389) /* disable: C4389: '==' : signed/unsigned mismatch */
#else
# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
@@ -111,7 +107,7 @@ extern "C" {
/*-****************************************
* Console log
******************************************/
static int g_utilDisplayLevel;
extern int g_utilDisplayLevel;
#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }
@@ -120,119 +116,47 @@ static int g_utilDisplayLevel;
* Time functions
******************************************/
#if defined(_WIN32) /* Windows */
#define UTIL_TIME_INITIALIZER { { 0, 0 } }
typedef LARGE_INTEGER UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static LARGE_INTEGER ticksPerSecond;
static int init = 0;
if (!init) {
if (!QueryPerformanceFrequency(&ticksPerSecond))
UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
init = 1;
}
return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static LARGE_INTEGER ticksPerSecond;
static int init = 0;
if (!init) {
if (!QueryPerformanceFrequency(&ticksPerSecond))
UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
init = 1;
}
return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
#elif defined(__APPLE__) && defined(__MACH__)
#include <mach/mach_time.h>
#define UTIL_TIME_INITIALIZER 0
typedef U64 UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static mach_timebase_info_data_t rate;
static int init = 0;
if (!init) {
mach_timebase_info(&rate);
init = 1;
}
return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
}
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
static mach_timebase_info_data_t rate;
static int init = 0;
if (!init) {
mach_timebase_info(&rate);
init = 1;
}
return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
}
#elif (PLATFORM_POSIX_VERSION >= 200112L)
#include <time.h>
#elif (PLATFORM_POSIX_VERSION >= 200112L) \
&& (defined(__UCLIBC__) \
|| (defined(__GLIBC__) \
&& ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
|| (__GLIBC__ > 2))))
#define UTIL_TIME_INITIALIZER { 0, 0 }
typedef struct timespec UTIL_freq_t;
typedef struct timespec UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void)
{
UTIL_time_t time;
if (clock_gettime(CLOCK_MONOTONIC, &time))
UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n"); /* we could also exit() */
return time;
}
UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t diff;
if (end.tv_nsec < begin.tv_nsec) {
diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
} else {
diff.tv_sec = end.tv_sec - begin.tv_sec;
diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
}
return diff;
}
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
U64 micro = 0;
micro += 1000000ULL * diff.tv_sec;
micro += diff.tv_nsec / 1000ULL;
return micro;
}
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
{
UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
U64 nano = 0;
nano += 1000000000ULL * diff.tv_sec;
nano += diff.tv_nsec;
return nano;
}
UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end);
#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */
typedef clock_t UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); }
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
#define UTIL_TIME_INITIALIZER 0
#endif
UTIL_time_t UTIL_getTime(void);
U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd);
U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd);
#define SEC_TO_MICRO 1000000
/* returns time span in microseconds */
UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart )
{
UTIL_time_t const clockEnd = UTIL_getTime();
return UTIL_getSpanTimeMicro(clockStart, clockEnd);
}
UTIL_STATIC void UTIL_waitForNextTick(void)
{
UTIL_time_t const clockStart = UTIL_getTime();
UTIL_time_t clockEnd;
do {
clockEnd = UTIL_getTime();
} while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
}
U64 UTIL_clockSpanMicro(UTIL_time_t clockStart);
/* returns time span in microseconds */
U64 UTIL_clockSpanNano(UTIL_time_t clockStart);
void UTIL_waitForNextTick(void);
/*-****************************************
* File functions
@@ -245,118 +169,23 @@ UTIL_STATIC void UTIL_waitForNextTick(void)
#endif
UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf)
{
int res = 0;
struct utimbuf timebuf;
timebuf.actime = time(NULL);
timebuf.modtime = statbuf->st_mtime;
res += utime(filename, &timebuf); /* set access and modification times */
#if !defined(_WIN32)
res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */
#endif
res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */
errno = 0;
return -res; /* number of errors is returned */
}
UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
{
int r;
#if defined(_MSC_VER)
r = _stat64(infilename, statbuf);
if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */
#else
r = stat(infilename, statbuf);
if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */
#endif
return 1;
}
UTIL_STATIC int UTIL_isRegularFile(const char* infilename)
{
stat_t statbuf;
return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */
}
UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
{
int r;
stat_t statbuf;
#if defined(_MSC_VER)
r = _stat64(infilename, &statbuf);
if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
#else
r = stat(infilename, &statbuf);
if (!r && S_ISDIR(statbuf.st_mode)) return 1;
#endif
return 0;
}
UTIL_STATIC U32 UTIL_isLink(const char* infilename)
{
#if defined(_WIN32)
/* no symlinks on windows */
(void)infilename;
#else
int r;
stat_t statbuf;
r = lstat(infilename, &statbuf);
if (!r && S_ISLNK(statbuf.st_mode)) return 1;
#endif
return 0;
}
int UTIL_fileExist(const char* filename);
int UTIL_isRegularFile(const char* infilename);
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
U32 UTIL_isDirectory(const char* infilename);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);
U32 UTIL_isLink(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN ((U64)(-1))
UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
{
if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN;
{ int r;
#if defined(_MSC_VER)
struct __stat64 statbuf;
r = _stat64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#elif defined(__MINGW32__) && defined (__MSVCRT__)
struct _stati64 statbuf;
r = _stati64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN;
#endif
return (U64)statbuf.st_size;
}
}
UTIL_STATIC U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles)
{
U64 total = 0;
int error = 0;
unsigned n;
for (n=0; n<nbFiles; n++) {
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
error |= (size == UTIL_FILESIZE_UNKNOWN);
total += size;
}
return error ? UTIL_FILESIZE_UNKNOWN : total;
}
U64 UTIL_getFileSize(const char* infilename);
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);
/*
* A modified version of realloc().
* If UTIL_realloc() fails the original block is freed.
*/
UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
{
void *newptr = realloc(ptr, size);
if (newptr) return newptr;
@@ -364,143 +193,14 @@ UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
return NULL;
}
int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks);
#ifdef _WIN32
# define UTIL_HAS_CREATEFILELIST
UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
char* path;
int dirLength, fnameLength, pathLength, nbFiles = 0;
WIN32_FIND_DATAA cFile;
HANDLE hFile;
dirLength = (int)strlen(dirName);
path = (char*) malloc(dirLength + 3);
if (!path) return 0;
memcpy(path, dirName, dirLength);
path[dirLength] = '\\';
path[dirLength+1] = '*';
path[dirLength+2] = 0;
hFile=FindFirstFileA(path, &cFile);
if (hFile == INVALID_HANDLE_VALUE) {
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
return 0;
}
free(path);
do {
fnameLength = (int)strlen(cFile.cFileName);
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { FindClose(hFile); return 0; }
memcpy(path, dirName, dirLength);
path[dirLength] = '\\';
memcpy(path+dirLength+1, cFile.cFileName, fnameLength);
pathLength = dirLength+1+fnameLength;
path[pathLength] = 0;
if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
if (strcmp (cFile.cFileName, "..") == 0 ||
strcmp (cFile.cFileName, ".") == 0) continue;
nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */
if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
}
else if ((cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED)) {
if (*bufStart + *pos + pathLength >= *bufEnd) {
ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
*bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
*bufEnd = *bufStart + newListSize;
if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
}
if (*bufStart + *pos + pathLength < *bufEnd) {
strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos));
*pos += pathLength + 1;
nbFiles++;
}
}
free(path);
} while (FindNextFileA(hFile, &cFile));
FindClose(hFile);
return nbFiles;
}
#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */
# define UTIL_HAS_CREATEFILELIST
# include <dirent.h> /* opendir, readdir */
# include <string.h> /* strerror, memcpy */
UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
DIR *dir;
struct dirent *entry;
char* path;
int dirLength, fnameLength, pathLength, nbFiles = 0;
if (!(dir = opendir(dirName))) {
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
return 0;
}
dirLength = (int)strlen(dirName);
errno = 0;
while ((entry = readdir(dir)) != NULL) {
if (strcmp (entry->d_name, "..") == 0 ||
strcmp (entry->d_name, ".") == 0) continue;
fnameLength = (int)strlen(entry->d_name);
path = (char*) malloc(dirLength + fnameLength + 2);
if (!path) { closedir(dir); return 0; }
memcpy(path, dirName, dirLength);
path[dirLength] = '/';
memcpy(path+dirLength+1, entry->d_name, fnameLength);
pathLength = dirLength+1+fnameLength;
path[pathLength] = 0;
if (!followLinks && UTIL_isLink(path)) {
UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path);
continue;
}
if (UTIL_isDirectory(path)) {
nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. */
if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
} else {
if (*bufStart + *pos + pathLength >= *bufEnd) {
ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
*bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
*bufEnd = *bufStart + newListSize;
if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
}
if (*bufStart + *pos + pathLength < *bufEnd) {
strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos));
*pos += pathLength + 1;
nbFiles++;
}
}
free(path);
errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
}
if (errno != 0) {
UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
free(*bufStart);
*bufStart = NULL;
}
closedir(dir);
return nbFiles;
}
#else
UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
(void)bufStart; (void)bufEnd; (void)pos;
UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
return 0;
}
#endif /* #ifdef _WIN32 */
/*
@@ -509,53 +209,10 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
* After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
* In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
*/
UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks)
{
size_t pos;
unsigned i, nbFiles;
char* buf = (char*)malloc(LIST_SIZE_INCREASE);
char* bufend = buf + LIST_SIZE_INCREASE;
const char** fileTable;
if (!buf) return NULL;
for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
if (!UTIL_isDirectory(inputNames[i])) {
size_t const len = strlen(inputNames[i]);
if (buf + pos + len >= bufend) {
ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
buf = (char*)UTIL_realloc(buf, newListSize);
bufend = buf + newListSize;
if (!buf) return NULL;
}
if (buf + pos + len < bufend) {
strncpy(buf + pos, inputNames[i], bufend - (buf + pos));
pos += len + 1;
nbFiles++;
}
} else {
nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
if (buf == NULL) return NULL;
} }
if (nbFiles == 0) { free(buf); return NULL; }
fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
if (!fileTable) { free(buf); return NULL; }
for (i=0, pos=0; i<nbFiles; i++) {
fileTable[i] = buf + pos;
pos += strlen(fileTable[i]) + 1;
}
if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }
*allocatedBuffer = buf;
*allocatedNamesNb = nbFiles;
return fileTable;
}
const char**
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
char** allocatedBuffer, unsigned* allocatedNamesNb,
int followLinks);
UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
{
@@ -563,201 +220,7 @@ UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBu
if (filenameTable) free((void*)filenameTable);
}
/* count the number of physical cores */
#if defined(_WIN32) || defined(WIN32)
#include <windows.h>
typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0;
if (numPhysicalCores != 0) return numPhysicalCores;
{ LPFN_GLPI glpi;
BOOL done = FALSE;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL;
DWORD returnLength = 0;
size_t byteOffset = 0;
glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
"GetLogicalProcessorInformation");
if (glpi == NULL) {
goto failed;
}
while(!done) {
DWORD rc = glpi(buffer, &returnLength);
if (FALSE == rc) {
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
if (buffer)
free(buffer);
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength);
if (buffer == NULL) {
perror("zstd");
exit(1);
}
} else {
/* some other error */
goto failed;
}
} else {
done = TRUE;
}
}
ptr = buffer;
while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {
if (ptr->Relationship == RelationProcessorCore) {
numPhysicalCores++;
}
ptr++;
byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
}
free(buffer);
return numPhysicalCores;
}
failed:
/* try to fall back on GetSystemInfo */
{ SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
numPhysicalCores = sysinfo.dwNumberOfProcessors;
if (numPhysicalCores == 0) numPhysicalCores = 1; /* just in case */
}
return numPhysicalCores;
}
#elif defined(__APPLE__)
#include <sys/sysctl.h>
/* Use apple-provided syscall
* see: man 3 sysctl */
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
static S32 numPhysicalCores = 0; /* apple specifies int32_t */
if (numPhysicalCores != 0) return numPhysicalCores;
{ size_t size = sizeof(S32);
int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0);
if (ret != 0) {
if (errno == ENOENT) {
/* entry not present, fall back on 1 */
numPhysicalCores = 1;
} else {
perror("zstd: can't get number of physical cpus");
exit(1);
}
}
return numPhysicalCores;
}
}
#elif defined(__linux__)
/* parse /proc/cpuinfo
* siblings / cpu cores should give hyperthreading ratio
* otherwise fall back on sysconf */
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0;
if (numPhysicalCores != 0) return numPhysicalCores;
numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
if (numPhysicalCores == -1) {
/* value not queryable, fall back on 1 */
return numPhysicalCores = 1;
}
/* try to determine if there's hyperthreading */
{ FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
#define BUF_SIZE 80
char buff[BUF_SIZE];
int siblings = 0;
int cpu_cores = 0;
int ratio = 1;
if (cpuinfo == NULL) {
/* fall back on the sysconf value */
return numPhysicalCores;
}
/* assume the cpu cores/siblings values will be constant across all
* present processors */
while (!feof(cpuinfo)) {
if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) {
if (strncmp(buff, "siblings", 8) == 0) {
const char* const sep = strchr(buff, ':');
if (*sep == '\0') {
/* formatting was broken? */
goto failed;
}
siblings = atoi(sep + 1);
}
if (strncmp(buff, "cpu cores", 9) == 0) {
const char* const sep = strchr(buff, ':');
if (*sep == '\0') {
/* formatting was broken? */
goto failed;
}
cpu_cores = atoi(sep + 1);
}
} else if (ferror(cpuinfo)) {
/* fall back on the sysconf value */
goto failed;
}
}
if (siblings && cpu_cores) {
ratio = siblings / cpu_cores;
}
failed:
fclose(cpuinfo);
return numPhysicalCores = numPhysicalCores / ratio;
}
}
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
/* Use apple-provided syscall
* see: man 3 sysctl */
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
static int numPhysicalCores = 0;
if (numPhysicalCores != 0) return numPhysicalCores;
numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
if (numPhysicalCores == -1) {
/* value not queryable, fall back on 1 */
return numPhysicalCores = 1;
}
return numPhysicalCores;
}
#else
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
/* assume 1 */
return 1;
}
#endif
int UTIL_countPhysicalCores(void);
#if defined (__cplusplus)
}

View File

@@ -212,7 +212,7 @@ $(ZSTDMT_OBJS): ../../../../C/zstdmt/$(*B).c
!IFDEF FASTLZMA2_OBJS
$(FASTLZMA2_OBJS): ../../../../C/fast-lzma2/$(*B).c
$(COMPL_O2) -DNO_XXHASH
$(COMPL_O2) -DNO_XXHASH -DFL2_7ZIP_BUILD
!ENDIF
@@ -298,7 +298,7 @@ $(FASTLZMA2_OBJS): ../../../../C/fast-lzma2/$(*B).c
-I ../../../../C/lz5 \
-I ../../../../C/zstd
{../../../../C/fast-lzma2}.c{$O}.obj::
$(COMPLB_O2) -DNO_XXHASH
$(COMPLB_O2) -DNO_XXHASH -DFL2_7ZIP_BUILD
!ENDIF

View File

@@ -322,16 +322,17 @@ ZSTDMT_OBJS = \
$O\zstd-mt_threading.obj \
FASTLZMA2_OBJS = \
$O\fl2_error_private.obj \
$O\fl2pool.obj \
$O\fl2threading.obj \
$O\dict_buffer.obj \
$O\fl2_common.obj \
$O\fl2_compress.obj \
$O\fl2_pool.obj \
$O\fl2_threading.obj \
$O\lzma2_enc.obj \
$O\radix_bitpack.obj \
$O\radix_mf.obj \
$O\radix_struct.obj \
$O\range_enc.obj \
$O\util.obj \
!include "../../UI/Console/Console.mak"

View File

@@ -36,15 +36,16 @@ COMPRESS_OBJS = $(COMPRESS_OBJS) \
$O\FastLzma2Register.obj \
FASTLZMA2_OBJS = \
$O\fl2_error_private.obj \
$O\fl2pool.obj \
$O\fl2threading.obj \
$O\dict_buffer.obj \
$O\fl2_common.obj \
$O\fl2_compress.obj \
$O\fl2_pool.obj \
$O\fl2_threading.obj \
$O\lzma2_enc.obj \
$O\radix_bitpack.obj \
$O\radix_mf.obj \
$O\radix_struct.obj \
$O\range_enc.obj \
$O\util.obj \
!include "../../7zip.mak"

View File

@@ -244,16 +244,17 @@ ZSTDMT_OBJS = \
$O\zstd-mt_threading.obj \
FASTLZMA2_OBJS = \
$O\fl2_error_private.obj \
$O\fl2pool.obj \
$O\fl2threading.obj \
$O\dict_buffer.obj \
$O\fl2_common.obj \
$O\fl2_compress.obj \
$O\fl2_pool.obj \
$O\fl2_threading.obj \
$O\lzma2_enc.obj \
$O\radix_bitpack.obj \
$O\radix_mf.obj \
$O\radix_struct.obj \
$O\range_enc.obj \
$O\util.obj \

View File

@@ -119,15 +119,16 @@ ZSTDMT_OBJS = \
$O\zstd-mt_threading.obj \
FASTLZMA2_OBJS = \
$O\fl2_error_private.obj \
$O\fl2pool.obj \
$O\fl2threading.obj \
$O\dict_buffer.obj \
$O\fl2_common.obj \
$O\fl2_compress.obj \
$O\fl2_pool.obj \
$O\fl2_threading.obj \
$O\lzma2_enc.obj \
$O\radix_bitpack.obj \
$O\radix_mf.obj \
$O\radix_struct.obj \
$O\range_enc.obj \
$O\util.obj \
!include "../../7zip.mak"

View File

@@ -119,15 +119,16 @@ ZSTDMT_OBJS = \
$O\zstd-mt_threading.obj \
FASTLZMA2_OBJS = \
$O\fl2_error_private.obj \
$O\fl2pool.obj \
$O\fl2threading.obj \
$O\dict_buffer.obj \
$O\fl2_common.obj \
$O\fl2_compress.obj \
$O\fl2_pool.obj \
$O\fl2_threading.obj \
$O\lzma2_enc.obj \
$O\radix_bitpack.obj \
$O\radix_mf.obj \
$O\radix_struct.obj \
$O\range_enc.obj \
$O\util.obj \
!include "../../7zip.mak"

View File

@@ -236,15 +236,16 @@ ZSTDMT_OBJS = \
$O\zstd-mt_threading.obj \
FASTLZMA2_OBJS = \
$O\fl2_error_private.obj \
$O\fl2pool.obj \
$O\fl2threading.obj \
$O\dict_buffer.obj \
$O\fl2_common.obj \
$O\fl2_compress.obj \
$O\fl2_pool.obj \
$O\fl2_threading.obj \
$O\lzma2_enc.obj \
$O\radix_bitpack.obj \
$O\radix_mf.obj \
$O\radix_struct.obj \
$O\range_enc.obj \
$O\util.obj \
!include "../../7zip.mak"

View File

@@ -121,23 +121,39 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, ISequentialOutStream
return SResToHRESULT(res);
}
CFastEncoder::CFastEncoder()
static HRESULT TranslateError(size_t res)
{
_encoder = NULL;
reduceSize = 0;
if (FL2_getErrorCode(res) == FL2_error_memory_allocation)
return E_OUTOFMEMORY;
return S_FALSE;
}
CFastEncoder::~CFastEncoder()
#define CHECK_S(f_) do { \
size_t r_ = f_; \
if (FL2_isError(r_)) \
return TranslateError(r_); \
} while (false)
#define CHECK_H(f_) do { \
HRESULT r_ = f_; \
if (r_ != S_OK) \
return r_; \
} while (false)
#define CHECK_P(f) if (FL2_isError(f)) return E_INVALIDARG; /* check and convert error code */
CFastEncoder::FastLzma2::FastLzma2()
: fcs(NULL),
dict_pos(0)
{
if (_encoder)
FL2_freeCCtx(_encoder);
}
CFastEncoder::FastLzma2::~FastLzma2()
{
FL2_freeCCtx(fcs);
}
#define CHECK_F(f) if (FL2_isError(f)) return E_INVALIDARG; /* check and convert error code */
STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs,
const PROPVARIANT *coderProps, UInt32 numProps)
HRESULT CFastEncoder::FastLzma2::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
{
CLzma2EncProps lzma2Props;
Lzma2EncProps_Init(&lzma2Props);
@@ -146,56 +162,165 @@ STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs,
{
RINOK(SetLzma2Prop(propIDs[i], coderProps[i], lzma2Props));
}
if (_encoder == NULL) {
_encoder = FL2_createCCtxMt(lzma2Props.numTotalThreads);
if (_encoder == NULL)
if (fcs == NULL) {
fcs = FL2_createCStreamMt(lzma2Props.numTotalThreads, 1);
if (fcs == NULL)
return E_OUTOFMEMORY;
}
if (lzma2Props.lzmaProps.algo > 2) {
if (lzma2Props.lzmaProps.algo > 3)
return E_INVALIDARG;
lzma2Props.lzmaProps.algo = 2;
FL2_CCtx_setParameter(_encoder, FL2_p_highCompression, 1);
FL2_CCtx_setParameter(_encoder, FL2_p_compressionLevel, lzma2Props.lzmaProps.level);
FL2_CCtx_setParameter(fcs, FL2_p_highCompression, 1);
FL2_CCtx_setParameter(fcs, FL2_p_compressionLevel, lzma2Props.lzmaProps.level);
}
else {
FL2_CCtx_setParameter(_encoder, FL2_p_7zLevel, lzma2Props.lzmaProps.level);
FL2_CCtx_setParameter(fcs, FL2_p_compressionLevel, lzma2Props.lzmaProps.level);
}
dictSize = lzma2Props.lzmaProps.dictSize;
size_t dictSize = lzma2Props.lzmaProps.dictSize;
if (!dictSize) {
dictSize = (UInt32)1 << FL2_CCtx_setParameter(_encoder, FL2_p_dictionaryLog, 0);
dictSize = (UInt32)FL2_CCtx_getParameter(fcs, FL2_p_dictionarySize);
}
reduceSize = lzma2Props.lzmaProps.reduceSize;
size_t reduceSize = lzma2Props.lzmaProps.reduceSize;
reduceSize += (reduceSize < (UInt64)-1); /* prevent extra buffer shift after read */
dictSize = (UInt32)min(dictSize, reduceSize);
unsigned dictLog = FL2_DICTLOG_MIN;
while (((UInt32)1 << dictLog) < dictSize)
++dictLog;
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_dictionaryLog, dictLog));
dictSize = max(dictSize, FL2_DICTSIZE_MIN);
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_dictionarySize, dictSize));
if (lzma2Props.lzmaProps.algo >= 0) {
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_strategy, (unsigned)lzma2Props.lzmaProps.algo));
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_strategy, (unsigned)lzma2Props.lzmaProps.algo));
}
if (lzma2Props.lzmaProps.fb > 0)
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_fastLength, lzma2Props.lzmaProps.fb));
if (lzma2Props.lzmaProps.mc) {
unsigned ml = 0;
while (((UInt32)1 << ml) < lzma2Props.lzmaProps.mc)
++ml;
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_searchLog, ml));
}
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_fastLength, lzma2Props.lzmaProps.fb));
if (lzma2Props.lzmaProps.mc > 0)
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_hybridCycles, lzma2Props.lzmaProps.mc));
if (lzma2Props.lzmaProps.lc >= 0)
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_literalCtxBits, lzma2Props.lzmaProps.lc));
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_literalCtxBits, lzma2Props.lzmaProps.lc));
if (lzma2Props.lzmaProps.lp >= 0)
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_literalPosBits, lzma2Props.lzmaProps.lp));
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_literalPosBits, lzma2Props.lzmaProps.lp));
if (lzma2Props.lzmaProps.pb >= 0)
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_posBits, lzma2Props.lzmaProps.pb));
FL2_CCtx_setParameter(_encoder, FL2_p_omitProperties, 1);
#ifndef NO_XXHASH
FL2_CCtx_setParameter(_encoder, FL2_p_doXXHash, 0);
#endif
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_posBits, lzma2Props.lzmaProps.pb));
FL2_CCtx_setParameter(fcs, FL2_p_omitProperties, 1);
FL2_setCStreamTimeout(fcs, 500);
return S_OK;
}
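The dictionary sizing above folds three constraints together: the requested dictionary size, the reduceSize hint (total input size, when known), and the FL2_DICTSIZE_MIN floor. A standalone re-derivation of that clamp for readers checking the arithmetic; the value of FL2_DICTSIZE_MIN (1 MB) is an assumption taken from fast-lzma2.h:

#include <cstdint>
#include <cstdio>

static const uint32_t kDictSizeMin = 1u << 20;   /* stands in for FL2_DICTSIZE_MIN */

static uint32_t clampDict(uint32_t dictSize, uint64_t reduceSize)
{
    /* add 1 unless reduceSize is the "unknown size" sentinel (UINT64_MAX);
     * this prevents an extra buffer shift when the input is exactly dictSize bytes */
    reduceSize += (reduceSize < UINT64_MAX);
    if (reduceSize < dictSize)
        dictSize = (uint32_t)reduceSize;         /* shrink dict to fit a small input */
    if (dictSize < kDictSizeMin)
        dictSize = kDictSizeMin;                 /* mirrors max(dictSize, FL2_DICTSIZE_MIN) */
    return dictSize;
}

int main()
{
    std::printf("%u\n", clampDict(64u << 20, 4u << 20));     /* 4194305: 4 MB input + 1 */
    std::printf("%u\n", clampDict(64u << 20, UINT64_MAX));   /* 67108864: size unknown */
    return 0;
}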
size_t CFastEncoder::FastLzma2::GetDictSize() const
{
return FL2_CCtx_getParameter(fcs, FL2_p_dictionarySize);
}
HRESULT CFastEncoder::FastLzma2::Begin()
{
CHECK_S(FL2_initCStream(fcs, 0));
CHECK_S(FL2_getDictionaryBuffer(fcs, &dict));
dict_pos = 0;
return S_OK;
}
BYTE* CFastEncoder::FastLzma2::GetAvailableBuffer(unsigned long& size)
{
size = static_cast<unsigned long>(dict.size - dict_pos);
return reinterpret_cast<BYTE*>(dict.dst) + dict_pos;
}
HRESULT CFastEncoder::FastLzma2::WaitAndReport(size_t& res, ICompressProgressInfo *progress)
{
while (FL2_isTimedOut(res)) {
if (!UpdateProgress(progress))
return S_FALSE;
res = FL2_waitCStream(fcs);
}
CHECK_S(res);
return S_OK;
}
HRESULT CFastEncoder::FastLzma2::AddByteCount(size_t count, ISequentialOutStream *outStream, ICompressProgressInfo *progress)
{
dict_pos += count;
if (dict_pos == dict.size) {
size_t res = FL2_updateDictionary(fcs, dict_pos);
CHECK_H(WaitAndReport(res, progress));
if (res != 0)
CHECK_H(WriteBuffers(outStream));
do {
res = FL2_getDictionaryBuffer(fcs, &dict);
} while (FL2_isTimedOut(res));
CHECK_S(res);
dict_pos = 0;
}
if (!UpdateProgress(progress))
return S_FALSE;
return S_OK;
}
bool CFastEncoder::FastLzma2::UpdateProgress(ICompressProgressInfo *progress)
{
if (progress) {
UInt64 outProcessed;
UInt64 inProcessed = FL2_getCStreamProgress(fcs, &outProcessed);
HRESULT err = progress->SetRatioInfo(&inProcessed, &outProcessed);
if (err != S_OK) {
FL2_cancelCStream(fcs);
return false;
}
}
return true;
}
HRESULT CFastEncoder::FastLzma2::WriteBuffers(ISequentialOutStream *outStream)
{
size_t csize;
for (;;) {
FL2_cBuffer cbuf;
// waits if compression is still in progress
csize = FL2_getNextCStreamBuffer(fcs, &cbuf);
CHECK_S(csize);
if (csize == 0)
break;
HRESULT err = WriteStream(outStream, cbuf.src, cbuf.size);
if (err != S_OK)
return err;
}
return S_OK;
}
HRESULT CFastEncoder::FastLzma2::End(ISequentialOutStream *outStream, ICompressProgressInfo *progress)
{
if (dict_pos) {
size_t res = FL2_updateDictionary(fcs, dict_pos);
CHECK_H(WaitAndReport(res, progress));
}
size_t res = FL2_endStream(fcs, nullptr);
CHECK_H(WaitAndReport(res, progress));
while (res) {
CHECK_H(WriteBuffers(outStream));
res = FL2_endStream(fcs, nullptr);
CHECK_H(WaitAndReport(res, progress));
}
return S_OK;
}
void CFastEncoder::FastLzma2::Cancel()
{
FL2_cancelCStream(fcs);
}
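Taken together, Begin(), GetAvailableBuffer(), AddByteCount() and End() drive the Fast LZMA2 streaming API in its zero-copy mode: input is read directly into the stream's dictionary buffer, committed with FL2_updateDictionary(), and compressed output is drained with FL2_getNextCStreamBuffer(). A condensed sketch of that loop outside the COM plumbing, using FILE* in place of the 7-Zip stream interfaces; all FL2_* names are taken from the calls visible in this file, and the timeout/progress machinery (FL2_setCStreamTimeout, FL2_isTimedOut, FL2_waitCStream) is omitted for brevity:

#include <cstdio>
#include "fast-lzma2.h"

/* Write every pending compressed buffer to 'out'; returns nonzero on failure. */
static int drainOutput(FL2_CStream* fcs, FILE* out)
{
    FL2_cBuffer cbuf;
    size_t csize;
    while ((csize = FL2_getNextCStreamBuffer(fcs, &cbuf)) != 0) {
        if (FL2_isError(csize))
            return 1;
        if (fwrite(cbuf.src, 1, cbuf.size, out) != cbuf.size)
            return 1;
    }
    return 0;
}

static int compressFile(FILE* in, FILE* out, unsigned threads)
{
    FL2_CStream* const fcs = FL2_createCStreamMt(threads, 1 /* dual buffer */);
    if (fcs == NULL || FL2_isError(FL2_initCStream(fcs, 0)))
        return 1;
    bool eof = false;
    while (!eof) {
        FL2_dictBuffer dict;
        if (FL2_isError(FL2_getDictionaryBuffer(fcs, &dict)))
            return 1;
        size_t pos = 0;
        while (pos < dict.size) {   /* read input straight into the dictionary */
            size_t const n = fread((unsigned char*)dict.dst + pos, 1, dict.size - pos, in);
            if (n == 0) { eof = true; break; }
            pos += n;
        }
        if (pos != 0) {
            size_t const res = FL2_updateDictionary(fcs, pos);  /* commit; may compress */
            if (FL2_isError(res))
                return 1;
            if (res != 0 && drainOutput(fcs, out))   /* nonzero: output is available */
                return 1;
        }
    }
    size_t res;
    while ((res = FL2_endStream(fcs, NULL)) != 0) {  /* flush the remaining data */
        if (FL2_isError(res))
            return 1;
        if (drainOutput(fcs, out))
            return 1;
    }
    FL2_freeCCtx(fcs);   /* the FastLzma2 destructor above frees its stream the same way */
    return 0;
}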
CFastEncoder::CFastEncoder()
{
}
CFastEncoder::~CFastEncoder()
{
}
STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs,
const PROPVARIANT *coderProps, UInt32 numProps)
{
return _encoder.SetCoderProperties(propIDs, coderProps, numProps);
}
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
@@ -203,6 +328,7 @@ STDMETHODIMP CFastEncoder::WriteCoderProperties(ISequentialOutStream *outStream)
{
Byte prop;
unsigned i;
size_t dictSize = _encoder.GetDictSize();
for (i = 0; i < 40; i++)
if (dictSize <= LZMA2_DIC_SIZE_FROM_PROP(i))
break;
@@ -211,79 +337,29 @@ STDMETHODIMP CFastEncoder::WriteCoderProperties(ISequentialOutStream *outStream)
}
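The loop above picks the smallest property byte whose decoded size covers the dictionary reported by GetDictSize(); LZMA2_DIC_SIZE_FROM_PROP maps the byte back to a size, with even values giving powers of two and odd values the 3*2^k sizes in between. A quick standalone check:

#include <cstdio>

typedef unsigned int UInt32;
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))

int main()
{
    for (unsigned i = 22; i <= 26; i++)
        std::printf("prop %u -> %u bytes\n", i, LZMA2_DIC_SIZE_FROM_PROP(i));
    /* prop 22 -> 8 MB, 23 -> 12 MB, 24 -> 16 MB, 25 -> 24 MB, 26 -> 32 MB */
    return 0;
}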
typedef struct
{
ISequentialOutStream* outStream;
ICompressProgressInfo* progress;
UInt64 in_processed;
UInt64 out_processed;
HRESULT res;
} EncodingObjects;
static int FL2LIB_CALL Progress(size_t done, void* opaque)
{
EncodingObjects* p = (EncodingObjects*)opaque;
if (p && p->progress) {
UInt64 in_processed = p->in_processed + done;
p->res = p->progress->SetRatioInfo(&in_processed, &p->out_processed);
return p->res != S_OK;
}
return 0;
}
static int FL2LIB_CALL Write(const void* src, size_t srcSize, void* opaque)
{
EncodingObjects* p = (EncodingObjects*)opaque;
p->res = WriteStream(p->outStream, src, srcSize);
return p->res != S_OK;
}
STDMETHODIMP CFastEncoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream,
const UInt64 * /* inSize */, const UInt64 * /* outSize */, ICompressProgressInfo *progress)
{
HRESULT err = S_OK;
inBuffer.AllocAtLeast(dictSize);
EncodingObjects objs = { outStream, progress, 0, 0, S_OK };
FL2_blockBuffer block = { inBuffer, 0, 0, dictSize };
CHECK_H(_encoder.Begin());
size_t inSize;
unsigned long dSize;
do
{
FL2_shiftBlock(_encoder, &block);
size_t inSize = dictSize - block.start;
err = ReadStream(inStream, inBuffer + block.start, &inSize);
if (err != S_OK)
break;
block.end += inSize;
if (inSize) {
size_t cSize = FL2_compressCCtxBlock_toFn(_encoder, Write, &objs, &block, Progress);
if (FL2_isError(cSize)) {
if (FL2_getErrorCode(cSize) == FL2_error_memory_allocation)
return E_OUTOFMEMORY;
return objs.res != S_OK ? objs.res : S_FALSE;
}
if (objs.res != S_OK)
return objs.res;
objs.out_processed += cSize;
objs.in_processed += inSize;
if (progress) {
err = progress->SetRatioInfo(&objs.in_processed, &objs.out_processed);
if (err != S_OK)
break;
}
if (block.end < dictSize)
break;
}
else break;
BYTE* dict = _encoder.GetAvailableBuffer(dSize);
} while (err == S_OK);
if (err == S_OK) {
size_t cSize = FL2_endFrame_toFn(_encoder, Write, &objs);
if (FL2_isError(cSize))
return S_FALSE;
objs.out_processed += cSize;
err = objs.res;
}
inSize = dSize;
HRESULT err = ReadStream(inStream, dict, &inSize);
if (err != S_OK) {
_encoder.Cancel();
return err;
}
CHECK_H(_encoder.AddByteCount(inSize, outStream, progress));
} while (inSize == dSize);
CHECK_H(_encoder.End(outStream, progress));
return S_OK;
}
}}

View File

@@ -45,10 +45,33 @@ class CFastEncoder :
public ICompressWriteCoderProperties,
public CMyUnknownImp
{
FL2_CCtx* _encoder;
CByteBuffer inBuffer;
UInt64 reduceSize;
UInt32 dictSize;
class FastLzma2
{
public:
FastLzma2();
~FastLzma2();
HRESULT SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps);
size_t GetDictSize() const;
HRESULT Begin();
BYTE* GetAvailableBuffer(unsigned long& size);
HRESULT AddByteCount(size_t count, ISequentialOutStream *outStream, ICompressProgressInfo *progress);
HRESULT End(ISequentialOutStream *outStream, ICompressProgressInfo *progress);
void Cancel();
private:
bool UpdateProgress(ICompressProgressInfo *progress);
HRESULT WaitAndReport(size_t& res, ICompressProgressInfo *progress);
HRESULT WriteBuffers(ISequentialOutStream *outStream);
FL2_CStream* fcs;
FL2_dictBuffer dict;
size_t dict_pos;
FastLzma2(const FastLzma2&) = delete;
FastLzma2& operator=(const FastLzma2&) = delete;
};
FastLzma2 _encoder;
public:
MY_UNKNOWN_IMP3(

View File

@@ -1410,7 +1410,7 @@ typedef enum {
} FL2_strategy;
typedef struct {
unsigned dictionaryLog; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory, slower */
UInt32 dictionarySize; /* largest match distance : larger == more compression, more memory needed during decompression; sizes of 128 MB (log 27) and up use more memory per byte, slower */
unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */
unsigned chainLog; /* fully searched segment : larger == more compression, slower, more memory; hybrid mode only (ultra) */
unsigned searchLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */
@@ -1424,19 +1424,23 @@ typedef struct {
#define FL2_MAX_7Z_CLEVEL 9
#define MB *(1U<<20)
static const FL2_compressionParameters FL2_7zCParameters[FL2_MAX_7Z_CLEVEL + 1] = {
{ 0,0,0,0,0,0,0 },
{ 20, 1, 7, 0, 6, 32, 1, 8, FL2_fast }, /* 1 */
{ 20, 2, 7, 0, 12, 32, 1, 8, FL2_fast }, /* 2 */
{ 21, 2, 7, 0, 16, 32, 1, 8, FL2_fast }, /* 3 */
{ 20, 2, 7, 0, 16, 32, 1, 8, FL2_opt }, /* 4 */
{ 24, 2, 9, 0, 40, 48, 1, 8, FL2_ultra }, /* 5 */
{ 25, 2, 10, 0, 48, 64, 1, 8, FL2_ultra }, /* 6 */
{ 26, 2, 11, 1, 60, 96, 1, 9, FL2_ultra }, /* 7 */
{ 27, 2, 12, 2, 128, 128, 1, 10, FL2_ultra }, /* 8 */
{ 27, 3, 14, 3, 252, 160, 0, 10, FL2_ultra } /* 9 */
{ 0,0,0,0,0,0,0,0,FL2_fast },
{ 1 MB, 1, 7, 0, 6, 32, 1, 4, FL2_fast }, /* 1 */
{ 2 MB, 2, 7, 0, 10, 32, 1, 4, FL2_fast }, /* 2 */
{ 2 MB, 2, 7, 0, 10, 32, 1, 4, FL2_opt }, /* 3 */
{ 4 MB, 2, 7, 0, 14, 32, 1, 4, FL2_opt }, /* 4 */
{ 16 MB, 2, 9, 0, 42, 48, 1, 4, FL2_ultra }, /* 5 */
{ 32 MB, 2, 10, 0, 50, 64, 1, 4, FL2_ultra }, /* 6 */
{ 64 MB, 2, 11, 1, 62, 96, 1, 3, FL2_ultra }, /* 7 */
{ 64 MB, 4, 12, 2, 90, 273, 1, 3, FL2_ultra }, /* 8 */
{ 128 MB, 2, 14, 3, 254, 273, 0, 2, FL2_ultra } /* 9 */
};
#undef MB
#define RMF_BUILDER_SIZE (8 * 0x40100U)
void CCompressDialog::SetDictionary()
@@ -1512,7 +1516,7 @@ void CCompressDialog::SetDictionary()
if (level > FL2_MAX_7Z_CLEVEL)
level = FL2_MAX_7Z_CLEVEL;
if (defaultDict == (UInt32)(Int32)-1)
defaultDict = (UInt32)1 << FL2_7zCParameters[level].dictionaryLog;
defaultDict = FL2_7zCParameters[level].dictionarySize;
m_Dictionary.SetCurSel(0);
@@ -2020,11 +2024,11 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
{
if (level > FL2_MAX_7Z_CLEVEL)
level = FL2_MAX_7Z_CLEVEL;
size += dict * 5 + (1UL << 18) * numThreads;
unsigned depth = FL2_7zCParameters[level].searchDepth;
UInt32 bufSize = UInt32(1) << (FL2_7zCParameters[level].dictionaryLog - FL2_7zCParameters[level].bufferLog);
/* dual buffer is enabled in Lzma2Encoder.cpp so size is dict * 6 */
size += dict * 6 + (1UL << 18) * numThreads;
UInt32 bufSize = dict >> (12 - FL2_7zCParameters[level].bufferLog);
size += (bufSize * 12 + RMF_BUILDER_SIZE) * numThreads;
if (dict > (UInt32(1) << 26) || depth > 63)
if (dict > (UInt32(1) << 26))
size += dict;
if (FL2_7zCParameters[level].strategy == FL2_ultra)
size += (UInt32(4) << 14) + (UInt32(4) << FL2_7zCParameters[level].chainLog);
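The revised FL2 memory estimate can be sanity-checked by hand. A hypothetical standalone recomputation for the level-9 row of the table above (dictionarySize 128 MB, bufferLog 2, chainLog 14, FL2_ultra; constants copied from FL2_7zCParameters and RMF_BUILDER_SIZE) with four threads; the base size accumulated before this hunk is omitted:

#include <cstdio>

int main()
{
    unsigned long long const numThreads = 4;
    unsigned long long const dict = 128ULL << 20;
    unsigned long long size = 0;
    size += dict * 6 + (1ULL << 18) * numThreads;            /* dual dictionary buffers */
    unsigned long long const bufSize = dict >> (12 - 2);     /* bufferLog = 2 */
    size += (bufSize * 12 + 8 * 0x40100ULL) * numThreads;    /* per-thread buffers + RMF builder */
    if (dict > (1ULL << 26))                                 /* dictionaries over 64 MB */
        size += dict;                                        /* cost one extra dict-sized block */
    size += (4ULL << 14) + (4ULL << 14);                     /* FL2_ultra tables, chainLog 14 */
    std::printf("~%llu MB\n", size >> 20);                   /* prints "~911 MB" */
    return 0;
}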