Mirror of https://github.com/Xevion/easy7zip.git (synced 2025-12-09 10:07:10 -06:00)

Commit: Update to Fast LZMA2 1.0.0
C/fast-lzma2/compiler.h
@@ -1,6 +1,7 @@
 /*
 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
 * All rights reserved.
+* Modified for FL2 by Conor McCarthy
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
@@ -8,13 +9,15 @@
 * You may select, at your option, one of the above-listed licenses.
 */

-#ifndef ZSTD_COMPILER_H
-#define ZSTD_COMPILER_H
+#ifndef FL2_COMPILER_H
+#define FL2_COMPILER_H

 /*-*******************************************************
 * Compiler specifics
 *********************************************************/
 /* force inlining */

+#if !defined(FL2_NO_INLINE)
 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
 # define INLINE_KEYWORD inline
 #else
@@ -29,6 +32,13 @@
 # define FORCE_INLINE_ATTR
 #endif

+#else
+
+#define INLINE_KEYWORD
+#define FORCE_INLINE_ATTR
+
+#endif
+
 /**
 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
 * parameters. They must be inlined for the compiler to eliminate the constant
@@ -54,24 +64,69 @@

 /* force no inlining */
 #ifdef _MSC_VER
-# define FORCE_NOINLINE static __declspec(noinline)
+# define FORCE_NOINLINE __declspec(noinline)
 #else
 # ifdef __GNUC__
-# define FORCE_NOINLINE static __attribute__((__noinline__))
+# define FORCE_NOINLINE __attribute__((__noinline__))
 # else
-# define FORCE_NOINLINE static
+# define FORCE_NOINLINE
 # endif
 #endif

-/* prefetch */
-#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
-# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
-#elif defined(__GNUC__)
-# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
-#else
-# define PREFETCH(ptr) /* disabled */
+/* target attribute */
+#ifndef __has_attribute
+#define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
+#endif
+#if defined(__GNUC__)
+# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+#else
+# define TARGET_ATTRIBUTE(target)
 #endif

+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+#if ((defined(__clang__) && __has_attribute(__target__)) \
+    || (defined(__GNUC__) \
+        && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+    && (defined(__x86_64__) || defined(_M_X86)) \
+    && !defined(__BMI2__)
+#  define DYNAMIC_BMI2 1
+#else
+#  define DYNAMIC_BMI2 0
+#endif
+#endif

+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+#if defined(NO_PREFETCH)
+#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
+#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
+#else
+#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
+#    include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#  else
+#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
+#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#  endif
+#endif  /* NO_PREFETCH */

+#define CACHELINE_SIZE 64

+#define PREFETCH_AREA(p, s)  {            \
+    const char* const _ptr = (const char*)(p);  \
+    size_t const _size = (size_t)(s);     \
+    size_t _pos;                          \
+    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+        PREFETCH_L2(_ptr + _pos);         \
+    }                                     \
+}

 /* disable warnings */
 #ifdef _MSC_VER    /* Visual Studio */
@@ -83,4 +138,4 @@
 #  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
 #endif

-#endif /* ZSTD_COMPILER_H */
+#endif /* FL2_COMPILER_H */
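
The DYNAMIC_BMI2 and TARGET_ATTRIBUTE macros added above support the usual runtime-dispatch pattern: compile a hot function twice, once with the bmi2 target attribute, and pick the variant once per process. A minimal sketch of that pattern follows; popcount_body(), cpu_has_bmi2() and the dispatch wrapper are illustrative assumptions, not code from this commit:

#include <stddef.h>
#include "compiler.h"

extern int cpu_has_bmi2(void);   /* hypothetical runtime CPUID check */

/* The body is force-inlined so each wrapper gets its own copy,
 * compiled under that wrapper's target attribute. */
static INLINE_KEYWORD FORCE_INLINE_ATTR unsigned popcount_body(size_t v)
{
    unsigned n = 0;
    while (v) { v &= v - 1; ++n; }
    return n;
}

static unsigned popcount_default(size_t v) { return popcount_body(v); }

#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2")
static unsigned popcount_bmi2(size_t v) { return popcount_body(v); }
#endif

static unsigned popcount_dispatch(size_t v)
{
#if DYNAMIC_BMI2
    if (cpu_has_bmi2())          /* select once, based on the running CPU */
        return popcount_bmi2(v);
#endif
    return popcount_default(v);
}
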
C/fast-lzma2/count.h
@@ -1,3 +1,13 @@
+/*
+* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+* All rights reserved.
+*
+* This source code is licensed under both the BSD-style license (found in the
+* LICENSE file in the root directory of this source tree) and the GPLv2 (found
+* in the COPYING file in the root directory of this source tree).
+* You may select, at your option, one of the above-listed licenses.
+*/
+
 #ifndef ZSTD_COUNT_H_
 #define ZSTD_COUNT_H_

@@ -86,7 +96,7 @@ static unsigned ZSTD_NbCommonBytes(register size_t val)
 }


-MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
 {
     const BYTE* const pStart = pIn;
     const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);

C/fast-lzma2/dict_buffer.c (new file, 230 lines)
@@ -0,0 +1,230 @@
/*
* Copyright (c) 2019, Conor McCarthy
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

#include <stdlib.h>
#include "dict_buffer.h"
#include "fl2_internal.h"

#define ALIGNMENT_SIZE 16U
#define ALIGNMENT_MASK (~(size_t)(ALIGNMENT_SIZE-1))

/* DICT_buffer functions */

int DICT_construct(DICT_buffer * const buf, int const async)
{
    buf->data[0] = NULL;
    buf->data[1] = NULL;
    buf->size = 0;

    buf->async = (async != 0);

#ifndef NO_XXHASH
    buf->xxh = NULL;
#endif

    return 0;
}

int DICT_init(DICT_buffer * const buf, size_t const dict_size, size_t const overlap, unsigned const reset_multiplier, int const do_hash)
{
    /* Allocate if not yet allocated or existing dict too small */
    if (buf->data[0] == NULL || dict_size > buf->size) {
        /* Free any existing buffers */
        DICT_destruct(buf);

        buf->data[0] = malloc(dict_size);

        buf->data[1] = NULL;
        if (buf->async)
            buf->data[1] = malloc(dict_size);

        if (buf->data[0] == NULL || (buf->async && buf->data[1] == NULL)) {
            DICT_destruct(buf);
            return 1;
        }
    }
    buf->index = 0;
    buf->overlap = overlap;
    buf->start = 0;
    buf->end = 0;
    buf->size = dict_size;
    buf->total = 0;
    buf->reset_interval = (reset_multiplier != 0) ? dict_size * reset_multiplier : ((size_t)1 << 31);

#ifndef NO_XXHASH
    if (do_hash) {
        if (buf->xxh == NULL) {
            buf->xxh = XXH32_createState();
            if (buf->xxh == NULL) {
                DICT_destruct(buf);
                return 1;
            }
        }
        XXH32_reset(buf->xxh, 0);
    }
    else {
        XXH32_freeState(buf->xxh);
        buf->xxh = NULL;
    }
#else
    (void)do_hash;
#endif

    return 0;
}

void DICT_destruct(DICT_buffer * const buf)
{
    free(buf->data[0]);
    free(buf->data[1]);
    buf->data[0] = NULL;
    buf->data[1] = NULL;
    buf->size = 0;
#ifndef NO_XXHASH
    XXH32_freeState(buf->xxh);
    buf->xxh = NULL;
#endif
}

size_t DICT_size(const DICT_buffer * const buf)
{
    return buf->size;
}

/* Get the dictionary buffer for adding input */
size_t DICT_get(DICT_buffer * const buf, void **const dict)
{
    DICT_shift(buf);

    DEBUGLOG(5, "Getting dict buffer %u, pos %u, avail %u", (unsigned)buf->index, (unsigned)buf->end, (unsigned)(buf->size - buf->end));
    *dict = buf->data[buf->index] + buf->end;
    return buf->size - buf->end;
}

/* Update with the amount added */
int DICT_update(DICT_buffer * const buf, size_t const added_size)
{
    DEBUGLOG(5, "Added %u bytes to dict buffer %u", (unsigned)added_size, (unsigned)buf->index);
    buf->end += added_size;
    assert(buf->end <= buf->size);
    return !DICT_availSpace(buf);
}

/* Read from input and write to the dict */
void DICT_put(DICT_buffer * const buf, FL2_inBuffer * const input)
{
    size_t const to_read = MIN(buf->size - buf->end, input->size - input->pos);

    DEBUGLOG(5, "CStream : reading %u bytes", (U32)to_read);

    memcpy(buf->data[buf->index] + buf->end, (BYTE*)input->src + input->pos, to_read);

    input->pos += to_read;
    buf->end += to_read;
}

size_t DICT_availSpace(const DICT_buffer * const buf)
{
    return buf->size - buf->end;
}

/* Nonzero if uncompressed data remains unprocessed. start is set to end after compression */
int DICT_hasUnprocessed(const DICT_buffer * const buf)
{
    return buf->start < buf->end;
}

/* Get the buffer, overlap and end for compression */
void DICT_getBlock(DICT_buffer * const buf, FL2_dataBlock * const block)
{
    block->data = buf->data[buf->index];
    block->start = buf->start;
    block->end = buf->end;

#ifndef NO_XXHASH
    if (buf->xxh != NULL)
        XXH32_update(buf->xxh, buf->data[buf->index] + buf->start, buf->end - buf->start);
#endif

    buf->total += buf->end - buf->start;
    buf->start = buf->end;
}

/* Shift occurs when all is processed and end is beyond the overlap size */
int DICT_needShift(DICT_buffer * const buf)
{
    if (buf->start < buf->end)
        return 0;
    /* Reset the dict if the next compression cycle would exceed the reset interval */
    size_t overlap = (buf->total + buf->size - buf->overlap > buf->reset_interval) ? 0 : buf->overlap;
    return buf->start == buf->end && (overlap == 0 || buf->end >= overlap + ALIGNMENT_SIZE);
}

int DICT_async(const DICT_buffer * const buf)
{
    return (int)buf->async;
}

/* Shift the overlap amount to the start of either the only dict buffer or the alternate one
 * if it exists */
void DICT_shift(DICT_buffer * const buf)
{
    if (buf->start < buf->end)
        return;

    size_t overlap = buf->overlap;
    /* Reset the dict if the next compression cycle would exceed the reset interval */
    if (buf->total + buf->size - buf->overlap > buf->reset_interval) {
        DEBUGLOG(4, "Resetting dictionary after %u bytes", (unsigned)buf->total);
        overlap = 0;
    }

    if (overlap == 0) {
        /* No overlap means a simple buffer switch */
        buf->start = 0;
        buf->end = 0;
        buf->index ^= buf->async;
        buf->total = 0;
    }
    else if (buf->end >= overlap + ALIGNMENT_SIZE) {
        size_t const from = (buf->end - overlap) & ALIGNMENT_MASK;
        const BYTE *const src = buf->data[buf->index];
        /* Copy to the alternate if one exists */
        BYTE *const dst = buf->data[buf->index ^ buf->async];

        overlap = buf->end - from;

        if (overlap <= from || dst != src) {
            DEBUGLOG(5, "Copy overlap data : %u bytes from %u", (unsigned)overlap, (unsigned)from);
            memcpy(dst, src + from, overlap);
        }
        else if (from != 0) {
            DEBUGLOG(5, "Move overlap data : %u bytes from %u", (unsigned)overlap, (unsigned)from);
            memmove(dst, src + from, overlap);
        }
        /* New data will be written after the overlap */
        buf->start = overlap;
        buf->end = overlap;
        /* Switch buffers */
        buf->index ^= buf->async;
    }
}

#ifndef NO_XXHASH
XXH32_hash_t DICT_getDigest(const DICT_buffer * const buf)
{
    return XXH32_digest(buf->xxh);
}
#endif

size_t DICT_memUsage(const DICT_buffer * const buf)
{
    return (1 + buf->async) * buf->size;
}
C/fast-lzma2/dict_buffer.h (new file, 81 lines)
@@ -0,0 +1,81 @@
/*
* Copyright (c) 2018, Conor McCarthy
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

#include "fast-lzma2.h"
#include "mem.h"
#include "data_block.h"
#ifndef NO_XXHASH
#  include "xxhash.h"
#endif

#ifndef FL2_DICT_BUFFER_H_
#define FL2_DICT_BUFFER_H_

#if defined (__cplusplus)
extern "C" {
#endif

/* DICT_buffer structure.
 * Maintains one or two dictionary buffers. In a dual dict configuration (async==1), when the
 * current buffer is full, the overlap region will be copied to the other buffer, which then
 * becomes the destination for input while the first is compressed. This is useful when I/O
 * is much slower than compression. */
typedef struct {
    BYTE* data[2];
    size_t index;
    size_t async;
    size_t overlap;
    size_t start;  /* start = 0 (first block) or overlap */
    size_t end;    /* never < overlap */
    size_t size;   /* allocation size */
    size_t total;  /* total size compressed after last dict reset */
    size_t reset_interval;
#ifndef NO_XXHASH
    XXH32_state_t *xxh;
#endif
} DICT_buffer;

int DICT_construct(DICT_buffer *const buf, int const async);

int DICT_init(DICT_buffer *const buf, size_t const dict_size, size_t const overlap, unsigned const reset_multiplier, int const do_hash);

void DICT_destruct(DICT_buffer *const buf);

size_t DICT_size(const DICT_buffer *const buf);

size_t DICT_get(DICT_buffer *const buf, void **const dict);

int DICT_update(DICT_buffer *const buf, size_t const added_size);

void DICT_put(DICT_buffer *const buf, FL2_inBuffer* const input);

size_t DICT_availSpace(const DICT_buffer *const buf);

int DICT_hasUnprocessed(const DICT_buffer *const buf);

void DICT_getBlock(DICT_buffer *const buf, FL2_dataBlock *const block);

int DICT_needShift(DICT_buffer *const buf);

int DICT_async(const DICT_buffer *const buf);

void DICT_shift(DICT_buffer *const buf);

#ifndef NO_XXHASH
XXH32_hash_t DICT_getDigest(const DICT_buffer *const buf);
#endif

size_t DICT_memUsage(const DICT_buffer *const buf);

#if defined (__cplusplus)
}
#endif

#endif /* FL2_DICT_BUFFER_H_ */
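
A minimal synchronous usage sketch of this API, assuming the functions declared above; the encoder call is elided, error handling is reduced to early returns, and a trailing partial block is not handled:

#include "dict_buffer.h"

/* Feed an input stream through a single (non-async) dictionary buffer. */
static int feed_dict(FL2_inBuffer *input)
{
    DICT_buffer buf;
    if (DICT_construct(&buf, 0))                       /* async == 0: one buffer */
        return 1;
    if (DICT_init(&buf, (size_t)1 << 20, (size_t)1 << 16, 4, 0))  /* 1 MiB dict, 64 KiB overlap */
        return 1;

    while (input->pos < input->size) {
        DICT_put(&buf, input);                         /* copy input into the dict */
        if (!DICT_availSpace(&buf)) {                  /* dict full: hand a block to the encoder */
            FL2_dataBlock block;
            DICT_getBlock(&buf, &block);
            /* compress block.data + block.start .. block.data + block.end here */
            DICT_shift(&buf);                          /* retain the overlap, reuse the buffer */
        }
    }
    DICT_destruct(&buf);
    return 0;
}
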
C/fast-lzma2/fast-lzma2.h
@@ -53,9 +53,9 @@ Introduction
 *********************************************************************************************************/

 /*------   Version   ------*/
-#define FL2_VERSION_MAJOR 0
-#define FL2_VERSION_MINOR 9
-#define FL2_VERSION_RELEASE 2
+#define FL2_VERSION_MAJOR 1
+#define FL2_VERSION_MINOR 0
+#define FL2_VERSION_RELEASE 0

 #define FL2_VERSION_NUMBER (FL2_VERSION_MAJOR *100*100 + FL2_VERSION_MINOR *100 + FL2_VERSION_RELEASE)
 FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void);   /**< useful to check dll version */
@@ -67,12 +67,13 @@ FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void);   /**< useful to check
 FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void);


-#define FL2_MAXTHREADS 200


 /***************************************
 * Simple API
 ***************************************/

+#define FL2_MAXTHREADS 200
+
 /*! FL2_compress() :
 * Compresses `src` content as a single LZMA2 compressed stream into already allocated `dst`.
 * Call FL2_compressMt() to use > 1 thread. Specify nbThreads = 0 to use all cores.
@@ -88,20 +89,30 @@ FL2LIB_API size_t FL2LIB_CALL FL2_compressMt(void* dst, size_t dstCapacity,
                               unsigned nbThreads);

 /*! FL2_decompress() :
-* `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
-* `dstCapacity` is an upper bound of originalSize to regenerate.
-* If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+* Decompresses a single LZMA2 compressed stream from `src` into already allocated `dst`.
+* `compressedSize` : must be at least the size of the LZMA2 stream.
+* `dstCapacity` is the original, uncompressed size to regenerate, returned by calling
+* FL2_findDecompressedSize().
+* Call FL2_decompressMt() to use > 1 thread. Specify nbThreads = 0 to use all cores. The stream
+* must contain dictionary resets to use multiple threads. These are inserted during compression by
+* default. The frequency can be changed/disabled with the FL2_p_resetInterval parameter setting.
 * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
 *           or an errorCode if it fails (which can be tested using FL2_isError()). */
 FL2LIB_API size_t FL2LIB_CALL FL2_decompress(void* dst, size_t dstCapacity,
                               const void* src, size_t compressedSize);

+FL2LIB_API size_t FL2LIB_CALL FL2_decompressMt(void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize,
+                              unsigned nbThreads);
+
 /*! FL2_findDecompressedSize()
 * `src` should point to the start of a LZMA2 encoded stream.
 * `srcSize` must be at least as large as the LZMA2 stream including end marker.
+* A property byte is assumed to exist at position 0 in `src`. If the stream was created without one,
+* subtract 1 byte from `src` when passing it to the function.
 * @return : - decompressed size of the stream in `src`, if known
 *           - FL2_CONTENTSIZE_ERROR if an error occurred (e.g. corruption, srcSize too small)
-*  note 1 : a 0 return value means the frame is valid but "empty".
+*  note 1 : a 0 return value means the stream is valid but "empty".
 *  note 2 : decompressed size can be very large (64-bits value),
 *           potentially larger than what local system can handle as a single memory segment.
 *           In which case, it's necessary to use streaming mode to decompress data.
@@ -109,122 +120,80 @@ FL2LIB_API size_t FL2LIB_CALL FL2_decompress(void* dst, size_t dstCapacity,
 *  Always ensure return value fits within application's authorized limits.
 *  Each application can set its own limits. */
 #define FL2_CONTENTSIZE_ERROR (size_t)-1
-FL2LIB_API size_t FL2LIB_CALL FL2_findDecompressedSize(const void *src, size_t srcSize);
+FL2LIB_API unsigned long long FL2LIB_CALL FL2_findDecompressedSize(const void *src, size_t srcSize);


 /*====== Helper functions ======*/
-#define FL2_COMPRESSBOUND(srcSize) ((srcSize) + (((srcSize) + 0xFFF) / 0x1000) * 3 + 6) /* this formula calculates the maximum size of data stored in uncompressed chunks */
+#define FL2_COMPRESSBOUND(srcSize) ((srcSize) + (((srcSize) + 0xFFF) / 0x1000) * 3 + 6) /*!< calculates the maximum size of data stored in a sequence of uncompressed chunks */
 FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
 FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
+FL2LIB_API unsigned FL2LIB_CALL FL2_isTimedOut(size_t code); /*!< tells if a `size_t` function result is the timeout code */
 FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code); /*!< provides readable string from an error code */
 FL2LIB_API int FL2LIB_CALL FL2_maxCLevel(void); /*!< maximum compression level available */
 FL2LIB_API int FL2LIB_CALL FL2_maxHighCLevel(void); /*!< maximum compression level available in high mode */

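
A round-trip sketch using the simple API and helpers above; allocation handling is minimal, level 6 (the default) is an arbitrary choice, and FL2_compress's prototype is assumed to mirror FL2_compressMt without the thread count:

#include <stdlib.h>
#include "fast-lzma2.h"

static int round_trip(const void *src, size_t srcSize)
{
    size_t const bound = FL2_compressBound(srcSize);
    void *cBuf = malloc(bound);
    int ok = 0;
    if (cBuf != NULL) {
        size_t const cSize = FL2_compress(cBuf, bound, src, srcSize, 6);
        if (!FL2_isError(cSize)) {
            /* the stream records its own decompressed size */
            unsigned long long const dSize = FL2_findDecompressedSize(cBuf, cSize);
            void *dBuf = (dSize != FL2_CONTENTSIZE_ERROR) ? malloc((size_t)dSize) : NULL;
            if (dBuf != NULL) {
                size_t const res = FL2_decompress(dBuf, (size_t)dSize, cBuf, cSize);
                ok = !FL2_isError(res) && res == (size_t)dSize;
                free(dBuf);
            }
        }
        free(cBuf);
    }
    return ok;
}
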
 /***************************************
 * Explicit memory management
 ***************************************/

 /*= Compression context
-* When compressing many times,
-* it is recommended to allocate a context just once, and re-use it for each successive compression operation.
-* This will make workload friendlier for system's memory.
-* The context may not use the number of threads requested if the library is compiled for single-threaded
-* compression or nbThreads > FL2_MAXTHREADS. Call FL2_CCtx_nbThreads to obtain the actual number. */
+* When compressing many times, it is recommended to allocate a context just once,
+* and re-use it for each successive compression operation. This will make workload
+* friendlier for system's memory. The context may not use the number of threads requested
+* if the library is compiled for single-threaded compression or nbThreads > FL2_MAXTHREADS.
+* Call FL2_getCCtxThreadCount to obtain the actual number allocated. */
 typedef struct FL2_CCtx_s FL2_CCtx;
 FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtx(void);
 FL2LIB_API FL2_CCtx* FL2LIB_CALL FL2_createCCtxMt(unsigned nbThreads);
 FL2LIB_API void FL2LIB_CALL FL2_freeCCtx(FL2_CCtx* cctx);

-FL2LIB_API unsigned FL2LIB_CALL FL2_CCtx_nbThreads(const FL2_CCtx* ctx);
+FL2LIB_API unsigned FL2LIB_CALL FL2_getCCtxThreadCount(const FL2_CCtx* cctx);

 /*! FL2_compressCCtx() :
-* Same as FL2_compress(), requires an allocated FL2_CCtx (see FL2_createCCtx()). */
-FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* ctx,
+* Same as FL2_compress(), but requires an allocated FL2_CCtx (see FL2_createCCtx()). */
+FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtx(FL2_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                               int compressionLevel);

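
A sketch of context reuse with explicit parameters; the level-0 convention ("keep current parameters") is described in the parameter section below, and the dst/src buffers are assumed allocated by the caller:

static size_t compress_tuned(FL2_CCtx *cctx,
                             void *dst, size_t dstCapacity,
                             const void *src, size_t srcSize)
{
    FL2_CCtx_setParameter(cctx, FL2_p_compressionLevel, 9);
    FL2_CCtx_setParameter(cctx, FL2_p_dictionarySize, (size_t)1 << 26); /* 64 Mb */
    /* compressionLevel 0 keeps the parameters set above */
    return FL2_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 0);
}
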
-/************************************************
-* Caller-managed data buffer and overlap section
-************************************************/
-
-typedef struct {
-    unsigned char *data;
-    size_t start;   /* start = 0 (first block) or overlap */
-    size_t end;     /* never < overlap */
-    size_t bufSize; /* allocation size */
-} FL2_blockBuffer;
-
-typedef int (FL2LIB_CALL *FL2_progressFn)(size_t done, void* opaque);
-
-/* Get the size of the overlap section. */
-FL2LIB_API size_t FL2LIB_CALL FL2_blockOverlap(const FL2_CCtx* ctx);
-
-/* Copy the overlap section to the start to prepare for more data */
-FL2LIB_API void FL2LIB_CALL FL2_shiftBlock(FL2_CCtx* ctx, FL2_blockBuffer *block);
-/* Copy the overlap to a different buffer. This allows a dual-buffer configuration where
- * data is read into one block while the other is compressed. */
-FL2LIB_API void FL2LIB_CALL FL2_shiftBlock_switch(FL2_CCtx* ctx, FL2_blockBuffer *block, unsigned char *dst);
-
-FL2LIB_API void FL2LIB_CALL FL2_beginFrame(FL2_CCtx* const cctx);
-
-/*! FL2_compressCCtxBlock() :
- * Same as FL2_compressCCtx except the caller is responsible for supplying an overlap section.
- * The FL2_p_overlapFraction parameter will not be used.
- * srcStart + srcSize should equal the dictionary size except on the last call.
- * Can be called multiple times. FL2_endFrame() must be called when finished.
- * For compatibility with this library the caller must write a property byte at
- * the beginning of the output. Obtain it by calling FL2_dictSizeProp() before
- * compressing the first block or after the last. No hash will be written, but
- * the caller can calculate it using the interface in xxhash.h, write it at the end,
- * and set bit 7 in the property byte. */
-FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock(FL2_CCtx* ctx,
-                              void* dst, size_t dstCapacity,
-                              const FL2_blockBuffer *block,
-                              FL2_progressFn progress, void* opaque);
-
-/*! FL2_endFrame() :
- * Write the end marker to terminate the LZMA2 stream.
- * Must be called after compressing with FL2_compressCCtxBlock() */
-FL2LIB_API size_t FL2LIB_CALL FL2_endFrame(FL2_CCtx* ctx,
-                              void* dst, size_t dstCapacity);
-
-typedef int (FL2LIB_CALL *FL2_writerFn)(const void* src, size_t srcSize, void* opaque);
-
-/*! FL2_compressCCtxBlock_toFn() :
- * Same as FL2_compressCCtx except the caller is responsible for supplying an
- * overlap section, and compressed data is written to a callback function.
- * The FL2_p_overlapFraction parameter will not be used.
- * Can be called multiple times. FL2_endFrame_toFn() must be called when finished. */
-FL2LIB_API size_t FL2LIB_CALL FL2_compressCCtxBlock_toFn(FL2_CCtx* ctx,
-                              FL2_writerFn writeFn, void* opaque,
-                              const FL2_blockBuffer *block,
-                              FL2_progressFn progress);
-
-/*! FL2_endFrame() :
- * Write the end marker to a callback function to terminate the LZMA2 stream.
- * Must be called after compressing with FL2_compressCCtxBlock_toFn() */
-FL2LIB_API size_t FL2LIB_CALL FL2_endFrame_toFn(FL2_CCtx* ctx,
-                              FL2_writerFn writeFn, void* opaque);
-
-/*! FL2_dictSizeProp() :
+/*! FL2_getCCtxDictProp() :
 * Get the dictionary size property.
 * Intended for use with the FL2_p_omitProperties parameter for creating a
-* 7-zip compatible LZMA2 stream. */
-FL2LIB_API unsigned char FL2LIB_CALL FL2_dictSizeProp(FL2_CCtx* ctx);
+* 7-zip or XZ compatible LZMA2 stream. */
+FL2LIB_API unsigned char FL2LIB_CALL FL2_getCCtxDictProp(FL2_CCtx* cctx);


 /****************************
 * Decompression
 ****************************/

 /*= Decompression context
-* When decompressing many times,
-* it is recommended to allocate a context only once,
-* and re-use it for each successive compression operation.
-* This will make the workload friendlier for the system's memory.
-* Use one context per thread for parallel execution. */
-typedef struct CLzma2Dec_s FL2_DCtx;
+* When decompressing many times, it is recommended to allocate a context only once,
+* and re-use it for each successive decompression operation. This will make the workload
+* friendlier for the system's memory.
+* The context may not allocate the number of threads requested if the library is
+* compiled for single-threaded compression or nbThreads > FL2_MAXTHREADS.
+* Call FL2_getDCtxThreadCount to obtain the actual number allocated.
+* At least nbThreads dictionary resets must exist in the stream to use all of the
+* threads. Dictionary resets are inserted into the stream according to the
+* FL2_p_resetInterval parameter used in the compression context. */
+typedef struct FL2_DCtx_s FL2_DCtx;
 FL2LIB_API FL2_DCtx* FL2LIB_CALL FL2_createDCtx(void);
+FL2LIB_API FL2_DCtx* FL2LIB_CALL FL2_createDCtxMt(unsigned nbThreads);
 FL2LIB_API size_t FL2LIB_CALL FL2_freeDCtx(FL2_DCtx* dctx);

+FL2LIB_API unsigned FL2LIB_CALL FL2_getDCtxThreadCount(const FL2_DCtx* dctx);


+/*! FL2_initDCtx() :
+ * Use only when a property byte is not present at input byte 0. No init is necessary otherwise.
+ * The caller must store the result from FL2_getCCtxDictProp() and pass it to this function. */
+FL2LIB_API size_t FL2LIB_CALL FL2_initDCtx(FL2_DCtx* dctx, unsigned char prop);
+
 /*! FL2_decompressDCtx() :
 * Same as FL2_decompress(), requires an allocated FL2_DCtx (see FL2_createDCtx()) */
-FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* ctx,
+FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize);

@@ -232,90 +201,180 @@ FL2LIB_API size_t FL2LIB_CALL FL2_decompressDCtx(FL2_DCtx* ctx,
 * Streaming
 ****************************/

-typedef struct FL2_inBuffer_s {
+typedef struct {
   const void* src;    /**< start of input buffer */
   size_t size;        /**< size of input buffer */
   size_t pos;         /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
 } FL2_inBuffer;

-typedef struct FL2_outBuffer_s {
+typedef struct {
   void* dst;          /**< start of output buffer */
   size_t size;        /**< size of output buffer */
   size_t pos;         /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
 } FL2_outBuffer;

+/*** Push/pull structs ***/
+
+typedef struct {
+  void* dst;          /**< start of available dict buffer */
+  unsigned long size; /**< size of dict remaining */
+} FL2_dictBuffer;
+
+typedef struct {
+  const void* src;    /**< start of compressed data */
+  size_t size;        /**< size of compressed data */
+} FL2_cBuffer;
+
 /*-***********************************************************************
-* Streaming compression - HowTo
+* Streaming compression
 *
 * A FL2_CStream object is required to track streaming operation.
 * Use FL2_createCStream() and FL2_freeCStream() to create/release resources.
 * FL2_CStream objects can be reused multiple times on consecutive compression operations.
-* It is recommended to re-use FL2_CStream in situations where many streaming operations will be achieved consecutively,
-* since it will play nicer with system's memory, by re-using already allocated memory.
+* It is recommended to re-use FL2_CStream in situations where many streaming operations will be done
+* consecutively, since it will reduce allocation and initialization time.
 *
-* Start a new compression by initializing FL2_CStream.
-* Use FL2_initCStream() to start a new compression operation.
+* Call FL2_createCStreamMt() with a nonzero dualBuffer parameter to use two input dictionary buffers.
+* The stream will not block on FL2_compressStream() and continues to accept data while compression is
+* underway, until both buffers are full. Useful when I/O is slow.
+* To compress with a single thread with dual buffering, call FL2_createCStreamMt with nbThreads=1.
+*
+* Use FL2_initCStream() on the FL2_CStream object to start a new compression operation.
+*
 * Use FL2_compressStream() repetitively to consume input stream.
-* The function will automatically update both `pos` fields.
-* It will always consume the entire input unless an error occurs,
+* The function will automatically update the `pos` field.
+* It will always consume the entire input unless an error occurs or the dictionary buffer is filled,
 * unlike the decompression function.
-* @return : a size hint - remaining capacity to fill before compression occurs,
-*           or an error code, which can be tested using FL2_isError().
-* Note : it's just a hint, any other value will work fine.
-*
-* At any moment, it's possible, but not recommended, to flush whatever data remains
-* within internal buffer using FL2_flushStream().
-* `output->pos` will be updated.
-* Note 1 : this will reduce compression ratio because the algorithm is block-based.
-* Note 2 : some content might still be left within internal buffers if `output->size` is too small.
-* @return : nb of bytes still present within internal buffers (0 if they're empty)
-*           or an error code, which can be tested using FL2_isError().
+* The radix match finder allows compressed data to be stored in its match table during encoding.
+* Applications may call streaming compression functions with output == NULL. In this case,
+* when the function returns 1, the compressed data must be read from the internal buffers.
+* Call FL2_getNextCStreamBuffer() repeatedly until it returns 0.
|
||||
+* Each call returns buffer information in the FL2_cBuffer parameter. Applications will typically
+* pass this to an I/O write function or downstream filter.
+* Alternately, applications may pass an FL2_outBuffer object pointer to receive the output. In this
+* case the return value is 1 if the buffer is full and more compressed data remains.
 *
-* FL2_endStream() instructs to finish a frame.
-* It will perform a flush and write the LZMA2 termination byte (required).
-* FL2_endStream() may not be able to flush full data if `output->size` is too small.
-* In which case, call again FL2_endStream() to complete the flush.
-* @return : 0 if stream fully completed and flushed,
-*       or >0 to indicate the nb of bytes still present within the internal buffers,
-*       or an error code, which can be tested using FL2_isError().
+* FL2_endStream() instructs to finish a stream. It will perform a flush and write the LZMA2
+* termination byte (required). Call FL2_endStream() repeatedly until it returns 0.
+*
+* Most functions may return a size_t error code, which can be tested using FL2_isError().
 *
 * *******************************************************************/

-typedef struct FL2_CStream_s FL2_CStream;
+typedef struct FL2_CCtx_s FL2_CStream;

 /*===== FL2_CStream management functions =====*/
 FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStream(void);
-FL2LIB_API size_t FL2LIB_CALL FL2_freeCStream(FL2_CStream* fcs);
+FL2LIB_API FL2_CStream* FL2LIB_CALL FL2_createCStreamMt(unsigned nbThreads, int dualBuffer);
+FL2LIB_API void FL2LIB_CALL FL2_freeCStream(FL2_CStream * fcs);

-/*===== Streaming compression functions =====*/
-FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel);
-FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer* output, FL2_inBuffer* input);
-FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer* output);
-FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer* output);

+/*! FL2_initCStream() :
+ * Call this function before beginning a new compressed data stream. To keep the stream object's
+ * current parameters, specify zero for the compression level. The object is set to the default
+ * level upon creation. */
+FL2LIB_API size_t FL2LIB_CALL FL2_initCStream(FL2_CStream* fcs, int compressionLevel);

+/*! FL2_setCStreamTimeout() :
+ * Sets a timeout in milliseconds. Zero disables the timeout (default). If a nonzero timeout is set, functions
+ * FL2_compressStream(), FL2_updateDictionary(), FL2_getNextCStreamBuffer(), FL2_flushStream(), and
+ * FL2_endStream() may return a timeout code before compression of the current dictionary of data
+ * completes. FL2_isError() returns true for the timeout code, so check the code with FL2_isTimedOut() before
+ * testing for errors. With the exception of FL2_updateDictionary(), the above functions may be called again
+ * to wait for completion. A typical application for timeouts is to update the user on compression progress. */
+FL2LIB_API size_t FL2LIB_CALL FL2_setCStreamTimeout(FL2_CStream * fcs, unsigned timeout);

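
A sketch of the timeout convention just described; fcs/output/input are assumed set up as in the streaming HowTo above, and the progress report is a stand-in for application code:

#include <stdio.h>

static size_t compress_reporting(FL2_CStream *fcs,
                                 FL2_outBuffer *output, FL2_inBuffer *input)
{
    size_t res;
    FL2_setCStreamTimeout(fcs, 500);          /* wake every 500 ms */
    for (;;) {
        res = FL2_compressStream(fcs, output, input);
        if (!FL2_isTimedOut(res))
            break;                            /* finished, or a real error */
        /* timed out: compression still in progress; report and call again */
        fprintf(stderr, "\r%llu bytes done", FL2_getCStreamProgress(fcs, NULL));
    }
    return res;                               /* test with FL2_isError() */
}
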
+/*! FL2_compressStream() :
+ * Reads data from input into the dictionary buffer. Compression will begin if the buffer fills up.
+ * A dual buffering stream will fill the second buffer while compression proceeds on the first.
+ * A call to FL2_compressStream() will wait for ongoing compression to complete if all dictionary space
+ * is filled. FL2_compressStream() must not be called with output == NULL unless the caller has read all
+ * compressed data from the CStream object.
+ * Returns 1 to indicate compressed data must be read (or output is full), or 0 otherwise. */
+FL2LIB_API size_t FL2LIB_CALL FL2_compressStream(FL2_CStream* fcs, FL2_outBuffer *output, FL2_inBuffer* input);

+/*** Push/pull functions ***/

+/*! FL2_getDictionaryBuffer() :
+ * Returns a buffer in the FL2_dictBuffer object, which the caller can directly read data into.
+ * Applications will normally pass this buffer to an I/O read function or upstream filter.
+ * Returns 0, or an error or timeout code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_getDictionaryBuffer(FL2_CStream* fcs, FL2_dictBuffer* dict);

+/*! FL2_updateDictionary() :
+ * Informs the CStream how much data was added to the buffer. Compression begins if the dictionary
+ * was filled. Returns 1 to indicate compressed data must be read, 0 if not, or an error code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_updateDictionary(FL2_CStream* fcs, size_t addedSize);

+/*! FL2_getNextCStreamBuffer() :
+ * Returns a buffer containing a slice of the compressed data. Call this function and process the data
+ * until the function returns zero. In most cases it will return a buffer for each compression thread
+ * used. It is sometimes less but never more than nbThreads. If asynchronous compression is in progress,
+ * this function will wait for completion before returning, or it will return the timeout code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_getNextCStreamBuffer(FL2_CStream* fcs, FL2_cBuffer* cbuf);

+/******/
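
A push/pull loop sketch under the semantics above; read_input() and write_output() are hypothetical I/O callbacks, no timeout is assumed, and the final FL2_endStream() drain is elided:

extern size_t read_input(void *buf, size_t size);         /* hypothetical: 0 on EOF */
extern void   write_output(const void *buf, size_t size); /* hypothetical sink */

static size_t pump(FL2_CStream *fcs)
{
    for (;;) {
        FL2_dictBuffer dict;
        size_t res = FL2_getDictionaryBuffer(fcs, &dict);
        if (FL2_isError(res))
            return res;
        size_t const added = read_input(dict.dst, dict.size);
        if (added == 0)
            break;                          /* EOF: caller ends the stream */
        res = FL2_updateDictionary(fcs, added);
        if (FL2_isError(res))
            return res;
        if (res == 1) {                     /* compressed data is ready to pull */
            FL2_cBuffer cbuf;
            while ((res = FL2_getNextCStreamBuffer(fcs, &cbuf)) != 0) {
                if (FL2_isError(res))
                    return res;
                write_output(cbuf.src, cbuf.size);
            }
        }
    }
    return 0;
}
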

+/*! FL2_getCStreamProgress() :
+ * Returns the number of bytes processed since the stream was initialized. This is a synthetic
+ * estimate because the match finder does not proceed sequentially through the data. If
+ * outputSize is not NULL, returns the number of bytes of compressed data generated. */
+FL2LIB_API unsigned long long FL2LIB_CALL FL2_getCStreamProgress(const FL2_CStream * fcs, unsigned long long *outputSize);

+/*! FL2_waitCStream() :
+ * Waits for compression to end. This function returns after the timeout set using
+ * FL2_setCStreamTimeout has elapsed. Unnecessary when no timeout is set.
+ * Returns 1 if compressed output is available, 0 if not, or the timeout code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_waitCStream(FL2_CStream * fcs);

+/*! FL2_cancelCStream() :
+ * Cancels any compression operation underway. Useful only when dual buffering and/or timeouts
+ * are enabled. The stream will be returned to an uninitialized state. */
+FL2LIB_API void FL2LIB_CALL FL2_cancelCStream(FL2_CStream *fcs);

+/*! FL2_remainingOutputSize() :
+ * The amount of compressed data remaining to be read from the CStream object. */
+FL2LIB_API size_t FL2LIB_CALL FL2_remainingOutputSize(const FL2_CStream* fcs);

+/*! FL2_flushStream() :
+ * Compress all data remaining in the dictionary buffer(s). It may be necessary to call
+ * FL2_flushStream() more than once. If output == NULL the compressed data must be read from the
+ * CStream object after each call.
+ * Flushing is not normally useful and produces larger output.
+ * Returns 1 if input or output still exists in the CStream object, 0 if complete, or an error code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_flushStream(FL2_CStream* fcs, FL2_outBuffer *output);

+/*! FL2_endStream() :
+ * Compress all data remaining in the dictionary buffer(s) and write the stream end marker. It may
+ * be necessary to call FL2_endStream() more than once. If output == NULL the compressed data must
+ * be read from the CStream object after each call.
+ * Returns 0 when compression is complete and all output has been flushed, 1 if not complete, or
+ * an error code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_endStream(FL2_CStream* fcs, FL2_outBuffer *output);

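
A conventional buffered streaming loop using the functions above; read_input() and write_output() are hypothetical I/O callbacks and the 64 KiB buffer sizes are arbitrary:

extern size_t read_input(void *buf, size_t size);         /* hypothetical: 0 on EOF */
extern void   write_output(const void *buf, size_t size); /* hypothetical sink */

static size_t stream_compress(FL2_CStream *fcs)
{
    static char inBuf[64 * 1024], outBuf[64 * 1024];
    size_t res = FL2_initCStream(fcs, 0);   /* 0: keep current parameters */
    if (FL2_isError(res))
        return res;
    for (;;) {
        FL2_inBuffer input = { inBuf, read_input(inBuf, sizeof inBuf), 0 };
        if (input.size == 0)
            break;                          /* EOF */
        while (input.pos < input.size) {    /* repeat if output fills up */
            FL2_outBuffer output = { outBuf, sizeof outBuf, 0 };
            res = FL2_compressStream(fcs, &output, &input);
            if (FL2_isError(res))
                return res;
            write_output(outBuf, output.pos);
        }
    }
    do {                                    /* flush and write the end marker */
        FL2_outBuffer output = { outBuf, sizeof outBuf, 0 };
        res = FL2_endStream(fcs, &output);
        if (FL2_isError(res))
            return res;
        write_output(outBuf, output.pos);
    } while (res != 0);
    return 0;
}
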
 /*-***************************************************************************
-* Streaming decompression - HowTo
+* Streaming decompression
 *
 * A FL2_DStream object is required to track streaming operations.
 * Use FL2_createDStream() and FL2_freeDStream() to create/release resources.
 * FL2_DStream objects can be re-used multiple times.
 *
 * Use FL2_initDStream() to start a new decompression operation.
-* @return : recommended first input size
+* @return : zero or an error code
 *
 * Use FL2_decompressStream() repetitively to consume your input.
 * The function will update both `pos` fields.
 * If `input.pos < input.size`, some input has not been consumed.
-* It's up to the caller to present again remaining data.
-* More data must be loaded if `input.pos + LZMA_REQUIRED_INPUT_MAX >= input.size`
+* It's up to the caller to present again the remaining data.
+* More data must be loaded if `input.pos + LZMA_REQUIRED_INPUT_MAX >= input.size`. In this case,
+* move the remaining input (<= LZMA_REQUIRED_INPUT_MAX bytes) to the start of the buffer and
+* load new data after it.
 * If `output.pos < output.size`, decoder has flushed everything it could.
-* @return : 0 when a frame is completely decoded and fully flushed,
-*       an error code, which can be tested using FL2_isError(),
-*       1, which means there is still some decoding to do to complete current frame.
+* @return : 0 when a stream is completely decoded and fully flushed,
+*       1, which means there is still some decoding to do to complete the stream,
+*       or an error code, which can be tested using FL2_isError().
 * *******************************************************************************/

 #define LZMA_REQUIRED_INPUT_MAX 20
@@ -324,101 +383,187 @@ typedef struct FL2_DStream_s FL2_DStream;

 /*===== FL2_DStream management functions =====*/
 FL2LIB_API FL2_DStream* FL2LIB_CALL FL2_createDStream(void);
+FL2LIB_API FL2_DStream* FL2LIB_CALL FL2_createDStreamMt(unsigned nbThreads);
 FL2LIB_API size_t FL2LIB_CALL FL2_freeDStream(FL2_DStream* fds);

+/*! FL2_setDStreamMemoryLimitMt() :
+ * Set a total size limit for multithreaded decoder input and output buffers. MT decoder memory
+ * usage is unknown until the input is parsed. If the limit is exceeded, the decoder switches to
+ * using a single thread.
+ * MT decoding memory usage is typically dictionary_size * 4 * nbThreads for the output
+ * buffers plus the size of the compressed input for that amount of output. */
+FL2LIB_API void FL2LIB_CALL FL2_setDStreamMemoryLimitMt(FL2_DStream* fds, size_t limit);

+/*! FL2_setDStreamTimeout() :
+ * Sets a timeout in milliseconds. Zero disables the timeout. If a nonzero timeout is set,
+ * FL2_decompressStream() may return a timeout code before decompression of the available data
+ * completes. FL2_isError() returns true for the timeout code, so check the code with FL2_isTimedOut()
+ * before testing for errors. After a timeout occurs, do not call FL2_decompressStream() again unless
+ * a call to FL2_waitDStream() returns 1. A typical application for timeouts is to update the user on
+ * decompression progress. */
+FL2LIB_API size_t FL2LIB_CALL FL2_setDStreamTimeout(FL2_DStream * fds, unsigned timeout);

+/*! FL2_waitDStream() :
+ * Waits for decompression to end after a timeout has occurred. This function returns after the
+ * timeout set using FL2_setDStreamTimeout() has elapsed, or when decompression of available input is
+ * complete. Unnecessary when no timeout is set.
+ * Returns 0 if the stream is complete, 1 if not complete, or an error code. */
+FL2LIB_API size_t FL2LIB_CALL FL2_waitDStream(FL2_DStream * fds);

+/*! FL2_cancelDStream() :
+ * Frees memory allocated for MT decoding. If a timeout is set and the caller is waiting
+ * for completion of MT decoding, decompression in progress will be canceled. */
+FL2LIB_API void FL2LIB_CALL FL2_cancelDStream(FL2_DStream *fds);

+/*! FL2_getDStreamProgress() :
+ * Returns the number of bytes decoded since the stream was initialized. */
+FL2LIB_API unsigned long long FL2LIB_CALL FL2_getDStreamProgress(const FL2_DStream * fds);

 /*===== Streaming decompression functions =====*/

+/*! FL2_initDStream() :
+ * Call this function before decompressing a stream. FL2_initDStream_withProp()
+ * must be used for streams which do not include a property byte at position zero.
+ * The caller is responsible for storing and passing the property byte.
+ * Returns 0 if okay, or an error if the stream object is still in use from a
+ * previous call to FL2_decompressStream() (see timeout info above). */
 FL2LIB_API size_t FL2LIB_CALL FL2_initDStream(FL2_DStream* fds);
+FL2LIB_API size_t FL2LIB_CALL FL2_initDStream_withProp(FL2_DStream* fds, unsigned char prop);

+/*! FL2_decompressStream() :
+ * Reads data from input and decompresses to output.
+ * Returns 1 if the stream is unfinished, 0 if the terminator was encountered (he'll be back)
+ * and all data was written to output, or an error code. Call this function repeatedly if
+ * necessary, removing data from output and/or loading data into input before each call.
+ * Note the requirement for LZMA_REQUIRED_INPUT_MAX bytes of input if the input data is
+ * incomplete (see intro above). */
 FL2LIB_API size_t FL2LIB_CALL FL2_decompressStream(FL2_DStream* fds, FL2_outBuffer* output, FL2_inBuffer* input);

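
A matching decompression loop sketch; the LZMA_REQUIRED_INPUT_MAX carry-over described in the intro is not shown, so this version assumes each read delivers complete chunks:

extern size_t read_input(void *buf, size_t size);         /* hypothetical: 0 on EOF */
extern void   write_output(const void *buf, size_t size); /* hypothetical sink */

static size_t stream_decompress(FL2_DStream *fds)
{
    static char inBuf[64 * 1024], outBuf[64 * 1024];
    size_t res = FL2_initDStream(fds);
    if (FL2_isError(res))
        return res;
    res = 1;
    while (res != 0) {                      /* res == 0 : terminator reached */
        FL2_inBuffer input = { inBuf, read_input(inBuf, sizeof inBuf), 0 };
        if (input.size == 0)
            return 1;                       /* unexpected end of input */
        do {
            FL2_outBuffer output = { outBuf, sizeof outBuf, 0 };
            res = FL2_decompressStream(fds, &output, &input);
            if (FL2_isError(res))
                return res;
            write_output(outBuf, output.pos);  /* drain whatever was produced */
        } while (res != 0 && input.pos < input.size);
    }
    return 0;
}
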
/*-***************************************************************************
|
||||
* Compression parameters - HowTo
|
||||
* Compression parameters
|
||||
*
|
||||
* Any function that takes a 'compressionLevel' parameter will replace any
|
||||
* parameters affected by compression level that are already set.
|
||||
* Call FL2_CCtx_setParameter with FL2_p_compressionLevel to set the level,
|
||||
* then call FL2_CCtx_setParameter again with any other settings to change.
|
||||
* Specify compressionLevel=0 when calling a compression function.
|
||||
* To use a preset level and modify it, call FL2_CCtx_setParameter with
|
||||
* FL2_p_compressionLevel to set the level, then call FL2_CCtx_setParameter again
|
||||
* with any other settings to change.
|
||||
* Specify a compressionLevel of 0 when calling a compression function to keep
|
||||
* the current parameters.
|
||||
* *******************************************************************************/
|
||||
|
||||
#define FL2_DICTLOG_MIN 20
|
||||
#define FL2_DICTLOG_MAX_32 27
|
||||
#define FL2_DICTLOG_MAX_64 30
|
||||
#define FL2_DICTLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? FL2_DICTLOG_MAX_32 : FL2_DICTLOG_MAX_64))
|
||||
#define FL2_DICTLOG_MIN 20
|
||||
#define FL2_CHAINLOG_MAX 14
|
||||
#define FL2_CHAINLOG_MIN 4
|
||||
#define FL2_SEARCHLOG_MAX (FL2_CHAINLOG_MAX-1)
|
||||
#define FL2_SEARCHLOG_MIN 0
|
||||
#define FL2_FASTLENGTH_MIN 6 /* only used by optimizer */
|
||||
#define FL2_FASTLENGTH_MAX 273 /* only used by optimizer */
|
||||
#define FL2_DICTSIZE_MAX (1U << FL2_DICTLOG_MAX)
|
||||
#define FL2_DICTSIZE_MIN (1U << FL2_DICTLOG_MIN)
|
||||
#define FL2_BLOCK_OVERLAP_MIN 0
|
||||
#define FL2_BLOCK_OVERLAP_MAX 14
|
||||
#define FL2_BLOCK_LOG_MIN 12
|
||||
#define FL2_BLOCK_LOG_MAX 32
|
||||
#define FL2_RESET_INTERVAL_MIN 1
|
||||
#define FL2_RESET_INTERVAL_MAX 16 /* small enough to fit FL2_DICTSIZE_MAX * FL2_RESET_INTERVAL_MAX in 32-bit size_t */
|
||||
#define FL2_BUFFER_SIZE_LOG_MIN 0
|
||||
#define FL2_BUFFER_SIZE_LOG_MAX 6
|
||||
#define FL2_CHAINLOG_MIN 4
|
||||
#define FL2_CHAINLOG_MAX 14
|
||||
#define FL2_HYBRIDCYCLES_MIN 1
|
||||
#define FL2_HYBRIDCYCLES_MAX 64
|
||||
#define FL2_SEARCH_DEPTH_MIN 6
|
||||
#define FL2_SEARCH_DEPTH_MAX 254
|
||||
#define FL2_BUFFER_SIZE_LOG_MIN 6
|
||||
#define FL2_BUFFER_SIZE_LOG_MAX 12
|
||||
#define FL2_FASTLENGTH_MIN 6 /* only used by optimizer */
|
||||
#define FL2_FASTLENGTH_MAX 273 /* only used by optimizer */
|
||||
#define FL2_LC_MIN 0
|
||||
#define FL2_LC_MAX 4
|
||||
#define FL2_LP_MIN 0
|
||||
#define FL2_LP_MAX 4
|
||||
#define FL2_PB_MIN 0
|
||||
#define FL2_PB_MAX 4
|
||||
#define FL2_LCLP_MAX 4
|
||||
|
||||
typedef enum {
|
||||
FL2_fast,
|
||||
FL2_opt,
|
||||
FL2_ultra
|
||||
} FL2_strategy;
|
||||
|
||||
typedef struct {
|
||||
size_t dictionarySize; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory per byte, slower */
|
||||
unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */
|
||||
unsigned chainLog; /* HC3 sliding window : larger == more compression, slower; hybrid mode only (ultra) */
|
||||
unsigned cyclesLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */
|
||||
unsigned searchDepth; /* maximum depth for resolving string matches : larger == more compression, slower */
|
||||
unsigned fastLength; /* acceptable match size for parser : larger == more compression, slower; fast bytes parameter from 7-zip */
|
||||
unsigned divideAndConquer; /* split long chains of 2-byte matches into shorter chains with a small overlap : faster, somewhat less compression; enabled by default */
|
||||
unsigned bufferLog; /* buffer size for processing match chains is (dictionarySize >> (12 - bufferLog)) : affects compression when divideAndConquer enabled; */
|
||||
/* when divideAndConquer disabled, affects speed in a hardware-dependent manner */
|
||||
FL2_strategy strategy; /* encoder strategy : fast, optimized or ultra (hybrid) */
|
||||
} FL2_compressionParameters;
|
||||
|
||||
typedef enum {
|
||||
/* compression parameters */
|
||||
FL2_p_compressionLevel, /* Update all compression parameters according to pre-defined cLevel table
|
||||
* Default level is FL2_CLEVEL_DEFAULT==9.
|
||||
* Setting FL2_p_highCompression to 1 switches to an alternate cLevel table.
|
||||
* Special: value 0 means "do not change cLevel". */
|
||||
* Default level is FL2_CLEVEL_DEFAULT==6.
|
||||
* Setting FL2_p_highCompression to 1 switches to an alternate cLevel table. */
|
||||
FL2_p_highCompression, /* Maximize compression ratio for a given dictionary size.
|
||||
* Has 9 levels instead of 12, with dictionaryLog 20 - 28. */
|
||||
FL2_p_7zLevel, /* For use by the 7-zip fork employing this library. 1 - 9 */
|
||||
* Levels 1..10 = dictionaryLog 20..29 (1 Mb..512 Mb).
|
||||
* Typically provides a poor speed/ratio tradeoff. */
|
||||
FL2_p_dictionaryLog, /* Maximum allowed back-reference distance, expressed as power of 2.
|
||||
* Must be clamped between FL2_DICTLOG_MIN and FL2_DICTLOG_MAX.
|
||||
* Special: value 0 means "do not change dictionaryLog". */
|
||||
* Default = 24 */
|
||||
FL2_p_dictionarySize, /* Same as above but expressed as an absolute value.
|
||||
* Must be clamped between FL2_DICTSIZE_MIN and FL2_DICTSIZE_MAX.
|
||||
* Default = 16 Mb */
|
||||
FL2_p_overlapFraction, /* The radix match finder is block-based, so some overlap is retained from
|
||||
* each block to improve compression of the next. This value is expressed
* as n / 16 of the block size (dictionary size). Larger values are slower.
* Values above 2 mostly yield only a small improvement in compression. */
FL2_p_blockSize,
* Values above 2 mostly yield only a small improvement in compression.
* A large value for a small dictionary may worsen multithreaded compression.
* Default = 2 */
FL2_p_resetInterval, /* For multithreaded decompression. A dictionary reset will occur
* after each dictionarySize * resetInterval bytes of input.
* Default = 4 */
FL2_p_bufferLog, /* Buffering speeds up the matchfinder. Buffer size is
* 2 ^ (dictionaryLog - bufferLog). Lower number = slower, better compression,
* higher memory usage. */
FL2_p_chainLog, /* Size of the full-search table, as a power of 2.
* Resulting table size is (1 << (chainLog+2)).
* (dictionarySize >> (12 - bufferLog)) * 12 bytes. Higher number = slower,
* better compression, higher memory usage. A CPU with a large memory cache
* may make effective use of a larger buffer.
* Default = 4 */
FL2_p_hybridChainLog, /* Size of the hybrid mode HC3 hash chain, as a power of 2.
* Resulting table size is (1 << (chainLog+2)) bytes.
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy.
* Special: value 0 means "do not change chainLog". */
FL2_p_searchLog, /* Number of search attempts, as a power of 2, made by the HC3 match finder
* used only in hybrid mode.
* This parameter is only used by the hybrid "ultra" strategy.
* Default = 9 */
FL2_p_hybridCycles, /* Number of search attempts made by the HC3 match finder.
* Used only by the hybrid "ultra" strategy.
* More attempts result in slightly better and slower compression.
* This parameter is not used by the "fast" and "optimize" strategies.
* Special: value 0 means "do not change searchLog". */
FL2_p_literalCtxBits, /* lc value for LZMA2 encoder */
FL2_p_literalPosBits, /* lp value for LZMA2 encoder */
FL2_p_posBits, /* pb value for LZMA2 encoder */
* Default = 1 */
FL2_p_searchDepth, /* Match finder will resolve string matches up to this length. If a longer
* match exists further back in the input, it will not be found. */
* match exists further back in the input, it will not be found.
* Default = 42 */
FL2_p_fastLength, /* Only useful for strategies >= opt.
* Length of Match considered "good enough" to stop search.
* Length of match considered "good enough" to stop search.
* Larger values make compression stronger and slower.
* Special: value 0 means "do not change fastLength". */
* Default = 48 */
FL2_p_divideAndConquer, /* Split long chains of 2-byte matches into shorter chains with a small overlap
* during further processing. Allows buffering of all chains at length 2.
* Faster, less compression. Generally a good tradeoff. Enabled by default. */
FL2_p_strategy, /* 1 = fast; 2 = optimize, 3 = ultra (hybrid mode).
* for further processing. Allows buffering of all chains at length 2.
* Faster, less compression. Generally a good tradeoff.
* Default = enabled */
FL2_p_strategy, /* 1 = fast; 2 = optimized, 3 = ultra (hybrid mode).
* The higher the value of the selected strategy, the more complex it is,
* resulting in stronger and slower compression.
* Special: value 0 means "do not change strategy". */
* Default = ultra */
FL2_p_literalCtxBits, /* lc value for LZMA2 encoder
* Default = 3 */
FL2_p_literalPosBits, /* lp value for LZMA2 encoder
* Default = 0 */
FL2_p_posBits, /* pb value for LZMA2 encoder
* Default = 2 */
FL2_p_omitProperties, /* Omit the property byte at the start of the stream. For use within 7-zip */
/* or other containers which store the property byte elsewhere. */
/* A stream compressed under this setting cannot be decoded by this library. */
#ifndef NO_XXHASH
FL2_p_doXXHash, /* Calculate a 32-bit xxhash value from the input data and store it
* after the stream terminator. The value will be checked on decompression.
* 0 = do not calculate; 1 = calculate (default) */
#endif
FL2_p_omitProperties, /* Omit the property byte at the start of the stream. For use within 7-zip */
/* or other containers which store the property byte elsewhere. */
/* Cannot be decoded by this library. */
#ifdef RMF_REFERENCE
FL2_p_useReferenceMF /* Use the reference matchfinder for development purposes. SLOW. */
#endif
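[Editor's aside, illustrative C only, not part of the commit: the lc/lp/pb trio above is subject to the standard LZMA2 cap lc + lp <= 4; this library reports FL2_error_lclpMax_exceeded ("Parameters lc+lp > 4") when the cap is broken.]

#include <assert.h>

static void lclppb_defaults(void)
{
    unsigned lc = 3, lp = 0, pb = 2;    /* the defaults quoted above */
    assert(lc + lp <= 4);               /* else setParameter fails: lclpMax_exceeded */
    unsigned posMask = (1u << pb) - 1;  /* pb selects the match-position context */
    (void)posMask;
}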
@@ -429,8 +574,32 @@ typedef enum {
* Set one compression parameter, selected by enum FL2_cParameter.
* @result : informational value (typically, the one being set, possibly corrected),
* or an error code (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, unsigned value);
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, unsigned value);
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_setParameter(FL2_CCtx* cctx, FL2_cParameter param, size_t value);

/*! FL2_CCtx_getParameter() :
* Get one compression parameter, selected by enum FL2_cParameter.
* @result : the parameter value, or the parameter_unsupported error code
* (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CCtx_getParameter(FL2_CCtx* cctx, FL2_cParameter param);

/*! FL2_CStream_setParameter() :
* Set one compression parameter, selected by enum FL2_cParameter.
* @result : informational value (typically, the one being set, possibly corrected),
* or an error code (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cParameter param, size_t value);

/*! FL2_CStream_getParameter() :
* Get one compression parameter, selected by enum FL2_cParameter.
* @result : the parameter value, or the parameter_unsupported error code
* (which can be tested with FL2_isError()). */
FL2LIB_API size_t FL2LIB_CALL FL2_CStream_getParameter(FL2_CStream* fcs, FL2_cParameter param);

/*! FL2_getLevelParameters() :
* Get all compression parameter values defined by the preset compressionLevel.
* @result : the values in a FL2_compressionParameters struct, or the parameter_outOfBound error code
* (which can be tested with FL2_isError()) if compressionLevel is invalid. */
FL2LIB_API size_t FL2LIB_CALL FL2_getLevelParameters(int compressionLevel, int high, FL2_compressionParameters *params);

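[Editor's aside: a usage sketch for the accessors above, not library code; FL2_createCCtx() is assumed to be declared elsewhere in fast-lzma2.h.]

#include <stdio.h>

static void set_ultra(FL2_CCtx* cctx)   /* cctx from the assumed FL2_createCCtx() */
{
    size_t res = FL2_CCtx_setParameter(cctx, FL2_p_strategy, 3);  /* 3 = ultra */
    if (FL2_isError(res))
        fprintf(stderr, "setParameter: %s\n", FL2_getErrorName(res));
    else
        (void)FL2_CCtx_getParameter(cctx, FL2_p_strategy);        /* round-trip check */
}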
/***************************************
* Context memory usage
@@ -441,12 +610,29 @@ FL2LIB_API size_t FL2LIB_CALL FL2_CStream_setParameter(FL2_CStream* fcs, FL2_cPa
* FL2_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
* To use FL2_estimateCCtxSize_usingCCtx, set the compression level and any other settings for the context,
* then call the function. Some allocation occurs when the context is created, but the large memory buffers
* used for string matching are allocated only when compression begins. */
* used for string matching are allocated only when compression is initialized. */

FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize(int compressionLevel, unsigned nbThreads); /*!< memory usage determined by level */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_byParams(const FL2_compressionParameters *params, unsigned nbThreads); /*!< memory usage determined by params */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCCtxSize_usingCCtx(const FL2_CCtx* cctx); /*!< memory usage determined by settings */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads);
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCCtx(const FL2_CStream* fcs);
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize(int compressionLevel, unsigned nbThreads, int dualBuffer); /*!< memory usage determined by level */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_byParams(const FL2_compressionParameters *params, unsigned nbThreads, int dualBuffer); /*!< memory usage determined by params */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateCStreamSize_usingCStream(const FL2_CStream* fcs); /*!< memory usage determined by settings */

/*! FL2_getDictSizeFromProp() :
* Get the dictionary size from the property byte for a stream. The property byte is the first byte
* in the stream, unless omitProperties was enabled, in which case the caller must store it. */
FL2LIB_API size_t FL2LIB_CALL FL2_getDictSizeFromProp(unsigned char prop);

/*! FL2_estimateDCtxSize() :
* The size of a DCtx does not include a dictionary buffer because the caller must supply one. */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateDCtxSize(unsigned nbThreads);

/*! FL2_estimateDStreamSize() :
* Estimate decompression memory use from the dictionary size and number of threads.
* For nbThreads == 0 the number of available cores will be used.
* Obtain dictSize by passing the property byte to FL2_getDictSizeFromProp. */
FL2LIB_API size_t FL2LIB_CALL FL2_estimateDStreamSize(size_t dictSize, unsigned nbThreads); /*!< obtain dictSize from FL2_getDictSizeFromProp() */

#endif /* FAST_LZMA2_H */

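[Editor's aside: a sketch of the decompression budgeting flow described above; `src` is a hypothetical buffer holding the start of a compressed stream, and this is not code from the library.]

static size_t dstream_budget(const unsigned char *src)
{
    unsigned char prop = src[0];    /* property byte, unless omitProperties was used */
    size_t dictSize = FL2_getDictSizeFromProp(prop);
    return FL2_estimateDStreamSize(dictSize, 0);   /* nbThreads == 0: use all cores */
}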
@@ -14,10 +14,8 @@
/*-*************************************
* Dependencies
***************************************/
#include <stdlib.h> /* malloc, calloc, free */
#include <string.h> /* memset */
#include "fast-lzma2.h"
#include "fl2_error_private.h"
#include "fl2_errors.h"
#include "fl2_internal.h"


@@ -29,6 +27,9 @@ FL2LIB_API unsigned FL2LIB_CALL FL2_versionNumber(void) { return FL2_VERSION_NUM
FL2LIB_API const char* FL2LIB_CALL FL2_versionString(void) { return FL2_VERSION_STRING; }


/*-****************************************
* Compression helpers
******************************************/
FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize)
{
return FL2_COMPRESSBOUND(srcSize);
@@ -37,21 +38,70 @@ FL2LIB_API size_t FL2LIB_CALL FL2_compressBound(size_t srcSize)
/*-****************************************
* FL2 Error Management
******************************************/
HINT_INLINE
unsigned IsError(size_t code)
{
return (code > FL2_ERROR(maxCode));
}

/*! FL2_isError() :
* tells if a return value is an error code */
FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code) { return ERR_isError(code); }
FL2LIB_API unsigned FL2LIB_CALL FL2_isError(size_t code)
{
return IsError(code);
}

/*! FL2_isTimedOut() :
* tells if a return value is the timeout code */
FL2LIB_API unsigned FL2LIB_CALL FL2_isTimedOut(size_t code)
{
return (code == FL2_ERROR(timedOut));
}

/*! FL2_getErrorName() :
* provides error code string from function result (useful for debugging) */
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code) { return ERR_getErrorName(code); }
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorName(size_t code)
{
return FL2_getErrorString(FL2_getErrorCode(code));
}

/*! FL2_getError() :
* convert a `size_t` function result into a proper FL2_errorCode enum */
FL2LIB_API FL2_ErrorCode FL2LIB_CALL FL2_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
FL2LIB_API FL2_ErrorCode FL2LIB_CALL FL2_getErrorCode(size_t code)
{
if (!IsError(code))
return (FL2_ErrorCode)0;

return (FL2_ErrorCode)(0 - code);
}

/*! FL2_getErrorString() :
* provides error code string from enum */
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorString(FL2_ErrorCode code) { return ERR_getFL2ErrorString(code); }
FL2LIB_API const char* FL2LIB_CALL FL2_getErrorString(FL2_ErrorCode code)
{
static const char* const notErrorCode = "Unspecified error code";
switch (code)
{
case PREFIX(no_error): return "No error detected";
case PREFIX(GENERIC): return "Error (generic)";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(lclpMax_exceeded): return "Parameters lc+lp > 4";
case PREFIX(stage_wrong): return "Not possible at this stage of encoding";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
case PREFIX(canceled): return "Processing was canceled by a call to FL2_cancelCStream() or FL2_cancelDStream()";
case PREFIX(buffer): return "Streaming progress halted due to buffer(s) full/empty";
case PREFIX(timedOut): return "Wait timed out. Timeouts should be handled before errors using FL2_isTimedOut()";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(maxCode):
default: return notErrorCode;
}
}

/*! g_debuglog_enable :
* turn on/off debug traces (global switch) */

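[Editor's aside: how the error encoding above works in practice. A failing call returns a small negative value cast to size_t, so any code greater than (size_t)-FL2_error_maxCode is an error; under the new numbering srcSize_wrong is 12, hence (size_t)-12. Sketch using the internal FL2_ERROR() macro shown later in this commit:]

#include <assert.h>

static void error_encoding_demo(void)
{
    size_t code = FL2_ERROR(srcSize_wrong);  /* == (size_t)-12 */
    assert(FL2_isError(code));               /* code > (size_t)-FL2_error_maxCode */
    assert(FL2_getErrorCode(code) == FL2_error_srcSize_wrong);
}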
File diff suppressed because it is too large
@@ -20,8 +20,9 @@
#include "radix_internal.h"
#include "lzma2_enc.h"
#include "fast-lzma2.h"
#include "fl2threading.h"
#include "fl2pool.h"
#include "fl2_threading.h"
#include "fl2_pool.h"
#include "dict_buffer.h"
#ifndef NO_XXHASH
# include "xxhash.h"
#endif
@@ -30,19 +31,6 @@
extern "C" {
#endif

typedef struct {
unsigned dictionaryLog; /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory, slower */
unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */
unsigned chainLog; /* fully searched segment : larger == more compression, slower, more memory; hybrid mode only (ultra) */
unsigned searchLog; /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */
unsigned searchDepth; /* maximum depth for resolving string matches : larger == more compression, slower; >= 64 == more memory, slower */
unsigned fastLength; /* acceptable match size for parser, not less than searchDepth : larger == more compression, slower; fast bytes parameter from 7-zip */
unsigned divideAndConquer; /* split long chains of 2-byte matches into shorter chains with a small overlap : faster, somewhat less compression; enabled by default */
unsigned bufferLog; /* buffer size for processing match chains is (dictionaryLog - bufferLog) : when divideAndConquer enabled, affects compression; */
/* when divideAndConquer disabled, affects speed in a hardware-dependent manner */
FL2_strategy strategy; /* encoder strategy : fast, optimized or ultra (hybrid) */
} FL2_compressionParameters;

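[Editor's aside: for orientation, a hand-rolled parameter set in the shape of the struct above; the specific values are illustrative assumptions, not a library preset.]

FL2_compressionParameters example_params = {
    .dictionaryLog = 24,     /* 16 MiB maximum match distance */
    .overlapFraction = 2,    /* 2/16 of the block carried over */
    .chainLog = 9,           /* hybrid (ultra) mode only */
    .searchLog = 9,          /* hybrid (ultra) mode only */
    .searchDepth = 42,
    .fastLength = 48,        /* not less than searchDepth */
    .divideAndConquer = 1,   /* enabled by default */
    .bufferLog = 4,
    .strategy = FL2_ultra
};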
/*-*************************************
* Context memory management
***************************************/
@@ -60,38 +48,43 @@ typedef struct {

typedef struct {
FL2_CCtx* cctx;
FL2_lzmaEncoderCtx* enc;
LZMA2_ECtx* enc;
FL2_dataBlock block;
size_t cSize;
} FL2_job;

struct FL2_CCtx_s {
DICT_buffer buf;
FL2_CCtx_params params;
#ifndef FL2_SINGLETHREAD
FL2POOL_ctx* factory;
FL2POOL_ctx* compressThread;
#endif
FL2_dataBlock curBlock;
size_t asyncRes;
size_t threadCount;
size_t outThread;
size_t outPos;
size_t dictMax;
U64 block_total;
U64 streamTotal;
U64 streamCsize;
FL2_matchTable* matchTable;
#ifndef FL2_SINGLETHREAD
U32 timeout;
#endif
U32 rmfWeight;
U32 encWeight;
FL2_atomic progressIn;
FL2_atomic progressOut;
int canceled;
BYTE wroteProp;
BYTE endMarked;
BYTE loopCount;
BYTE lockParams;
unsigned jobCount;
FL2_job jobs[1];
};

struct FL2_CStream_s {
FL2_CCtx* cctx;
FL2_blockBuffer inBuff;
#ifndef NO_XXHASH
XXH32_state_t *xxh;
#endif
size_t thread_count;
size_t out_thread;
size_t out_pos;
size_t hash_pos;
BYTE end_marked;
BYTE wrote_prop;
};

#if defined (__cplusplus)
}
#endif

@@ -1,35 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

/* The purpose of this file is to have a single list of error strings embedded in binary */

#include "fl2_error_private.h"

const char* ERR_getFL2ErrorString(ERR_enum code)
{
static const char* const notErrorCode = "Unspecified error code";
switch( code )
{
case PREFIX(no_error): return "No error detected";
case PREFIX(GENERIC): return "Error (generic)";
case PREFIX(corruption_detected): return "Corrupted block detected";
case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
case PREFIX(parameter_unsupported): return "Unsupported parameter";
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
case PREFIX(srcSize_wrong): return "Src size is incorrect";
/* following error codes are not stable and may be removed or changed in a future version */
case PREFIX(maxCode):
default: return notErrorCode;
}
}
@@ -1,75 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

/* Note : this module is expected to remain private, do not expose it */

#ifndef ERROR_H_MODULE
#define ERROR_H_MODULE

#if defined (__cplusplus)
extern "C" {
#endif


/* ****************************************
* Dependencies
******************************************/
#include <stddef.h> /* size_t */
#include "fl2_errors.h" /* enum list */


/* ****************************************
* Compiler-specific
******************************************/
#if defined(__GNUC__)
# define ERR_STATIC static __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define ERR_STATIC static inline
#elif defined(_MSC_VER)
# define ERR_STATIC static __inline
#else
# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif


/*-****************************************
* Customization (error_public.h)
******************************************/
typedef FL2_ErrorCode ERR_enum;
#define PREFIX(name) FL2_error_##name


/*-****************************************
* Error codes handling
******************************************/
#define FL2_ERROR(name) ((size_t)-PREFIX(name))

ERR_STATIC unsigned ERR_isError(size_t code) { return (code > FL2_ERROR(maxCode)); }

ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }


/*-****************************************
* Error Strings
******************************************/

const char* ERR_getFL2ErrorString(ERR_enum code); /* error_private.c */

ERR_STATIC const char* ERR_getErrorName(size_t code)
{
return ERR_getFL2ErrorString(ERR_getErrorCode(code));
}

#if defined (__cplusplus)
}
#endif

#endif /* ERROR_H_MODULE */
@@ -35,13 +35,15 @@ typedef enum {
FL2_error_checksum_wrong = 4,
FL2_error_parameter_unsupported = 5,
FL2_error_parameter_outOfBound = 6,
FL2_error_stage_wrong = 7,
FL2_error_init_missing = 8,
FL2_error_memory_allocation = 9,
FL2_error_dstSize_tooSmall = 10,
FL2_error_srcSize_wrong = 11,
FL2_error_write_failed = 12,
FL2_error_lclpMax_exceeded = 7,
FL2_error_stage_wrong = 8,
FL2_error_init_missing = 9,
FL2_error_memory_allocation = 10,
FL2_error_dstSize_tooSmall = 11,
FL2_error_srcSize_wrong = 12,
FL2_error_canceled = 13,
FL2_error_buffer = 14,
FL2_error_timedOut = 15,
FL2_error_maxCode = 20 /* never EVER use this value directly, it can change in future versions! Use FL2_isError() instead */
} FL2_ErrorCode;


@@ -18,19 +18,30 @@
***************************************/
#include "mem.h"
#include "compiler.h"
#include "fl2_error_private.h"


#if defined (__cplusplus)
extern "C" {
#endif


/*-****************************************
* Error codes handling
******************************************/
#define PREFIX(name) FL2_error_##name
#define FL2_ERROR(name) ((size_t)-PREFIX(name))


/*-*************************************
* Stream properties
***************************************/
#define FL2_PROP_HASH_BIT 7
#define FL2_LZMA_PROP_MASK 0x3FU
#ifndef NO_XXHASH
# define XXHASH_SIZEOF sizeof(XXH32_canonical_t)
#endif


/*-*************************************
* Debug
***************************************/
@@ -77,8 +88,8 @@ extern int g_debuglog_enable;
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return FL2_ERROR(e); } /* check and send Error code */
#define CHECK_F(f) do { size_t const errcod = f; if (FL2_isError(errcod)) return errcod; } while(0) /* check and Forward error code */
#define CHECK_E(f, e) do { size_t const errcod = f; if (FL2_isError(errcod)) return FL2_ERROR(e); } while(0) /* check and send Error code */

MEM_STATIC U32 ZSTD_highbit32(U32 val)
{

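[Editor's aside: the do { } while(0) wrapper adopted by CHECK_F/CHECK_E above makes each macro behave as one statement; a minimal sketch of the failure it prevents, not code from the commit:]

static size_t check_demo(size_t rc)
{
    if (rc != 0)
        CHECK_F(rc);             /* returns rc from check_demo() if rc encodes an error;
                                  * with the old brace-only form the `else` below
                                  * would detach from this `if` */
    else
        return FL2_ERROR(GENERIC);
    return 0;
}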
198
C/fast-lzma2/fl2_pool.c
Normal file
@@ -0,0 +1,198 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/


/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, calloc */
#include "fl2_pool.h"
#include "fl2_internal.h"


#ifndef FL2_SINGLETHREAD

#include "fl2_threading.h" /* pthread adaptation */

struct FL2POOL_ctx_s {
/* Keep track of the threads */
size_t numThreads;

/* All threads work on the same function and object during a job */
FL2POOL_function function;
void *opaque;

/* The number of threads working on jobs */
size_t numThreadsBusy;
/* Indicates the number of threads requested and the values to pass */
ptrdiff_t queueIndex;
ptrdiff_t queueEnd;

/* The mutex protects the queue */
FL2_pthread_mutex_t queueMutex;
/* Condition variable for pushers to wait on when the queue is full */
FL2_pthread_cond_t busyCond;
/* Condition variable for poppers to wait on when the queue is empty */
FL2_pthread_cond_t newJobsCond;
/* Indicates if the queue is shutting down */
int shutdown;

/* The threads. Extras to be calloc'd */
FL2_pthread_t threads[1];
};

/* FL2POOL_thread() :
Work thread for the thread pool.
Waits for jobs and executes them.
@returns : NULL on failure else non-null.
*/
static void* FL2POOL_thread(void* opaque)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)opaque;
if (!ctx) { return NULL; }
FL2_pthread_mutex_lock(&ctx->queueMutex);
for (;;) {

/* While the mutex is locked, wait for a non-empty queue or until shutdown */
while (ctx->queueIndex >= ctx->queueEnd && !ctx->shutdown) {
FL2_pthread_cond_wait(&ctx->newJobsCond, &ctx->queueMutex);
}
/* empty => shutting down: so stop */
if (ctx->shutdown) {
FL2_pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
/* Pop a job off the queue */
size_t n = ctx->queueIndex;
++ctx->queueIndex;
++ctx->numThreadsBusy;
/* Unlock the mutex and run the job */
FL2_pthread_mutex_unlock(&ctx->queueMutex);

ctx->function(ctx->opaque, n);

FL2_pthread_mutex_lock(&ctx->queueMutex);
--ctx->numThreadsBusy;
/* Signal the master thread waiting for jobs to complete */
FL2_pthread_cond_signal(&ctx->busyCond);
} /* for (;;) */
/* Unreachable */
}

FL2POOL_ctx* FL2POOL_create(size_t numThreads)
{
FL2POOL_ctx* ctx;
/* Check the parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = calloc(1, sizeof(FL2POOL_ctx) + (numThreads - 1) * sizeof(FL2_pthread_t));
if (!ctx) { return NULL; }
/* Initialize the busy count and jobs range */
ctx->numThreadsBusy = 0;
ctx->queueIndex = 0;
ctx->queueEnd = 0;
(void)FL2_pthread_mutex_init(&ctx->queueMutex, NULL);
(void)FL2_pthread_cond_init(&ctx->busyCond, NULL);
(void)FL2_pthread_cond_init(&ctx->newJobsCond, NULL);
ctx->shutdown = 0;
ctx->numThreads = 0;
/* Initialize the threads */
{ size_t i;
for (i = 0; i < numThreads; ++i) {
if (FL2_pthread_create(&ctx->threads[i], NULL, &FL2POOL_thread, ctx)) {
ctx->numThreads = i;
FL2POOL_free(ctx);
return NULL;
} }
ctx->numThreads = numThreads;
}
return ctx;
}

/*! FL2POOL_join() :
Shutdown the queue, wake any sleeping threads, and join all of the threads.
*/
static void FL2POOL_join(FL2POOL_ctx* ctx)
{
/* Shut down the queue */
FL2_pthread_mutex_lock(&ctx->queueMutex);
ctx->shutdown = 1;
/* Wake up sleeping threads */
FL2_pthread_cond_broadcast(&ctx->newJobsCond);
FL2_pthread_mutex_unlock(&ctx->queueMutex);
/* Join all of the threads */
for (size_t i = 0; i < ctx->numThreads; ++i)
FL2_pthread_join(ctx->threads[i], NULL);
}

void FL2POOL_free(FL2POOL_ctx *ctx)
{
if (!ctx) { return; }
FL2POOL_join(ctx);
FL2_pthread_mutex_destroy(&ctx->queueMutex);
FL2_pthread_cond_destroy(&ctx->busyCond);
FL2_pthread_cond_destroy(&ctx->newJobsCond);
free(ctx);
}

size_t FL2POOL_sizeof(FL2POOL_ctx *ctx)
{
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx) + ctx->numThreads * sizeof(FL2_pthread_t);
}

void FL2POOL_addRange(void* ctxVoid, FL2POOL_function function, void *opaque, ptrdiff_t first, ptrdiff_t end)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx)
return;

/* Callers always wait for jobs to complete before adding a new set */
assert(!ctx->numThreadsBusy);

FL2_pthread_mutex_lock(&ctx->queueMutex);
ctx->function = function;
ctx->opaque = opaque;
ctx->queueIndex = first;
ctx->queueEnd = end;
FL2_pthread_cond_broadcast(&ctx->newJobsCond);
FL2_pthread_mutex_unlock(&ctx->queueMutex);
}

void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, ptrdiff_t n)
{
FL2POOL_addRange(ctxVoid, function, opaque, n, n + 1);
}

int FL2POOL_waitAll(void *ctxVoid, unsigned timeout)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx || (!ctx->numThreadsBusy && ctx->queueIndex >= ctx->queueEnd) || ctx->shutdown) { return 0; }

FL2_pthread_mutex_lock(&ctx->queueMutex);
/* Need to test for ctx->queueIndex < ctx->queueEnd in case not all jobs have started */
if (timeout != 0) {
if ((ctx->numThreadsBusy || ctx->queueIndex < ctx->queueEnd) && !ctx->shutdown)
FL2_pthread_cond_timedwait(&ctx->busyCond, &ctx->queueMutex, timeout);
}
else {
while ((ctx->numThreadsBusy || ctx->queueIndex < ctx->queueEnd) && !ctx->shutdown)
FL2_pthread_cond_wait(&ctx->busyCond, &ctx->queueMutex);
}
FL2_pthread_mutex_unlock(&ctx->queueMutex);
return ctx->numThreadsBusy && !ctx->shutdown;
}

size_t FL2POOL_threadsBusy(void * ctx)
{
return ((FL2POOL_ctx*)ctx)->numThreadsBusy;
}

#endif /* FL2_SINGLETHREAD */

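[Editor's aside: a usage sketch for the pool above, assuming fl2_pool.h and <stddef.h> are included; it runs one job per value in [0, 4) and blocks until the range is done. Not code from the commit.]

static void work(void *opaque, ptrdiff_t n)
{
    ((int *)opaque)[n] = 1;                 /* mark slot n done */
}

static int pool_demo(void)
{
    int done[4] = { 0, 0, 0, 0 };
    FL2POOL_ctx *pool = FL2POOL_create(2);  /* 2 worker threads */
    if (pool == NULL)
        return -1;
    FL2POOL_addRange(pool, work, done, 0, 4);
    FL2POOL_waitAll(pool, 0);               /* timeout 0 => wait until idle */
    FL2POOL_free(pool);
    return 0;
}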
@@ -42,16 +42,20 @@ size_t FL2POOL_sizeof(FL2POOL_ctx *ctx);
/*! FL2POOL_function :
The function type that can be added to a thread pool.
*/
typedef void(*FL2POOL_function)(void *, size_t);
typedef void(*FL2POOL_function)(void *, ptrdiff_t);

/*! FL2POOL_add() :
Add the job `function(opaque)` to the thread pool.
FL2POOL_addRange adds multiple jobs with size_t parameter from first to less than end.
Possibly blocks until there is room in the queue.
Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
void FL2POOL_add(void *ctx, FL2POOL_function function, void *opaque, size_t n);
void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, ptrdiff_t n);
void FL2POOL_addRange(void *ctx, FL2POOL_function function, void *opaque, ptrdiff_t first, ptrdiff_t end);

void FL2POOL_waitAll(void *ctx);
int FL2POOL_waitAll(void *ctx, unsigned timeout);

size_t FL2POOL_threadsBusy(void *ctx);

#if defined (__cplusplus)
}
@@ -17,6 +17,10 @@
/* create fake symbol to avoid empty translation unit warning */
int g_ZSTD_threading_useles_symbol;

#include "fast-lzma2.h"
#include "fl2_threading.h"
#include "util.h"

#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)

/**
@@ -28,19 +32,18 @@ int g_ZSTD_threading_useles_symbol;
/* === Dependencies === */
#include <process.h>
#include <errno.h>
#include "fl2threading.h"


/* === Implementation === */

static unsigned __stdcall worker(void *arg)
{
ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
FL2_pthread_t* const thread = (FL2_pthread_t*) arg;
thread->arg = thread->start_routine(thread->arg);
return 0;
}

int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
int FL2_pthread_create(FL2_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg)
{
(void)unused;
@@ -54,7 +57,7 @@ int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
return 0;
}

int FL2_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
int FL2_pthread_join(FL2_pthread_t thread, void **value_ptr)
{
DWORD result;

@@ -73,3 +76,20 @@ int FL2_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
}

#endif /* FL2_SINGLETHREAD */

unsigned FL2_checkNbThreads(unsigned nbThreads)
{
#ifndef FL2_SINGLETHREAD
if (nbThreads == 0) {
nbThreads = UTIL_countPhysicalCores();
nbThreads += !nbThreads;
}
if (nbThreads > FL2_MAXTHREADS) {
nbThreads = FL2_MAXTHREADS;
}
#else
nbThreads = 1;
#endif
return nbThreads;
}

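[Editor's aside: a behavior sketch for FL2_checkNbThreads() above, following its definition directly.]

static void thread_count_demo(void)
{
    unsigned nbAuto = FL2_checkNbThreads(0);      /* detected physical cores, minimum 1 */
    unsigned nbCapped = FL2_checkNbThreads(9999); /* clamped to FL2_MAXTHREADS; 1 if FL2_SINGLETHREAD */
    (void)nbAuto; (void)nbCapped;
}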
178
C/fast-lzma2/fl2_threading.h
Normal file
@@ -0,0 +1,178 @@
/**
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
*/

#ifndef THREADING_H_938743
#define THREADING_H_938743

#include "mem.h"

#ifndef FL2_XZ_BUILD
# ifdef _WIN32
# define MYTHREAD_VISTA
# else
# define MYTHREAD_POSIX /* posix assumed ; need a better detection method */
# endif
#elif defined(HAVE_CONFIG_H)
# include <config.h>
#endif

#if defined (__cplusplus)
extern "C" {
#endif

unsigned FL2_checkNbThreads(unsigned nbThreads);


#if !defined(FL2_SINGLETHREAD) && defined(MYTHREAD_VISTA)

/**
* Windows minimalist Pthread Wrapper, based on :
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
*/
#ifdef WINVER
# undef WINVER
#endif
#define WINVER 0x0600

#ifdef _WIN32_WINNT
# undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0600

#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif

#include <windows.h>
#include <synchapi.h>


/* mutex */
#define FL2_pthread_mutex_t CRITICAL_SECTION
#define FL2_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
#define FL2_pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define FL2_pthread_mutex_lock(a) EnterCriticalSection((a))
#define FL2_pthread_mutex_unlock(a) LeaveCriticalSection((a))

/* condition variable */
#define FL2_pthread_cond_t CONDITION_VARIABLE
#define FL2_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
#define FL2_pthread_cond_destroy(a) /* No delete */
#define FL2_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define FL2_pthread_cond_timedwait(a, b, c) SleepConditionVariableCS((a), (b), (c))
#define FL2_pthread_cond_signal(a) WakeConditionVariable((a))
#define FL2_pthread_cond_broadcast(a) WakeAllConditionVariable((a))

/* FL2_pthread_create() and FL2_pthread_join() */
typedef struct {
HANDLE handle;
void* (*start_routine)(void*);
void* arg;
} FL2_pthread_t;

int FL2_pthread_create(FL2_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg);

int FL2_pthread_join(FL2_pthread_t thread, void** value_ptr);

/**
* add here more wrappers as required
*/


#elif !defined(FL2_SINGLETHREAD) && defined(MYTHREAD_POSIX)
/* === POSIX Systems === */
# include <sys/time.h>
# include <pthread.h>

#define FL2_pthread_mutex_t pthread_mutex_t
#define FL2_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define FL2_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
#define FL2_pthread_mutex_lock(a) pthread_mutex_lock((a))
#define FL2_pthread_mutex_unlock(a) pthread_mutex_unlock((a))

#define FL2_pthread_cond_t pthread_cond_t
#define FL2_pthread_cond_init(a, b) pthread_cond_init((a), (b))
#define FL2_pthread_cond_destroy(a) pthread_cond_destroy((a))
#define FL2_pthread_cond_wait(a, b) pthread_cond_wait((a), (b))
#define FL2_pthread_cond_signal(a) pthread_cond_signal((a))
#define FL2_pthread_cond_broadcast(a) pthread_cond_broadcast((a))

#define FL2_pthread_t pthread_t
#define FL2_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define FL2_pthread_join(a, b) pthread_join((a),(b))

/* Timed wait functions from XZ by Lasse Collin
*/

/* Sets condtime to the absolute time that is timeout_ms milliseconds
* in the future.
*/
static inline void
mythread_condtime_set(struct timespec *condtime, U32 timeout_ms)
{
condtime->tv_sec = timeout_ms / 1000;
condtime->tv_nsec = (timeout_ms % 1000) * 1000000;

struct timeval now;
gettimeofday(&now, NULL);

condtime->tv_sec += now.tv_sec;
condtime->tv_nsec += now.tv_usec * 1000L;

/* tv_nsec must stay in the range [0, 999_999_999]. */
if (condtime->tv_nsec >= 1000000000L) {
condtime->tv_nsec -= 1000000000L;
++condtime->tv_sec;
}
}

/* Waits on a condition or until a timeout expires. If the timeout expires,
* non-zero is returned, otherwise zero is returned.
*/
static inline void
FL2_pthread_cond_timedwait(FL2_pthread_cond_t *cond, FL2_pthread_mutex_t *mutex,
U32 timeout_ms)
{
struct timespec condtime;
mythread_condtime_set(&condtime, timeout_ms);
pthread_cond_timedwait(cond, mutex, &condtime);
}


#elif defined(FL2_SINGLETHREAD)
/* No multithreading support */

typedef int FL2_pthread_mutex_t;
#define FL2_pthread_mutex_init(a, b) ((void)a, 0)
#define FL2_pthread_mutex_destroy(a)
#define FL2_pthread_mutex_lock(a)
#define FL2_pthread_mutex_unlock(a)

typedef int FL2_pthread_cond_t;
#define FL2_pthread_cond_init(a, b) ((void)a, 0)
#define FL2_pthread_cond_destroy(a)
#define FL2_pthread_cond_wait(a, b)
#define FL2_pthread_cond_signal(a)
#define FL2_pthread_cond_broadcast(a)

/* do not use FL2_pthread_t */

#else
# error FL2_SINGLETHREAD not defined but no threading support found
#endif /* FL2_SINGLETHREAD */

#if defined (__cplusplus)
}
#endif

#endif /* THREADING_H_938743 */

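[Editor's aside: a worked example of the mythread_condtime_set() arithmetic above, assuming gettimeofday() reports now = 100 s + 900000 us and timeout_ms = 250.]

/* tv_sec  = 250 / 1000         = 0;         + 100 s            -> 100
 * tv_nsec = (250 % 1000) * 1e6 = 250000000; + 900000 us * 1000 -> 1150000000
 * tv_nsec >= 1000000000, so normalize: tv_sec = 101, tv_nsec = 150000000,
 * i.e. the absolute deadline lands exactly 0.25 s after "now". */
struct timespec deadline;
mythread_condtime_set(&deadline, 250);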
@@ -1,201 +0,0 @@
/*
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
* All rights reserved.
* Modified for FL2 by Conor McCarthy
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/


/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, calloc */
#include "fl2pool.h"
#include "fl2_internal.h"

/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
#endif


#ifndef FL2_SINGLETHREAD

#include "fl2threading.h" /* pthread adaptation */

/* A job is a function and an opaque argument */
typedef struct FL2POOL_job_s {
FL2POOL_function function;
void *opaque;
size_t n;
} FL2POOL_job;

struct FL2POOL_ctx_s {
/* Keep track of the threads */
ZSTD_pthread_t *threads;
size_t numThreads;

/* The queue is a single job */
FL2POOL_job queue;

/* The number of threads working on jobs */
size_t numThreadsBusy;
/* Indicates if the queue is empty */
int queueEmpty;

/* The mutex protects the queue */
ZSTD_pthread_mutex_t queueMutex;
/* Condition variable for pushers to wait on when the queue is full */
ZSTD_pthread_cond_t queuePushCond;
/* Condition variables for poppers to wait on when the queue is empty */
ZSTD_pthread_cond_t queuePopCond;
/* Indicates if the queue is shutting down */
int shutdown;
};

/* FL2POOL_thread() :
Work thread for the thread pool.
Waits for jobs and executes them.
@returns : NULL on failure else non-null.
*/
static void* FL2POOL_thread(void* opaque) {
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)opaque;
if (!ctx) { return NULL; }
for (;;) {
/* Lock the mutex and wait for a non-empty queue or until shutdown */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);

while (ctx->queueEmpty && !ctx->shutdown) {
ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
}
/* empty => shutting down: so stop */
if (ctx->queueEmpty) {
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
return opaque;
}
/* Pop a job off the queue */
{ FL2POOL_job const job = ctx->queue;
ctx->queueEmpty = 1;
/* Unlock the mutex, signal a pusher, and run the job */
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);

job.function(job.opaque, job.n);

ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->numThreadsBusy--;
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePushCond);
}
} /* for (;;) */
/* Unreachable */
}

FL2POOL_ctx* FL2POOL_create(size_t numThreads) {
FL2POOL_ctx* ctx;
/* Check the parameters */
if (!numThreads) { return NULL; }
/* Allocate the context and zero initialize */
ctx = (FL2POOL_ctx*)calloc(1, sizeof(FL2POOL_ctx));
if (!ctx) { return NULL; }
/* Initialize the job queue.
* It needs one extra space since one space is wasted to differentiate empty
* and full queues.
*/
ctx->numThreadsBusy = 0;
ctx->queueEmpty = 1;
(void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
(void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
ctx->shutdown = 0;
/* Allocate space for the thread handles */
ctx->threads = (ZSTD_pthread_t*)malloc(numThreads * sizeof(ZSTD_pthread_t));
ctx->numThreads = 0;
/* Check for errors */
if (!ctx->threads) { FL2POOL_free(ctx); return NULL; }
/* Initialize the threads */
{ size_t i;
for (i = 0; i < numThreads; ++i) {
if (FL2_pthread_create(&ctx->threads[i], NULL, &FL2POOL_thread, ctx)) {
ctx->numThreads = i;
FL2POOL_free(ctx);
return NULL;
} }
ctx->numThreads = numThreads;
}
return ctx;
}

/*! FL2POOL_join() :
Shutdown the queue, wake any sleeping threads, and join all of the threads.
*/
static void FL2POOL_join(FL2POOL_ctx* ctx) {
/* Shut down the queue */
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
ctx->shutdown = 1;
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
/* Wake up sleeping threads */
ZSTD_pthread_cond_broadcast(&ctx->queuePushCond);
ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
/* Join all of the threads */
{ size_t i;
for (i = 0; i < ctx->numThreads; ++i) {
FL2_pthread_join(ctx->threads[i], NULL);
} }
}

void FL2POOL_free(FL2POOL_ctx *ctx) {
if (!ctx) { return; }
FL2POOL_join(ctx);
ZSTD_pthread_mutex_destroy(&ctx->queueMutex);
ZSTD_pthread_cond_destroy(&ctx->queuePushCond);
ZSTD_pthread_cond_destroy(&ctx->queuePopCond);
free(ctx->threads);
free(ctx);
}

size_t FL2POOL_sizeof(FL2POOL_ctx *ctx) {
if (ctx==NULL) return 0; /* supports sizeof NULL */
return sizeof(*ctx)
+ ctx->numThreads * sizeof(ZSTD_pthread_t);
}

void FL2POOL_add(void* ctxVoid, FL2POOL_function function, void *opaque, size_t n) {
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx)
return;

ZSTD_pthread_mutex_lock(&ctx->queueMutex);
{ FL2POOL_job const job = {function, opaque, n};

/* Wait until there is space in the queue for the new job */
while (!ctx->queueEmpty && !ctx->shutdown) {
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
}
/* The queue is still going => there is space */
if (!ctx->shutdown) {
ctx->numThreadsBusy++;
ctx->queueEmpty = 0;
ctx->queue = job;
}
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
ZSTD_pthread_cond_signal(&ctx->queuePopCond);
}

void FL2POOL_waitAll(void *ctxVoid)
{
FL2POOL_ctx* const ctx = (FL2POOL_ctx*)ctxVoid;
if (!ctx) { return; }

ZSTD_pthread_mutex_lock(&ctx->queueMutex);
while (ctx->numThreadsBusy && !ctx->shutdown) {
ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
}
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
}

#endif /* FL2_SINGLETHREAD */
@@ -1,120 +0,0 @@
/**
* Copyright (c) 2016 Tino Reichardt
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
*
* You can contact the author at:
* - zstdmt source repository: https://github.com/mcmilk/zstdmt
*/

#ifndef THREADING_H_938743
#define THREADING_H_938743

#if defined (__cplusplus)
extern "C" {
#endif

#if !defined(FL2_SINGLETHREAD) && defined(_WIN32)

/**
* Windows minimalist Pthread Wrapper, based on :
* http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
*/
#ifdef WINVER
# undef WINVER
#endif
#define WINVER 0x0600

#ifdef _WIN32_WINNT
# undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0600

#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif

#include <windows.h>


/* mutex */
#define ZSTD_pthread_mutex_t CRITICAL_SECTION
#define ZSTD_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))

/* condition variable */
#define ZSTD_pthread_cond_t CONDITION_VARIABLE
#define ZSTD_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
#define ZSTD_pthread_cond_destroy(a) /* No delete */
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))

/* FL2_pthread_create() and FL2_pthread_join() */
typedef struct {
HANDLE handle;
void* (*start_routine)(void*);
void* arg;
} ZSTD_pthread_t;

int FL2_pthread_create(ZSTD_pthread_t* thread, const void* unused,
void* (*start_routine) (void*), void* arg);

int FL2_pthread_join(ZSTD_pthread_t thread, void** value_ptr);

/**
* add here more wrappers as required
*/


#elif !defined(FL2_SINGLETHREAD) /* posix assumed ; need a better detection method */
/* === POSIX Systems === */
# include <pthread.h>

#define ZSTD_pthread_mutex_t pthread_mutex_t
#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b))
#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a))
#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a))
#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a))

#define ZSTD_pthread_cond_t pthread_cond_t
#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b))
#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a))
#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b))
#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a))
#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a))

#define ZSTD_pthread_t pthread_t
#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
#define ZSTD_pthread_join(a, b) pthread_join((a),(b))

#else /* FL2_SINGLETHREAD defined */
/* No multithreading support */

typedef int ZSTD_pthread_mutex_t;
#define ZSTD_pthread_mutex_init(a, b) ((void)a, 0)
#define ZSTD_pthread_mutex_destroy(a)
#define ZSTD_pthread_mutex_lock(a)
#define ZSTD_pthread_mutex_unlock(a)

typedef int ZSTD_pthread_cond_t;
#define ZSTD_pthread_cond_init(a, b) ((void)a, 0)
#define ZSTD_pthread_cond_destroy(a)
#define ZSTD_pthread_cond_wait(a, b)
#define ZSTD_pthread_cond_signal(a)
#define ZSTD_pthread_cond_broadcast(a)

/* do not use ZSTD_pthread_t */

#endif /* FL2_SINGLETHREAD */

#if defined (__cplusplus)
}
#endif

#endif /* THREADING_H_938743 */
File diff suppressed because it is too large
@@ -10,6 +10,7 @@ Public domain
#include "mem.h"
#include "data_block.h"
#include "radix_mf.h"
#include "atomic.h"

#if defined (__cplusplus)
extern "C" {
@@ -19,14 +20,10 @@ extern "C" {

#define LZMA2_END_MARKER '\0'
#define LZMA_MIN_DICT_BITS 12
#define ENC_MIN_BYTES_PER_THREAD 0x20000

typedef struct FL2_lzmaEncoderCtx_s FL2_lzmaEncoderCtx;

typedef enum {
FL2_fast,
FL2_opt,
FL2_ultra
} FL2_strategy;
typedef struct LZMA2_ECtx_s LZMA2_ECtx;

typedef struct
{
@@ -37,25 +34,28 @@ typedef struct
unsigned match_cycles;
FL2_strategy strategy;
unsigned second_dict_bits;
unsigned random_filter;
unsigned reset_interval;
} FL2_lzma2Parameters;


FL2_lzmaEncoderCtx* FL2_lzma2Create();
LZMA2_ECtx* LZMA2_createECtx(void);

void FL2_lzma2Free(FL2_lzmaEncoderCtx* enc);
void LZMA2_freeECtx(LZMA2_ECtx *const enc);

int FL2_lzma2HashAlloc(FL2_lzmaEncoderCtx* enc, const FL2_lzma2Parameters* options);
int LZMA2_hashAlloc(LZMA2_ECtx *const enc, const FL2_lzma2Parameters* const options);

size_t FL2_lzma2Encode(FL2_lzmaEncoderCtx* enc,
FL2_matchTable* tbl,
const FL2_dataBlock block,
const FL2_lzma2Parameters* options,
FL2_progressFn progress, void* opaque, size_t base, U32 weight);
size_t LZMA2_encode(LZMA2_ECtx *const enc,
FL2_matchTable* const tbl,
FL2_dataBlock const block,
const FL2_lzma2Parameters* const options,
int stream_prop,
FL2_atomic *const progress_in,
FL2_atomic *const progress_out,
int *const canceled);

BYTE FL2_getDictSizeProp(size_t dictionary_size);
BYTE LZMA2_getDictSizeProp(size_t const dictionary_size);

size_t FL2_lzma2MemoryUsage(unsigned chain_log, FL2_strategy strategy, unsigned thread_count);
size_t LZMA2_encMemoryUsage(unsigned const chain_log, FL2_strategy const strategy, unsigned const thread_count);

#if defined (__cplusplus)
}

@@ -28,9 +28,6 @@ extern "C" {
#if defined(_MSC_VER) /* Visual Studio */
# include <stdlib.h> /* _byteswap_ulong */
# include <intrin.h> /* _byteswap_* */
# pragma warning(disable : 4389) /* disable: C4389: '==' : signed/unsigned mismatch */
#endif

#endif
#if defined(__GNUC__)
# define MEM_STATIC static __inline __attribute__((unused))
@@ -42,6 +39,10 @@ extern "C" {
# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif

#ifndef __has_builtin
# define __has_builtin(x) 0 /* compat. with non-clang compilers */
#endif

/* code only tested on 32 and 64 bits systems */
#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
@@ -60,11 +61,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
typedef uint64_t U64;
typedef int64_t S64;
#else
# include <limits.h>
#if CHAR_BIT != 8
# error "this implementation requires char to be exactly 8-bit type"
#endif
typedef unsigned char BYTE;
#if USHRT_MAX != 65535
# error "this implementation requires short to be exactly 16-bit type"
#endif
typedef unsigned short U16;
typedef signed short S16;
#if UINT_MAX != 4294967295
# error "this implementation requires int to be exactly 32-bit type"
#endif
typedef unsigned int U32;
typedef signed int S32;
/* note : there are no limits defined for long long type in C90.
* limits exist in C99, however, in such case, <stdint.h> is preferred */
typedef unsigned long long U64;
typedef signed long long S64;
#endif
@@ -189,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_ulong(in);
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap32))
return __builtin_bswap32(in);
#else
return ((in << 24) & 0xff000000 ) |
@@ -203,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in)
{
#if defined(_MSC_VER) /* Visual Studio */
return _byteswap_uint64(in);
#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)
#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
|| (defined(__clang__) && __has_builtin(__builtin_bswap64))
return __builtin_bswap64(in);
#else
return ((in << 56) & 0xff00000000000000ULL) |

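[Editor's aside: a quick sanity check for the byte-swap helpers above, valid on any endianness; swapping twice is the identity.]

#include <assert.h>

static void swap_demo(void)
{
    assert(MEM_swap32(0x01020304u) == 0x04030201u);
    assert(MEM_swap64(MEM_swap64(0x0123456789ABCDEFULL)) == 0x0123456789ABCDEFULL);
}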
@@ -22,8 +22,8 @@ extern "C" {
****************************************/
#if defined(_MSC_VER)
# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */
# if (_MSC_VER <= 1800) /* 1800 == Visual Studio 2013 */
# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */
# if (_MSC_VER <= 1800) /* (1800 = Visual Studio 2013) */
# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */
# endif
#endif
@@ -65,38 +65,55 @@ extern "C" {

/* ************************************************************
* Detect POSIX version
* PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows
* PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX
* PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION
* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows
* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX
* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION
* Value of PLATFORM_POSIX_VERSION can be forced on command line
***************************************************************/
#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
|| defined(__midipix__) || defined(__VMS))
# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \
#ifndef PLATFORM_POSIX_VERSION

# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
|| defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
/* exception rule : force posix version to 200112L,
* note: it's better to use unistd.h's _POSIX_VERSION whenever possible */
# define PLATFORM_POSIX_VERSION 200112L
# else

/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html).
* note : there is no simple way to know in advance if <unistd.h> is present or not on target system,
* Posix specification mandates its presence and its content, but target system must respect this spec.
* It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like
* otherwise it will block preprocessing stage.
* The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h>
*/
# elif !defined(_WIN32) \
&& (defined(__unix__) || defined(__unix) \
|| defined(__midipix__) || defined(__VMS) || defined(__HAIKU__))

# if defined(__linux__) || defined(__linux)
# ifndef _POSIX_C_SOURCE
# define _POSIX_C_SOURCE 200112L /* use feature test macro */
# define _POSIX_C_SOURCE 200112L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */
# endif
# endif
# include <unistd.h> /* declares _POSIX_VERSION */
# if defined(_POSIX_VERSION) /* POSIX compliant */
# define PLATFORM_POSIX_VERSION _POSIX_VERSION
# else
# define PLATFORM_POSIX_VERSION 1
# endif

# else /* non-unix target platform (like Windows) */
# define PLATFORM_POSIX_VERSION 0
# endif
# endif
#endif
#if !defined(PLATFORM_POSIX_VERSION)
# define PLATFORM_POSIX_VERSION -1
#endif

#endif /* PLATFORM_POSIX_VERSION */

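/* A minimal usage sketch (assumption, not from this commit): callers gate
 * POSIX-only calls on the detected version, e.g. */
#if PLATFORM_POSIX_VERSION >= 200112L
#  include <unistd.h>
static long UTIL_pageSize_sketch(void) { return sysconf(_SC_PAGESIZE); }
#else
static long UTIL_pageSize_sketch(void) { return -1; /* unknown on this target */ }
#endif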
/*-*********************************************
* Detect if isatty() and fileno() are available
************************************************/
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__)
#if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \
|| (PLATFORM_POSIX_VERSION >= 200112L) \
|| defined(__DJGPP__) \
|| defined(__MSYS__)
# include <unistd.h> /* isatty */
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
@@ -106,8 +123,7 @@ extern "C" {
# include <io.h> /* _isatty */
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <stdio.h> /* FILE */
static __inline int IS_CONSOLE(FILE* stdStream)
{
static __inline int IS_CONSOLE(FILE* stdStream) {
DWORD dummy;
return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
}
@@ -117,7 +133,7 @@ static __inline int IS_CONSOLE(FILE* stdStream)


/******************************
* OS-specific Includes
* OS-specific IO behaviors
******************************/
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
# include <fcntl.h> /* _O_BINARY */
@@ -125,7 +141,7 @@ static __inline int IS_CONSOLE(FILE* stdStream)
# if !defined(__DJGPP__)
# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
# include <winioctl.h> /* FSCTL_SET_SPARSE */
# define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
# define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); }
# else
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
@@ -146,6 +162,34 @@ static __inline int IS_CONSOLE(FILE* stdStream)
#endif


#ifndef ZSTD_START_SYMBOLLIST_FRAME
# ifdef __linux__
# define ZSTD_START_SYMBOLLIST_FRAME 2
# elif defined __APPLE__
# define ZSTD_START_SYMBOLLIST_FRAME 4
# else
# define ZSTD_START_SYMBOLLIST_FRAME 0
# endif
#endif


#ifndef ZSTD_SETPRIORITY_SUPPORT
/* mandates presence of <sys/resource.h> and support for setpriority() : http://man7.org/linux/man-pages/man2/setpriority.2.html */
# define ZSTD_SETPRIORITY_SUPPORT (PLATFORM_POSIX_VERSION >= 200112L)
#endif


#ifndef ZSTD_NANOSLEEP_SUPPORT
/* mandates support of nanosleep() within <time.h> : http://man7.org/linux/man-pages/man2/nanosleep.2.html */
# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) \
|| (PLATFORM_POSIX_VERSION >= 200112L)
# define ZSTD_NANOSLEEP_SUPPORT 1
# else
# define ZSTD_NANOSLEEP_SUPPORT 0
# endif
#endif


#if defined (__cplusplus)
}
#endif

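/* A short usage sketch (assumption): how IS_CONSOLE and SET_BINARY_MODE are
 * typically combined before writing compressed output to stdout. */
static int UTIL_prepareStdout_sketch(void)
{
    if (IS_CONSOLE(stdout))
        return 1;              /* refuse to write binary data to a terminal */
    SET_BINARY_MODE(stdout);   /* no-op on POSIX; sets _O_BINARY on Windows */
    return 0;
}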
@@ -9,7 +9,7 @@
*/

#include "mem.h" /* U32, U64 */
#include "fl2threading.h"
#include "fl2_threading.h"
#include "fl2_internal.h"
#include "radix_internal.h"

@@ -52,10 +52,9 @@ void RMF_bitpackLimitLengths(FL2_matchTable* const tbl, size_t const index)
SetNull(index - 1);
for (U32 length = 2; length < RADIX_MAX_LENGTH && length <= index; ++length) {
U32 const link = tbl->table[index - length];
if (link != RADIX_NULL_LINK) {
if (link != RADIX_NULL_LINK)
tbl->table[index - length] = (MIN(length, link >> RADIX_LINK_BITS) << RADIX_LINK_BITS) | (link & RADIX_LINK_MASK);
}
}
}

#include "radix_engine.h"
@@ -9,80 +9,82 @@
*/

#include <stdio.h>
#include "count.h"

#define MAX_READ_BEYOND_DEPTH 2

/* If a repeating byte is found, fill that section of the table with matches of distance 1 */
static size_t HandleRepeat(FL2_matchTable* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t const block_size, ptrdiff_t i, size_t const radix_16)
static size_t RMF_handleRepeat(RMF_builder* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t i, U32 depth)
{
ptrdiff_t const rpt_index = i - (MAX_REPEAT / 2 - 2);
ptrdiff_t rpt_end;
/* Set the head to the first byte of the repeat and adjust the count */
tbl->list_heads[radix_16].head = (U32)(rpt_index - 1);
tbl->list_heads[radix_16].count -= MAX_REPEAT / 2 - 2;
/* Find the end */
i += ZSTD_count(data_block + i + 2, data_block + i + 1, data_block + block_size);
rpt_end = i;
/* Normally the last 2 bytes, but may be 4 if depth == 4 */
ptrdiff_t const last_2 = i + MAX_REPEAT / 2 - 1;

/* Find the start */
i += (4 - (i & 3)) & 3;
U32 u = *(U32*)(data_block + i);
while (i != 0 && *(U32*)(data_block + i - 4) == u)
i -= 4;
while (i != 0 && data_block[i - 1] == (BYTE)u)
--i;

ptrdiff_t const rpt_index = i;
/* No point if it's in the overlap region */
if (i >= (ptrdiff_t)start) {
U32 len = 2;
if (last_2 >= (ptrdiff_t)start) {
U32 len = depth;
/* Set matches at distance 1 and available length */
for (; i >= rpt_index && len <= RADIX_MAX_LENGTH; --i) {
for (i = last_2; i > rpt_index && len <= RADIX_MAX_LENGTH; --i) {
SetMatchLinkAndLength(i, (U32)(i - 1), len);
++len;
}
/* Set matches at distance 1 and max length */
for (; i >= rpt_index; --i) {
for (; i > rpt_index; --i)
SetMatchLinkAndLength(i, (U32)(i - 1), RADIX_MAX_LENGTH);
}
}
return rpt_end;
return rpt_index;
}

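/* A minimal sketch (assumption) of the end-finding step above: extend a
 * distance-1 repeat one byte at a time. The real code uses ZSTD_count(),
 * which compares a word at a time. */
static size_t RMF_countRepeat_sketch(const BYTE* p, const BYTE* const end)
{
    const BYTE* const start = p;
    while (p + 1 < end && p[0] == p[1])
        ++p;
    return (size_t)(p - start);
}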
/* If a 2-byte repeat is found, fill that section of the table with matches of distance 2 */
static size_t HandleRepeat2(FL2_matchTable* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t const block_size, ptrdiff_t i, size_t const radix_16)
static size_t RMF_handleRepeat2(RMF_builder* const tbl, const BYTE* const data_block, size_t const start, ptrdiff_t i, U32 depth)
{
size_t radix_16_rev;
ptrdiff_t const rpt_index = i - (MAX_REPEAT - 3);
ptrdiff_t rpt_end;
/* Normally the last 2 bytes, but may be 4 if depth == 4 */
ptrdiff_t const last_2 = i + MAX_REPEAT * 2U - 4;

/* Set the head to the first byte of the repeat and adjust the count */
tbl->list_heads[radix_16].head = (U32)(rpt_index - 1);
tbl->list_heads[radix_16].count -= MAX_REPEAT / 2 - 2;
radix_16_rev = ((radix_16 >> 8) | (radix_16 << 8)) & 0xFFFF;
tbl->list_heads[radix_16_rev].head = (U32)(rpt_index - 2);
tbl->list_heads[radix_16_rev].count -= MAX_REPEAT / 2 - 1;
/* Find the end */
i += ZSTD_count(data_block + i + 2, data_block + i, data_block + block_size);
rpt_end = i;
/* Find the start */
ptrdiff_t realign = i & 1;
i += (4 - (i & 3)) & 3;
U32 u = *(U32*)(data_block + i);
while (i != 0 && *(U32*)(data_block + i - 4) == u)
i -= 4;
while (i != 0 && data_block[i - 1] == data_block[i + 1])
--i;
i += (i & 1) ^ realign;

ptrdiff_t const rpt_index = i;
/* No point if it's in the overlap region */
if (i >= (ptrdiff_t)start) {
U32 len = 2;
U32 len = depth + (data_block[last_2 + depth] == data_block[last_2]);
/* Set matches at distance 2 and available length */
for (; i >= rpt_index && len <= RADIX_MAX_LENGTH; --i) {
for (i = last_2; i > rpt_index && len <= RADIX_MAX_LENGTH; i -= 2) {
SetMatchLinkAndLength(i, (U32)(i - 2), len);
++len;
len += 2;
}
/* Set matches at distance 2 and max length */
for (; i >= rpt_index; --i) {
for (; i > rpt_index; i -= 2)
SetMatchLinkAndLength(i, (U32)(i - 2), RADIX_MAX_LENGTH);
}
}
return rpt_end;
return rpt_index;
}

/* Initialization for the reference algorithm */
#ifdef RMF_REFERENCE
static void RadixInitReference(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
static void RMF_initReference(FL2_matchTable* const tbl, const void* const data, size_t const end)
{
const BYTE* const data_block = (const BYTE*)data;
ptrdiff_t const block_size = end - 1;
size_t st_index = 0;
for (ptrdiff_t i = 0; i < block_size; ++i)
{
size_t radix_16 = ((size_t)data_block[i] << 8) | data_block[i + 1];
U32 prev = tbl->list_heads[radix_16].head;
size_t const radix_16 = ((size_t)data_block[i] << 8) | data_block[i + 1];
U32 const prev = tbl->list_heads[radix_16].head;
if (prev != RADIX_NULL_LINK) {
SetMatchLinkAndLength(i, prev, 2U);
tbl->list_heads[radix_16].head = (U32)i;
@@ -98,7 +100,6 @@ static void RadixInitReference(FL2_matchTable* const tbl, const void* const data
SetNull(end - 1);
tbl->end_index = (U32)st_index;
tbl->st_index = ATOMIC_INITIAL_VALUE;
(void)start;
}
#endif

@@ -108,48 +109,43 @@ RMF_bitpackInit
#else
RMF_structuredInit
#endif
(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
(FL2_matchTable* const tbl, const void* const data, size_t const end)
{
const BYTE* const data_block = (const BYTE*)data;
size_t st_index = 0;
size_t radix_16;
ptrdiff_t const block_size = end - 2;
ptrdiff_t rpt_total = 0;
U32 count = 0;

if (end <= 2) {
for (size_t i = 0; i < end; ++i) {
for (size_t i = 0; i < end; ++i)
SetNull(i);
}
tbl->end_index = 0;
return 0;
}
#ifdef RMF_REFERENCE
if (tbl->params.use_ref_mf) {
RadixInitReference(tbl, data, start, end);
RMF_initReference(tbl, data, end);
return 0;
}
#endif

SetNull(0);

const BYTE* const data_block = (const BYTE*)data;
size_t st_index = 0;
/* Initial 2-byte radix value */
radix_16 = ((size_t)data_block[0] << 8) | data_block[1];
size_t radix_16 = ((size_t)data_block[0] << 8) | data_block[1];
tbl->stack[st_index++] = (U32)radix_16;
tbl->list_heads[radix_16].head = 0;
tbl->list_heads[radix_16].count = 1;

radix_16 = ((size_t)((BYTE)radix_16) << 8) | data_block[2];

ptrdiff_t rpt_total = 0;
ptrdiff_t i = 1;
ptrdiff_t const block_size = end - 2;
for (; i < block_size; ++i) {
/* Pre-load the next value for speed increase */
/* Pre-load the next value for speed increase on some hardware. Execution can continue while memory read is pending */
size_t const next_radix = ((size_t)((BYTE)radix_16) << 8) | data_block[i + 2];

U32 const prev = tbl->list_heads[radix_16].head;
if (prev != RADIX_NULL_LINK) {
S32 dist = (S32)i - prev;
/* Check for repeat */
if (dist > 2) {
count = 0;
/* Link this position to the previous occurance */
/* Link this position to the previous occurrence */
InitMatchLink(i, prev);
/* Set the previous to this position */
tbl->list_heads[radix_16].head = (U32)i;
@@ -157,33 +153,6 @@ RMF_structuredInit
radix_16 = next_radix;
}
else {
count += 3 - dist;
/* Do the usual if the repeat is too short */
if (count < MAX_REPEAT - 2) {
InitMatchLink(i, prev);
tbl->list_heads[radix_16].head = (U32)i;
++tbl->list_heads[radix_16].count;
radix_16 = next_radix;
}
else {
ptrdiff_t const prev_i = i;
/* Eliminate the repeat from the linked list to save time */
if (dist == 1) {
i = HandleRepeat(tbl, data_block, start, end, i, radix_16);
rpt_total += i - prev_i + MAX_REPEAT / 2U - 1;
}
else {
i = HandleRepeat2(tbl, data_block, start, end, i, radix_16);
rpt_total += i - prev_i + MAX_REPEAT - 2;
}
if (i < block_size)
radix_16 = ((size_t)data_block[i + 1] << 8) | data_block[i + 2];
count = 0;
}
}
}
else {
count = 0;
SetNull(i);
tbl->list_heads[radix_16].head = (U32)i;
tbl->list_heads[radix_16].count = 1;
@@ -192,65 +161,100 @@ RMF_structuredInit
}
}
/* Handle the last value */
if (i <= block_size && tbl->list_heads[radix_16].head != RADIX_NULL_LINK) {
if (tbl->list_heads[radix_16].head != RADIX_NULL_LINK)
SetMatchLinkAndLength(block_size, tbl->list_heads[radix_16].head, 2);
}
else {
else
SetNull(block_size);
}

/* Never a match at the last byte */
SetNull(end - 1);

tbl->end_index = (U32)st_index;
tbl->st_index = ATOMIC_INITIAL_VALUE;

return rpt_total;
}

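/* A small sketch (assumption) of the rolling 2-byte radix used in the loop
 * above: keep the low byte of the previous value, shift in the next byte. */
static size_t RMF_rollRadix16_sketch(size_t radix_16, BYTE next_byte)
{
    return ((size_t)(BYTE)radix_16 << 8) | next_byte;
}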
#if defined(_MSC_VER)
# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */
#endif


/* Copy the list into a buffer and recurse it there. This decreases cache misses and allows */
/* data characters to be loaded every fourth pass and stored for use in the next 4 passes */
static void RecurseListsBuffered(RMF_builder* const tbl,
static void RMF_recurseListsBuffered(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
BYTE depth,
BYTE const max_depth,
U32 depth,
U32 const max_depth,
U32 orig_list_count,
size_t const stack_base)
{
if (orig_list_count < 2 || tbl->match_buffer_limit < 2)
return;

/* Create an offset data buffer pointer for reading the next bytes */
const BYTE* data_src = data_block + depth;
size_t start = 0;

if (orig_list_count < 2 || tbl->match_buffer_limit < 2)
return;
do {
size_t count = start;
U32 list_count = (U32)(start + orig_list_count);
U32 overlap;

if (list_count > tbl->match_buffer_limit) {
if (list_count > tbl->match_buffer_limit)
list_count = (U32)tbl->match_buffer_limit;
}

size_t count = start;
size_t prev_link = (size_t)-1;
size_t rpt = 0;
size_t rpt_tail = link;
for (; count < list_count; ++count) {
/* Pre-load next link */
size_t const next_link = GetMatchLink(link);
size_t dist = prev_link - link;
if (dist > 2) {
/* Get 4 data characters for later. This doesn't block on a cache miss. */
tbl->match_buffer[count].src.u32 = MEM_read32(data_src + link);
/* Record the actual location of this suffix */
tbl->match_buffer[count].from = (U32)link;
/* Initialize the next link */
tbl->match_buffer[count].next = (U32)(count + 1) | ((U32)depth << 24);
tbl->match_buffer[count].next = (U32)(count + 1) | (depth << 24);
rpt = 0;
prev_link = link;
rpt_tail = link;
link = next_link;
}
else {
rpt += 3 - dist;
/* Do the usual if the repeat is too short */
if (rpt < MAX_REPEAT - 2) {
/* Get 4 data characters for later. This doesn't block on a cache miss. */
tbl->match_buffer[count].src.u32 = MEM_read32(data_src + link);
/* Record the actual location of this suffix */
tbl->match_buffer[count].from = (U32)link;
/* Initialize the next link */
tbl->match_buffer[count].next = (U32)(count + 1) | (depth << 24);
prev_link = link;
link = next_link;
}
else {
/* Eliminate the repeat from the linked list to save time */
if (dist == 1) {
link = RMF_handleRepeat(tbl, data_block, block_start, link, depth);
count -= MAX_REPEAT / 2;
orig_list_count -= (U32)(rpt_tail - link);
}
else {
link = RMF_handleRepeat2(tbl, data_block, block_start, link, depth);
count -= MAX_REPEAT - 1;
orig_list_count -= (U32)(rpt_tail - link) >> 1;
}
rpt = 0;
list_count = (U32)(start + orig_list_count);

if (list_count > tbl->match_buffer_limit)
list_count = (U32)tbl->match_buffer_limit;
}
}
}
count = list_count;
/* Make the last element circular so pre-loading doesn't read past the end. */
tbl->match_buffer[count - 1].next = (U32)(count - 1) | ((U32)depth << 24);
overlap = 0;
tbl->match_buffer[count - 1].next = (U32)(count - 1) | (depth << 24);
U32 overlap = 0;
if (list_count < (U32)(start + orig_list_count)) {
overlap = list_count >> MATCH_BUFFER_OVERLAP;
overlap += !overlap;
@@ -259,23 +263,33 @@ static void RecurseListsBuffered(RMF_builder* const tbl,
orig_list_count -= (U32)(list_count - start);
/* Copy everything back, except the last link which never changes, and any extra overlap */
count -= overlap + (overlap == 0);
#ifdef RMF_BITPACK
if (max_depth > RADIX_MAX_LENGTH) for (size_t index = 0; index < count; ++index) {
size_t const from = tbl->match_buffer[index].from;
if (from < block_start)
return;
U32 length = tbl->match_buffer[index].next >> 24;
length = (length > RADIX_MAX_LENGTH) ? RADIX_MAX_LENGTH : length;
size_t const next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length);
}
else
#endif
for (size_t index = 0; index < count; ++index) {
size_t const from = tbl->match_buffer[index].from;
if (from < block_start)
return;

{ U32 length = tbl->match_buffer[index].next >> 24;
size_t next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
U32 const length = tbl->match_buffer[index].next >> 24;
size_t const next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length);
}
}
start = 0;
if (overlap) {
size_t dest = 0;
for (size_t src = list_count - overlap; src < list_count; ++src) {
tbl->match_buffer[dest].from = tbl->match_buffer[src].from;
tbl->match_buffer[dest].src.u32 = MEM_read32(data_src + tbl->match_buffer[src].from);
tbl->match_buffer[dest].next = (U32)(dest + 1) | ((U32)depth << 24);
tbl->match_buffer[dest].next = (U32)(dest + 1) | (depth << 24);
++dest;
}
start = dest;
@@ -283,30 +297,23 @@ static void RecurseListsBuffered(RMF_builder* const tbl,
} while (orig_list_count != 0);
}

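/* Sketch (assumption) of the self-link trick used above: the final buffer
 * entry points at itself, so the unconditional pre-load of 'next' can never
 * walk past the end of the buffer. Simplified entry type for illustration. */
typedef struct { U32 next; } RMF_bufEntry_sketch;

static void RMF_makeTailCircular_sketch(RMF_bufEntry_sketch* buf, size_t count)
{
    buf[count - 1].next = (U32)(count - 1); /* self-reference terminates any walk */
}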
/* Parse the list with bounds checks on data reads. Stop at the point where bound checks are not required. */
/* Parse the list with an upper bound check on data reads. Stop at the point where bound checks are not required. */
/* Buffering is used so that parsing can continue below the bound to find a few matches without altering the main table. */
static void RecurseListsBound(RMF_builder* const tbl,
static void RMF_recurseListsBound(RMF_builder* const tbl,
const BYTE* const data_block,
ptrdiff_t const block_size,
RMF_tableHead* const list_head,
U32 const max_depth)
U32 max_depth)
{
U32 list_count = list_head->count;
if (list_count < 2)
return;

ptrdiff_t link = list_head->head;
ptrdiff_t const bounded_size = max_depth + MAX_READ_BEYOND_DEPTH;
ptrdiff_t const bounded_start = block_size - MIN(block_size, bounded_size);
/* Create an offset data buffer pointer for reading the next bytes */
size_t count = 0;
size_t extra_count = (max_depth >> 4) + 4;
ptrdiff_t limit;
const BYTE* data_src;
U32 depth;
size_t index;
size_t st_index;
RMF_listTail* tails_8;

if (list_count < 2)
return;

list_count = MIN((U32)bounded_size, list_count);
list_count = MIN(list_count, (U32)tbl->match_buffer_size);
@@ -314,10 +321,9 @@ static void RecurseListsBound(RMF_builder* const tbl,
ptrdiff_t next_link = GetMatchLink(link);
if (link >= bounded_start) {
--list_head->count;
if (next_link < bounded_start) {
if (next_link < bounded_start)
list_head->head = (U32)next_link;
}
}
else {
--extra_count;
}
@@ -328,18 +334,20 @@ static void RecurseListsBound(RMF_builder* const tbl,
link = next_link;
}
list_count = (U32)count;
limit = block_size - 2;
data_src = data_block + 2;
depth = 3;
index = 0;
st_index = 0;
tails_8 = tbl->tails_8;
ptrdiff_t limit = block_size - 2;
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE* data_src = data_block + 2;
U32 depth = 3;
size_t index = 0;
size_t st_index = 0;
RMF_listTail* const tails_8 = tbl->tails_8;
do {
link = tbl->match_buffer[index].from;
if (link < limit) {
size_t const radix_8 = data_src[link];
/* Seen this char before? */
const U32 prev = tails_8[radix_8].prev_index;
U32 const prev = tails_8[radix_8].prev_index;
tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tails_8[radix_8].list_count;
/* Link the previous occurrence to this one and record the new length */
@@ -353,7 +361,6 @@ static void RecurseListsBound(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tails_8[radix_8].prev_index = (U32)index;
}
++index;
} while (index < list_count);
@@ -368,10 +375,9 @@ static void RecurseListsBound(RMF_builder* const tbl,
/* Pop an item off the stack */
--st_index;
list_count = tbl->stack[st_index].count;
if (list_count < 2) {
/* Nothing to match with */
if (list_count < 2) /* Nothing to match with */
continue;
}

index = tbl->stack[st_index].head;
depth = (tbl->match_buffer[index].next >> 24);
if (depth >= max_depth)
@@ -390,9 +396,10 @@ static void RecurseListsBound(RMF_builder* const tbl,
if (link < limit) {
size_t const radix_8 = data_src[link];
U32 const prev = tails_8[radix_8].prev_index;
tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tails_8[radix_8].list_count = 1;
@@ -400,7 +407,6 @@ static void RecurseListsBound(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tails_8[radix_8].prev_index = (U32)index;
}
index = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
} while (--list_count != 0);
@@ -413,20 +419,20 @@ static void RecurseListsBound(RMF_builder* const tbl,
--count;
for (index = 0; index < count; ++index) {
ptrdiff_t const from = tbl->match_buffer[index].from;
size_t next;
U32 length;

if (from < bounded_start)
break;
length = tbl->match_buffer[index].next >> 24;

U32 length = tbl->match_buffer[index].next >> 24;
length = MIN(length, (U32)(block_size - from));
next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
length = MIN(length, RADIX_MAX_LENGTH);

size_t const next = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
SetMatchLinkAndLength(from, tbl->match_buffer[next].from, length);
}
}

/* Compare each string with all others to find the best match */
static void BruteForce(RMF_builder* const tbl,
static void RMF_bruteForce(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
@@ -445,6 +451,7 @@ static void BruteForce(RMF_builder* const tbl,
link = GetMatchLink(link);
buffer[i] = link;
} while (++i < list_count);

i = 0;
do {
size_t longest = 0;
@@ -454,34 +461,37 @@ static void BruteForce(RMF_builder* const tbl,
do {
const BYTE* data_2 = data_src + buffer[j];
size_t len_test = 0;
while (data[len_test] == data_2[len_test] && len_test < limit) {
while (data[len_test] == data_2[len_test] && len_test < limit)
++len_test;
}

if (len_test > longest) {
longest_index = j;
longest = len_test;
if (len_test >= limit) {
if (len_test >= limit)
break;
}
}
} while (++j < list_count);
if (longest > 0) {
SetMatchLinkAndLength(buffer[i],
(U32)buffer[longest_index],
depth + (U32)longest);
}

if (longest > 0)
SetMatchLinkAndLength(buffer[i], (U32)buffer[longest_index], depth + (U32)longest);

++i;
/* Test with block_start to avoid wasting time matching strings in the overlap region with each other */
} while (i < list_count - 1 && buffer[i] >= block_start);
}

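/* A compact sketch (assumption) of the core comparison in RMF_bruteForce:
 * the longest common prefix of two positions, bounded by 'limit'. */
static size_t RMF_commonPrefix_sketch(const BYTE* a, const BYTE* b, size_t limit)
{
    size_t len = 0;
    while (len < limit && a[len] == b[len])
        ++len;
    return len;
}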
static void RecurseLists16(RMF_builder* const tbl,
/* RMF_recurseLists16() :
* Match strings at depth 2 using a 16-bit radix to lengthen to depth 4
*/
static void RMF_recurseLists16(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
U32 count,
U32 const max_depth)
{
/* Offset data pointer. This method is only called at depth 2 */
U32 const table_max_depth = MIN(max_depth, RADIX_MAX_LENGTH);
/* Offset data pointer. This function is only called at depth 2 */
const BYTE* const data_src = data_block + 2;
/* Load radix values from the data chars */
size_t next_radix_8 = data_src[link];
@@ -489,7 +499,6 @@ static void RecurseLists16(RMF_builder* const tbl,
size_t reset_list[RADIX8_TABLE_SIZE];
size_t reset_count = 0;
size_t st_index = 0;
U32 prev;
/* Last one is done separately */
--count;
do
@@ -504,7 +513,8 @@ static void RecurseLists16(RMF_builder* const tbl,
next_radix_8 = data_src[next_link];
next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8);

prev = tbl->tails_8[radix_8].prev_index;
U32 prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)link;
if (prev != RADIX_NULL_LINK) {
/* Link the previous occurrence to this one at length 3. */
/* This will be overwritten if a 4 is found. */
@@ -513,9 +523,9 @@ static void RecurseLists16(RMF_builder* const tbl,
else {
reset_list[reset_count++] = radix_8;
}
tbl->tails_8[radix_8].prev_index = (U32)link;

prev = tbl->tails_16[radix_16].prev_index;
tbl->tails_16[radix_16].prev_index = (U32)link;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_16[radix_16].list_count;
/* Link at length 4, overwriting the 3 */
@@ -524,35 +534,35 @@ static void RecurseLists16(RMF_builder* const tbl,
else {
tbl->tails_16[radix_16].list_count = 1;
tbl->stack[st_index].head = (U32)link;
/* Store a reference to this table location to retrieve the count at the end */
tbl->stack[st_index].count = (U32)radix_16;
++st_index;
}
tbl->tails_16[radix_16].prev_index = (U32)link;
link = next_link;
} while (--count > 0);

/* Do the last location */
prev = tbl->tails_8[next_radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
U32 prev = tbl->tails_8[next_radix_8].prev_index;
if (prev != RADIX_NULL_LINK)
SetMatchLinkAndLength(prev, (U32)link, 3);
}

prev = tbl->tails_16[next_radix_16].prev_index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_16[next_radix_16].list_count;
SetMatchLinkAndLength(prev, (U32)link, 4);
}
for (size_t i = 0; i < reset_count; ++i) {

for (size_t i = 0; i < reset_count; ++i)
tbl->tails_8[reset_list[i]].prev_index = RADIX_NULL_LINK;
}

for (size_t i = 0; i < st_index; ++i) {
tbl->tails_16[tbl->stack[i].count].prev_index = RADIX_NULL_LINK;
tbl->stack[i].count = tbl->tails_16[tbl->stack[i].count].list_count;
}
while (st_index > 0) {
U32 list_count;
U32 depth;

while (st_index > 0) {
--st_index;
list_count = tbl->stack[st_index].count;
U32 const list_count = tbl->stack[st_index].count;
if (list_count < 2) {
/* Nothing to do */
continue;
@@ -567,19 +577,19 @@ static void RecurseLists16(RMF_builder* const tbl,
continue;
}
/* The current depth */
depth = GetMatchLength(link);
U32 const depth = GetMatchLength(link);
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) {
/* Quicker to use brute force, each string compared with all previous strings */
BruteForce(tbl, data_block,
RMF_bruteForce(tbl, data_block,
block_start,
link,
list_count,
depth,
max_depth);
table_max_depth);
continue;
}
/* Send to the buffer at depth 4 */
RecurseListsBuffered(tbl,
RMF_recurseListsBuffered(tbl,
data_block,
block_start,
link,
@@ -591,7 +601,10 @@ static void RecurseLists16(RMF_builder* const tbl,
}

#if 0
static void RecurseListsUnbuf16(RMF_builder* const tbl,
/* Unbuffered complete processing to max_depth.
* This may be faster on CPUs without a large memory cache.
*/
static void RMF_recurseListsUnbuf16(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t link,
@@ -607,7 +620,6 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
size_t reset_list[RADIX8_TABLE_SIZE];
size_t reset_count = 0;
size_t st_index = 0;
U32 prev;
/* Last one is done separately */
--count;
do
@@ -620,7 +632,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
size_t radix_16 = next_radix_16;
next_radix_8 = data_src[next_link];
next_radix_16 = next_radix_8 + ((size_t)(data_src[next_link + 1]) << 8);
prev = tails_8[radix_8].prev_index;
U32 prev = tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
/* Link the previous occurrence to this one at length 3. */
/* This will be overwritten if a 4 is found. */
@@ -646,7 +658,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
link = next_link;
} while (--count > 0);
/* Do the last location */
prev = tails_8[next_radix_8].prev_index;
U32 prev = tails_8[next_radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
SetMatchLinkAndLength(prev, (U32)link, 3);
}
@@ -683,7 +695,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
U32 depth = GetMatchLength(link);
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) {
/* Quicker to use brute force, each string compared with all previous strings */
BruteForce(tbl, data_block,
RMF_bruteForce(tbl, data_block,
block_start,
link,
list_count,
@@ -800,7 +812,7 @@ static void RecurseListsUnbuf16(RMF_builder* const tbl,
#ifdef RMF_REFERENCE

/* Simple, slow, complete parsing for reference */
static void RecurseListsReference(RMF_builder* const tbl,
static void RMF_recurseListsReference(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_size,
size_t link,
@@ -836,12 +848,8 @@ static void RecurseListsReference(RMF_builder* const tbl,
}
memset(tbl->tails_8, 0xFF, sizeof(tbl->tails_8));
while (st_index > 0) {
U32 list_count;
U32 depth;
size_t prev_st_index;

--st_index;
list_count = tbl->stack[st_index].count;
U32 list_count = tbl->stack[st_index].count;
if (list_count < 2) {
/* Nothing to do */
continue;
@@ -854,14 +862,14 @@ static void RecurseListsReference(RMF_builder* const tbl,
}
link = tbl->stack[st_index].head;
/* The current depth */
depth = GetMatchLength(link);
U32 depth = GetMatchLength(link);
if (depth >= max_depth)
continue;
data_src = data_block + depth;
limit = block_size - depth;
/* Next depth for 1 extra char */
++depth;
prev_st_index = st_index;
size_t prev_st_index = st_index;
do {
if (link < limit) {
size_t const radix_8 = data_src[link];
@@ -890,21 +898,29 @@ static void RecurseListsReference(RMF_builder* const tbl,
#endif /* RMF_REFERENCE */

/* Atomically take a list from the head table */
static ptrdiff_t RMF_getNextList(FL2_matchTable* const tbl, unsigned const multi_thread)
static ptrdiff_t RMF_getNextList_mt(FL2_matchTable* const tbl)
{
if (tbl->st_index < tbl->end_index) {
long index = multi_thread ? FL2_atomic_increment(tbl->st_index) : FL2_nonAtomic_increment(tbl->st_index);
if (index < tbl->end_index) {
long index = FL2_atomic_increment(tbl->st_index);
if (index < tbl->end_index)
return index;
}
}
return -1;
}

#define UPDATE_INTERVAL 0x40000U
/* Non-atomically take a list from the head table */
static ptrdiff_t RMF_getNextList_st(FL2_matchTable* const tbl)
{
if (tbl->st_index < tbl->end_index) {
long index = FL2_nonAtomic_increment(tbl->st_index);
if (index < tbl->end_index)
return index;
}
return -1;
}

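/* A minimal sketch (assumption) of the work-claiming pattern above, written
 * with C11 atomics in place of the FL2_atomic_increment wrapper. The claimed
 * index is the pre-increment value, so exactly one thread gets each list. */
#include <stdatomic.h>

static long RMF_claimNext_sketch(atomic_long* next, long end)
{
    long index = atomic_fetch_add(next, 1);
    return (index < end) ? index : -1; /* -1 : no work left */
}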
/* Iterate the head table concurrently with other threads, and recurse each list until max_depth is reached */
int
void
#ifdef RMF_BITPACK
RMF_bitpackBuildTable
#else
@@ -913,69 +929,58 @@ RMF_structuredBuildTable
(FL2_matchTable* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done)
FL2_dataBlock const block)
{
if (!block.end)
return 0;
U64 const enc_size = block.end - block.start;
if (block.end == 0)
return;

unsigned const best = !tbl->params.divide_and_conquer;
unsigned const max_depth = MIN(tbl->params.depth, RADIX_MAX_LENGTH) & ~1;
size_t const bounded_start = block.end - max_depth - MAX_READ_BEYOND_DEPTH;
ptrdiff_t next_progress = 0;
size_t update = UPDATE_INTERVAL;
size_t total = init_done;
unsigned const max_depth = MIN(tbl->params.depth, STRUCTURED_MAX_LENGTH) & ~1;
size_t bounded_start = max_depth + MAX_READ_BEYOND_DEPTH;
bounded_start = block.end - MIN(block.end, bounded_start);
ptrdiff_t next_progress = (job == 0) ? 0 : RADIX16_TABLE_SIZE;
ptrdiff_t(*getNextList)(FL2_matchTable* const tbl)
= multi_thread ? RMF_getNextList_mt : RMF_getNextList_st;

for (;;)
{
/* Get the next to process */
ptrdiff_t index = RMF_getNextList(tbl, multi_thread);
RMF_tableHead list_head;
ptrdiff_t index = getNextList(tbl);

if (index < 0) {
if (index < 0)
break;
}
if (progress) {

while (next_progress < index) {
total += tbl->list_heads[tbl->stack[next_progress]].count;
/* initial value of next_progress ensures only thread 0 executes this */
tbl->progress += tbl->list_heads[tbl->stack[next_progress]].count;
++next_progress;
}
if (total >= update) {
if (progress((size_t)((total * enc_size / block.end * weight) >> 4), opaque)) {
FL2_atomic_add(tbl->st_index, RADIX16_TABLE_SIZE);
return 1;
}
update = total + UPDATE_INTERVAL;
}
}
index = tbl->stack[index];
list_head = tbl->list_heads[index];
RMF_tableHead list_head = tbl->list_heads[index];
tbl->list_heads[index].head = RADIX_NULL_LINK;
if (list_head.count < 2 || list_head.head < block.start) {
if (list_head.count < 2 || list_head.head < block.start)
continue;
}

#ifdef RMF_REFERENCE
if (tbl->params.use_ref_mf) {
RecurseListsReference(tbl->builders[job], block.data, block.end, list_head.head, list_head.count, max_depth);
RMF_recurseListsReference(tbl->builders[job], block.data, block.end, list_head.head, list_head.count, max_depth);
continue;
}
#endif
if (list_head.head >= bounded_start) {
RecurseListsBound(tbl->builders[job], block.data, block.end, &list_head, (BYTE)max_depth);
if (list_head.count < 2 || list_head.head < block.start) {
RMF_recurseListsBound(tbl->builders[job], block.data, block.end, &list_head, max_depth);
if (list_head.count < 2 || list_head.head < block.start)
continue;
}
}
if (best && list_head.count > tbl->builders[job]->match_buffer_limit)
{
/* Not worth buffering or too long */
RecurseLists16(tbl->builders[job], block.data, block.start, list_head.head, list_head.count, max_depth);
RMF_recurseLists16(tbl->builders[job], block.data, block.start, list_head.head, list_head.count, max_depth);
}
else {
RecurseListsBuffered(tbl->builders[job], block.data, block.start, list_head.head, 2, (BYTE)max_depth, list_head.count, 0);
RMF_recurseListsBuffered(tbl->builders[job], block.data, block.start, list_head.head, 2, (BYTE)max_depth, list_head.count, 0);
}
}
return 0;
}

int
@@ -984,28 +989,24 @@ RMF_bitpackIntegrityCheck
#else
RMF_structuredIntegrityCheck
#endif
(const FL2_matchTable* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth)
(const FL2_matchTable* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned max_depth)
{
max_depth &= ~1;
int err = 0;
for (index += !index; index < end; ++index) {
U32 link;
U32 length;
U32 len_test;
U32 limit;

if (IsNull(index))
continue;
link = GetMatchLink(index);
U32 const link = GetMatchLink(index);
if (link >= index) {
printf("Forward link at %X to %u\r\n", (U32)index, link);
err = 1;
continue;
}
length = GetMatchLength(index);
U32 const length = GetMatchLength(index);
if (index && length < RADIX_MAX_LENGTH && link - 1 == GetMatchLink(index - 1) && length + 1 == GetMatchLength(index - 1))
continue;
len_test = 0;
limit = MIN((U32)(end - index), RADIX_MAX_LENGTH);
U32 len_test = 0;
U32 const limit = MIN((U32)(end - index), RADIX_MAX_LENGTH);
for (; len_test < limit && data[link + len_test] == data[index + len_test]; ++len_test) {
}
if (len_test < length) {
@@ -1013,63 +1014,8 @@ RMF_structuredIntegrityCheck
err = 1;
}
if (length < max_depth && len_test > length)
/* These occur occasionally due to splitting of chains in the buffer when long repeats are present */
printf("Shortened match at %X: %u of %u\r\n", (U32)index, length, len_test);
}
return err;
}


static size_t ExtendMatch(const FL2_matchTable* const tbl,
const BYTE* const data,
ptrdiff_t const start_index,
ptrdiff_t const limit,
U32 const link,
size_t const length)
{
ptrdiff_t end_index = start_index + length;
ptrdiff_t const dist = start_index - link;
while (end_index < limit && end_index - (ptrdiff_t)GetMatchLink(end_index) == dist) {
end_index += GetMatchLength(end_index);
}
if (end_index >= limit) {
return limit - start_index;
}
while (end_index < limit && data[end_index - dist] == data[end_index]) {
++end_index;
}
return end_index - start_index;
}

size_t
#ifdef RMF_BITPACK
RMF_bitpackGetMatch
#else
RMF_structuredGetMatch
#endif
(const FL2_matchTable* const tbl,
const BYTE* const data,
size_t const index,
size_t const limit,
unsigned const max_depth,
size_t* const offset_ptr)
{
size_t length;
size_t dist;
U32 link;
if (IsNull(index))
return 0;
link = GetMatchLink(index);
length = GetMatchLength(index);
if (length < 2)
return 0;
dist = index - link;
*offset_ptr = dist;
if (length > limit - index)
return limit - index;
if (length == max_depth
|| length == RADIX_MAX_LENGTH /* from HandleRepeat */)
{
length = ExtendMatch(tbl, data, index, limit, link, length);
}
return length;
}

210
C/fast-lzma2/radix_get.h
Normal file
@@ -0,0 +1,210 @@
/*
* Copyright (c) 2018, Conor McCarthy
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

#ifndef FL2_RADIX_GET_H_
#define FL2_RADIX_GET_H_

#if defined (__cplusplus)
extern "C" {
#endif

typedef struct
{
U32 length;
U32 dist;
} RMF_match;

static size_t RMF_bitpackExtendMatch(const BYTE* const data,
const U32* const table,
ptrdiff_t const start_index,
ptrdiff_t limit,
U32 const link,
size_t const length)
{
ptrdiff_t end_index = start_index + length;
ptrdiff_t const dist = start_index - link;

if (limit > start_index + (ptrdiff_t)kMatchLenMax)
limit = start_index + kMatchLenMax;

while (end_index < limit && end_index - (ptrdiff_t)(table[end_index] & RADIX_LINK_MASK) == dist)
end_index += table[end_index] >> RADIX_LINK_BITS;

if (end_index >= limit) {
DEBUGLOG(7, "RMF_bitpackExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(limit - start_index));
return limit - start_index;
}

while (end_index < limit && data[end_index - dist] == data[end_index])
++end_index;

DEBUGLOG(7, "RMF_bitpackExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(end_index - start_index));
return end_index - start_index;
}

#define GetMatchLink(table, index) ((const RMF_unit*)(table))[(index) >> UNIT_BITS].links[(index) & UNIT_MASK]

#define GetMatchLength(table, index) ((const RMF_unit*)(table))[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK]

static size_t RMF_structuredExtendMatch(const BYTE* const data,
const U32* const table,
ptrdiff_t const start_index,
ptrdiff_t limit,
U32 const link,
size_t const length)
{
ptrdiff_t end_index = start_index + length;
ptrdiff_t const dist = start_index - link;

if (limit > start_index + (ptrdiff_t)kMatchLenMax)
limit = start_index + kMatchLenMax;

while (end_index < limit && end_index - (ptrdiff_t)GetMatchLink(table, end_index) == dist)
end_index += GetMatchLength(table, end_index);

if (end_index >= limit) {
DEBUGLOG(7, "RMF_structuredExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(limit - start_index));
return limit - start_index;
}

while (end_index < limit && data[end_index - dist] == data[end_index])
++end_index;

DEBUGLOG(7, "RMF_structuredExtendMatch : pos %u, link %u, init length %u, full length %u", (U32)start_index, link, (U32)length, (U32)(end_index - start_index));
return end_index - start_index;
}

FORCE_INLINE_TEMPLATE
RMF_match RMF_getMatch(FL2_dataBlock block,
FL2_matchTable* tbl,
unsigned max_depth,
int structTbl,
size_t index)
{
if (structTbl)
{
U32 const link = GetMatchLink(tbl->table, index);

RMF_match match;
match.length = 0;

if (link == RADIX_NULL_LINK)
return match;

size_t const length = GetMatchLength(tbl->table, index);
size_t const dist = index - link - 1;

if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == STRUCTURED_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_structuredExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;

match.dist = (U32)dist;

return match;
}
else {
U32 link = tbl->table[index];

RMF_match match;
match.length = 0;

if (link == RADIX_NULL_LINK)
return match;

size_t const length = link >> RADIX_LINK_BITS;
link &= RADIX_LINK_MASK;
size_t const dist = index - link - 1;

if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == BITPACK_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_bitpackExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;

match.dist = (U32)dist;

return match;
}
}

FORCE_INLINE_TEMPLATE
RMF_match RMF_getNextMatch(FL2_dataBlock block,
FL2_matchTable* tbl,
unsigned max_depth,
int structTbl,
size_t index)
{
if (structTbl)
{
U32 const link = GetMatchLink(tbl->table, index);

RMF_match match;
match.length = 0;

if (link == RADIX_NULL_LINK)
return match;

size_t const length = GetMatchLength(tbl->table, index);
size_t const dist = index - link - 1;

/* same distance, one byte shorter */
if (link - 1 == GetMatchLink(tbl->table, index - 1))
return match;

if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == STRUCTURED_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_structuredExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;

match.dist = (U32)dist;

return match;
}
else {
U32 link = tbl->table[index];

RMF_match match;
match.length = 0;

if (link == RADIX_NULL_LINK)
return match;

size_t const length = link >> RADIX_LINK_BITS;
link &= RADIX_LINK_MASK;
size_t const dist = index - link - 1;

/* same distance, one byte shorter */
if (link - 1 == (tbl->table[index - 1] & RADIX_LINK_MASK))
return match;

if (length > block.end - index)
match.length = (U32)(block.end - index);
else if (length == max_depth || length == BITPACK_MAX_LENGTH /* from HandleRepeat */)
match.length = (U32)RMF_bitpackExtendMatch(block.data, tbl->table, index, block.end, link, length);
else
match.length = (U32)length;

match.dist = (U32)dist;

return match;
}
}

#if defined (__cplusplus)
}
#endif

#endif /* FL2_RADIX_GET_H_ */
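/* A short usage sketch (assumption, not from this file): scanning a block with
 * RMF_getMatch and stepping by the match length, one byte on a miss. The
 * function name and 'max_depth' plumbing are illustrative. */
static size_t RMF_countMatchedBytes_sketch(FL2_dataBlock block, FL2_matchTable* tbl, unsigned max_depth)
{
    size_t total = 0;
    for (size_t pos = block.start; pos < block.end;) {
        RMF_match m = RMF_getMatch(block, tbl, max_depth, tbl->is_struct, pos);
        if (m.length) { total += m.length; pos += m.length; }
        else ++pos; /* no match : advance past the literal */
    }
    return total;
}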
@@ -14,6 +14,10 @@
#include "atomic.h"
#include "radix_mf.h"

#if defined(FL2_XZ_BUILD) && defined(TUKLIB_FAST_UNALIGNED_ACCESS)
# define MEM_read32(a) (*(const U32*)(a))
#endif

#if defined (__cplusplus)
extern "C" {
#endif
@@ -21,26 +25,27 @@ extern "C" {
#define DICTIONARY_LOG_MIN 12U
#define DICTIONARY_LOG_MAX_64 30U
#define DICTIONARY_LOG_MAX_32 27U
#define DEFAULT_BUFFER_LOG 8U
#define DEFAULT_BLOCK_OVERLAP 2U
#define DEFAULT_SEARCH_DEPTH 32U
#define DEFAULT_DIVIDEANDCONQUER 1
#define MAX_REPEAT 32
#define RADIX16_TABLE_SIZE (1UL << 16)
#define RADIX8_TABLE_SIZE (1UL << 8)
#define DICTIONARY_SIZE_MIN ((size_t)1 << DICTIONARY_LOG_MIN)
#define DICTIONARY_SIZE_MAX_64 ((size_t)1 << DICTIONARY_LOG_MAX_64)
#define DICTIONARY_SIZE_MAX_32 ((size_t)1 << DICTIONARY_LOG_MAX_32)
#define MAX_REPEAT 24
#define RADIX16_TABLE_SIZE ((size_t)1 << 16)
#define RADIX8_TABLE_SIZE ((size_t)1 << 8)
#define STACK_SIZE (RADIX16_TABLE_SIZE * 3)
#define MAX_BRUTE_FORCE_LIST_SIZE 5
#define BUFFER_LINK_MASK 0xFFFFFFU
#define MATCH_BUFFER_OVERLAP 6
#define BITPACK_MAX_LENGTH 63UL
#define STRUCTURED_MAX_LENGTH 255UL
#define BITPACK_MAX_LENGTH 63U
#define STRUCTURED_MAX_LENGTH 255U

#define RADIX_LINK_BITS 26
#define RADIX_LINK_MASK ((1UL << RADIX_LINK_BITS) - 1)
#define RADIX_NULL_LINK 0xFFFFFFFFUL
#define RADIX_LINK_MASK ((1U << RADIX_LINK_BITS) - 1)
#define RADIX_NULL_LINK 0xFFFFFFFFU

#define UNIT_BITS 2
#define UNIT_MASK ((1UL << UNIT_BITS) - 1)
#define UNIT_MASK ((1U << UNIT_BITS) - 1)

#define RADIX_CANCEL_INDEX (long)(RADIX16_TABLE_SIZE + FL2_MAXTHREADS + 2)

typedef struct
{
@@ -88,9 +93,10 @@ struct FL2_matchTable_s
{
FL2_atomic st_index;
long end_index;
int isStruct;
int allocStruct;
int is_struct;
int alloc_struct;
unsigned thread_count;
size_t progress;
RMF_parameters params;
RMF_builder** builders;
U32 stack[RADIX16_TABLE_SIZE];
@@ -98,27 +104,25 @@ struct FL2_matchTable_s
U32 table[1];
};

size_t RMF_bitpackInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const start, size_t const end);
size_t RMF_structuredInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const start, size_t const end);
int RMF_bitpackBuildTable(struct FL2_matchTable_s* const tbl,
size_t RMF_bitpackInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const end);
size_t RMF_structuredInit(struct FL2_matchTable_s* const tbl, const void* data, size_t const end);
void RMF_bitpackBuildTable(struct FL2_matchTable_s* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done);
int RMF_structuredBuildTable(struct FL2_matchTable_s* const tbl,
FL2_dataBlock const block);
void RMF_structuredBuildTable(struct FL2_matchTable_s* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done);
FL2_dataBlock const block);
void RMF_recurseListChunk(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
BYTE const depth,
BYTE const max_depth,
U32 const depth,
U32 const max_depth,
U32 const list_count,
size_t const stack_base);
int RMF_bitpackIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth);
int RMF_structuredIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned const max_depth);
int RMF_bitpackIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned max_depth);
int RMF_structuredIntegrityCheck(const struct FL2_matchTable_s* const tbl, const BYTE* const data, size_t index, size_t const end, unsigned max_depth);
void RMF_bitpackLimitLengths(struct FL2_matchTable_s* const tbl, size_t const index);
void RMF_structuredLimitLengths(struct FL2_matchTable_s* const tbl, size_t const index);
BYTE* RMF_bitpackAsOutputBuffer(struct FL2_matchTable_s* const tbl, size_t const index);

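/* A minimal sketch (assumption) of the bit-packed entry layout implied by the
 * constants above: a 26-bit position link in the low bits and a 6-bit length
 * (up to BITPACK_MAX_LENGTH == 63) above it. */
static void RMF_unpackEntry_sketch(U32 entry, U32* link, U32* length)
{
    *link = entry & RADIX_LINK_MASK;    /* low 26 bits : link */
    *length = entry >> RADIX_LINK_BITS; /* high 6 bits : length */
}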
@@ -11,21 +11,20 @@
|
||||
#include <stddef.h> /* size_t, ptrdiff_t */
|
||||
#include <stdlib.h> /* malloc, free */
|
||||
#include "fast-lzma2.h"
|
||||
#include "fl2_errors.h"
|
||||
#include "mem.h" /* U32, U64, MEM_64bits */
|
||||
#include "fl2_internal.h"
|
||||
#include "radix_internal.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" /* warning: 'rpt_head_next' may be used uninitialized in this function */
|
||||
# pragma GCC diagnostic ignored "-Wmaybe-uninitialized" /* warning: 'rpt_head_next' may be used uninitialized in this function */
|
||||
#elif defined(_MSC_VER)
|
||||
# pragma warning(disable : 4701) /* disable: C4701: potentially uninitialized local variable */
|
||||
# pragma warning(disable : 4701) /* warning: 'rpt_head_next' may be used uninitialized in this function */
|
||||
#endif
|
||||
|
||||
#define MIN_MATCH_BUFFER_SIZE 256U /* min buffer size at least FL2_SEARCH_DEPTH_MAX + 2 for bounded build */
|
||||
#define MAX_MATCH_BUFFER_SIZE (1UL << 24) /* max buffer size constrained by 24-bit link values */
|
||||
|
||||
#define REPEAT_CHECK_TABLE ((1 << 1) | (1 << 2) | (1 << 4) | (1 << 8) | (1 << 16) | (1ULL << 32))
|
||||
|
||||
static void RMF_initTailTable(RMF_builder* const tbl)
|
||||
{
|
||||
for (size_t i = 0; i < RADIX8_TABLE_SIZE; i += 2) {
|
||||
@@ -43,89 +42,107 @@ static RMF_builder* RMF_createBuilder(size_t match_buffer_size)
|
||||
match_buffer_size = MIN(match_buffer_size, MAX_MATCH_BUFFER_SIZE);
|
||||
match_buffer_size = MAX(match_buffer_size, MIN_MATCH_BUFFER_SIZE);
|
||||
|
||||
{ RMF_builder* const builder = (RMF_builder*)malloc(
|
||||
RMF_builder* const builder = malloc(
|
||||
sizeof(RMF_builder) + (match_buffer_size - 1) * sizeof(RMF_buildMatch));
|
||||
|
||||
if (builder == NULL)
|
||||
return NULL;
|
||||
|
||||
builder->match_buffer_size = match_buffer_size;
|
||||
builder->match_buffer_limit = match_buffer_size;
|
||||
|
||||
RMF_initTailTable(builder);
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
static void RMF_freeBuilderTable(RMF_builder** const builders, unsigned const size)
|
||||
{
|
||||
if (builders == NULL)
|
||||
return;
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
|
||||
for (unsigned i = 0; i < size; ++i)
|
||||
free(builders[i]);
|
||||
}
|
||||
|
||||
free(builders);
|
||||
}
|
||||
|
||||
static RMF_builder** RMF_createBuilderTable(U32* const matchTable, size_t const match_buffer_size, unsigned const max_len, unsigned const size)
|
||||
/* RMF_createBuilderTable() :
|
||||
* Create one match table builder object per thread.
|
||||
* max_len : maximum match length supported by the table structure
|
||||
* size : number of threads
|
||||
*/
|
||||
static RMF_builder** RMF_createBuilderTable(U32* const match_table, size_t const match_buffer_size, unsigned const max_len, unsigned const size)
|
||||
{
|
||||
RMF_builder** builders = (RMF_builder**)malloc(size * sizeof(RMF_builder*));
|
||||
DEBUGLOG(3, "RMF_createBuilderTable : match_buffer_size %u, builders %u", (U32)match_buffer_size, size);
|
||||
|
||||
RMF_builder** const builders = malloc(size * sizeof(RMF_builder*));
|
||||
|
||||
if (builders == NULL)
|
||||
return NULL;
|
||||
|
||||
for (unsigned i = 0; i < size; ++i)
|
||||
builders[i] = NULL;
|
||||
|
||||
for (unsigned i = 0; i < size; ++i) {
|
||||
builders[i] = RMF_createBuilder(match_buffer_size);
|
||||
if (builders[i] == NULL) {
|
||||
RMF_freeBuilderTable(builders, i);
|
||||
return NULL;
|
||||
}
|
||||
builders[i]->table = matchTable;
|
||||
builders[i]->table = match_table;
|
||||
builders[i]->max_len = max_len;
|
||||
}
|
||||
return builders;
|
||||
}
|
||||
|
||||
static int RMF_isStruct(unsigned dictionary_log, unsigned depth)
|
||||
static int RMF_isStruct(size_t const dictionary_size)
|
||||
{
|
||||
return dictionary_log > RADIX_LINK_BITS || depth > BITPACK_MAX_LENGTH;
|
||||
return dictionary_size > ((size_t)1 << RADIX_LINK_BITS);
|
||||
}
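
/* Illustrative sketch (not from the source): the bit-packed table stores each
 * link in RADIX_LINK_BITS bits of a U32, so it can only address dictionaries up
 * to 1 << RADIX_LINK_BITS bytes; anything larger needs the structured layout.
 * Assuming RADIX_LINK_BITS were 26 (its real value lives in radix_internal.h):
 *
 *     size_t dict = (size_t)1 << 27;          // 128 MiB dictionary
 *     int st = dict > ((size_t)1 << 26);      // 1 : structured table required
 */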

static int RMF_isStructParam(const RMF_parameters* const params)
{
return RMF_isStruct(params->dictionary_log, params->depth);
}

/** RMF_clampCParams() :
* make CParam values within valid range.
* @return : valid CParams */
/* RMF_clampParams() :
* Make param values within valid range.
* Return : valid RMF_parameters */
static RMF_parameters RMF_clampParams(RMF_parameters params)
{
# define CLAMP(val,min,max) { \
if (val<(min)) val=(min); \
else if (val>(max)) val=(max); \
}
CLAMP(params.dictionary_log, DICTIONARY_LOG_MIN, MEM_64bits() ? DICTIONARY_LOG_MAX_64 : DICTIONARY_LOG_MAX_32);
CLAMP(params.match_buffer_log, FL2_BUFFER_SIZE_LOG_MIN, FL2_BUFFER_SIZE_LOG_MAX);
CLAMP(params.overlap_fraction, FL2_BLOCK_OVERLAP_MIN, FL2_BLOCK_OVERLAP_MAX);
CLAMP(params.dictionary_size, DICTIONARY_SIZE_MIN, MEM_64bits() ? DICTIONARY_SIZE_MAX_64 : DICTIONARY_SIZE_MAX_32);
CLAMP(params.match_buffer_log, RMF_BUFFER_LOG_MIN, RMF_BUFFER_LOG_MAX);
if (params.overlap_fraction > FL2_BLOCK_OVERLAP_MAX)
params.overlap_fraction = FL2_BLOCK_OVERLAP_MAX;
CLAMP(params.depth, FL2_SEARCH_DEPTH_MIN, FL2_SEARCH_DEPTH_MAX);
return params;
# undef CLAMP
}

/* RMF_applyParameters_internal() :
* Set parameters to those specified.
* Create a builder table if none exists. Free an existing one if incompatible.
* Set match_buffer_limit and max supported match length.
* Returns an error if dictionary won't fit.
*/
static size_t RMF_applyParameters_internal(FL2_matchTable* const tbl, const RMF_parameters* const params)
{
int const isStruct = RMF_isStructParam(params);
unsigned const dictionary_log = tbl->params.dictionary_log;
int const is_struct = RMF_isStruct(params->dictionary_size);
size_t const dictionary_size = tbl->params.dictionary_size;
/* dictionary is allocated with the struct and is immutable */
if (params->dictionary_log > tbl->params.dictionary_log
|| (params->dictionary_log == tbl->params.dictionary_log && isStruct > tbl->allocStruct))
if (params->dictionary_size > tbl->params.dictionary_size
|| (params->dictionary_size == tbl->params.dictionary_size && is_struct > tbl->alloc_struct))
return FL2_ERROR(parameter_unsupported);

{ size_t const match_buffer_size = (size_t)1 << (params->dictionary_log - params->match_buffer_log);
size_t const match_buffer_size = params->dictionary_size >> params->match_buffer_log;
tbl->params = *params;
tbl->params.dictionary_log = dictionary_log;
tbl->isStruct = isStruct;
tbl->params.dictionary_size = dictionary_size;
tbl->is_struct = is_struct;
if (tbl->builders == NULL
|| match_buffer_size > tbl->builders[0]->match_buffer_size)
{
RMF_freeBuilderTable(tbl->builders, tbl->thread_count);
tbl->builders = RMF_createBuilderTable(tbl->table, match_buffer_size, tbl->isStruct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH, tbl->thread_count);
tbl->builders = RMF_createBuilderTable(tbl->table, match_buffer_size, tbl->is_struct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH, tbl->thread_count);
if (tbl->builders == NULL) {
return FL2_ERROR(memory_allocation);
}
@@ -133,56 +150,67 @@ static size_t RMF_applyParameters_internal(FL2_matchTable* const tbl, const RMF_
else {
for (unsigned i = 0; i < tbl->thread_count; ++i) {
tbl->builders[i]->match_buffer_limit = match_buffer_size;
tbl->builders[i]->max_len = tbl->isStruct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH;
}
tbl->builders[i]->max_len = tbl->is_struct ? STRUCTURED_MAX_LENGTH : BITPACK_MAX_LENGTH;
}
}
return 0;
}

/* RMF_reduceDict() :
* Reduce dictionary and match buffer size if the total input size is known and < dictionary_size.
*/
static void RMF_reduceDict(RMF_parameters* const params, size_t const dict_reduce)
{
if (dict_reduce)
while (params->dictionary_log > DICTIONARY_LOG_MIN && (size_t)1 << (params->dictionary_log - 1) >= dict_reduce) {
--params->dictionary_log;
params->match_buffer_log = MAX(params->match_buffer_log - 1, FL2_BUFFER_SIZE_LOG_MIN);
if (dict_reduce) {
for (size_t dict_size = params->dictionary_size; dict_size > DICTIONARY_SIZE_MIN && (dict_size >> 1) >= dict_reduce; dict_size >>= 1) {
/* Use unchanged match buffer size for reduced dict */
params->match_buffer_log = MAX(params->match_buffer_log - 1, RMF_BUFFER_LOG_MIN);
}
params->dictionary_size = MIN(params->dictionary_size, MAX(dict_reduce, DICTIONARY_SIZE_MIN));
}
}

FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const dict_reduce, unsigned const thread_count)
static void RMF_initListHeads(FL2_matchTable* const tbl)
{
int isStruct;
size_t dictionary_size;
size_t table_bytes;
FL2_matchTable* tbl;
RMF_parameters params = RMF_clampParams(*p);

RMF_reduceDict(&params, dict_reduce);
isStruct = RMF_isStructParam(&params);
dictionary_size = (size_t)1 << params.dictionary_log;

DEBUGLOG(3, "RMF_createMatchTable : isStruct %d, dict %u", isStruct, (U32)dictionary_size);

table_bytes = isStruct ? ((dictionary_size + 3U) / 4U) * sizeof(RMF_unit)
: dictionary_size * sizeof(U32);
tbl = (FL2_matchTable*)malloc(
sizeof(FL2_matchTable) + table_bytes - sizeof(U32));
if (!tbl) return NULL;

tbl->isStruct = isStruct;
tbl->allocStruct = isStruct;
tbl->thread_count = thread_count + !thread_count;
tbl->params = params;
tbl->builders = NULL;

RMF_applyParameters_internal(tbl, &params);

for (size_t i = 0; i < RADIX16_TABLE_SIZE; i += 2) {
tbl->list_heads[i].head = RADIX_NULL_LINK;
tbl->list_heads[i].count = 0;
tbl->list_heads[i + 1].head = RADIX_NULL_LINK;
tbl->list_heads[i + 1].count = 0;
}
}

/* RMF_createMatchTable() :
* Create a match table. Reduce the dict size to input size if possible.
* A thread_count of 0 will be raised to 1.
*/
FL2_matchTable* RMF_createMatchTable(const RMF_parameters* const p, size_t const dict_reduce, unsigned const thread_count)
{
RMF_parameters params = RMF_clampParams(*p);
RMF_reduceDict(&params, dict_reduce);

int const is_struct = RMF_isStruct(params.dictionary_size);
size_t dictionary_size = params.dictionary_size;

DEBUGLOG(3, "RMF_createMatchTable : is_struct %d, dict %u", is_struct, (U32)dictionary_size);

size_t const table_bytes = is_struct ? ((dictionary_size + 3U) / 4U) * sizeof(RMF_unit)
: dictionary_size * sizeof(U32);
FL2_matchTable* const tbl = malloc(sizeof(FL2_matchTable) + table_bytes - sizeof(U32));
if (!tbl) return NULL;

tbl->is_struct = is_struct;
tbl->alloc_struct = is_struct;
tbl->thread_count = thread_count + !thread_count;
tbl->params = params;
tbl->builders = NULL;

RMF_applyParameters_internal(tbl, &params);

RMF_initListHeads(tbl);

RMF_initProgress(tbl);

return tbl;
}

@@ -190,7 +218,9 @@ void RMF_freeMatchTable(FL2_matchTable* const tbl)
{
if (tbl == NULL)
return;

DEBUGLOG(3, "RMF_freeMatchTable");

RMF_freeBuilderTable(tbl->builders, tbl->thread_count);
free(tbl);
}
@@ -199,8 +229,8 @@ BYTE RMF_compatibleParameters(const FL2_matchTable* const tbl, const RMF_paramet
{
RMF_parameters params = RMF_clampParams(*p);
RMF_reduceDict(&params, dict_reduce);
return tbl->params.dictionary_log > params.dictionary_log
|| (tbl->params.dictionary_log == params.dictionary_log && tbl->allocStruct >= RMF_isStructParam(&params));
return tbl->params.dictionary_size > params.dictionary_size
|| (tbl->params.dictionary_size == params.dictionary_size && tbl->alloc_struct >= RMF_isStruct(params.dictionary_size));
}

size_t RMF_applyParameters(FL2_matchTable* const tbl, const RMF_parameters* const p, size_t const dict_reduce)
@@ -215,18 +245,25 @@ size_t RMF_threadCount(const FL2_matchTable* const tbl)
return tbl->thread_count;
}

size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end)
void RMF_initProgress(FL2_matchTable * const tbl)
{
DEBUGLOG(5, "RMF_initTable : start %u, size %u", (U32)start, (U32)end);
if (tbl->isStruct) {
return RMF_structuredInit(tbl, data, start, end);
}
else {
return RMF_bitpackInit(tbl, data, start, end);
}
if (tbl != NULL)
tbl->progress = 0;
}

static void HandleRepeat(RMF_buildMatch* const match_buffer,
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const end)
{
DEBUGLOG(5, "RMF_initTable : size %u", (U32)end);

tbl->st_index = ATOMIC_INITIAL_VALUE;

if (tbl->is_struct)
return RMF_structuredInit(tbl, data, end);
else
return RMF_bitpackInit(tbl, data, end);
}

static void RMF_handleRepeat(RMF_buildMatch* const match_buffer,
const BYTE* const data_block,
size_t const next,
U32 count,
@@ -235,20 +272,22 @@ static void HandleRepeat(RMF_buildMatch* const match_buffer,
U32 const max_len)
{
size_t index = next;
size_t next_i;
U32 length = depth + rpt_len;

const BYTE* const data = data_block + match_buffer[index].from;
const BYTE* const data_2 = data - rpt_len;

while (data[length] == data_2[length] && length < max_len)
++length;

for (; length <= max_len && count; --count) {
next_i = match_buffer[index].next & 0xFFFFFF;
size_t next_i = match_buffer[index].next & 0xFFFFFF;
match_buffer[index].next = (U32)next_i | (length << 24);
length += rpt_len;
index = next_i;
}
for (; count; --count) {
next_i = match_buffer[index].next & 0xFFFFFF;
size_t next_i = match_buffer[index].next & 0xFFFFFF;
match_buffer[index].next = (U32)next_i | (max_len << 24);
index = next_i;
}
@@ -261,27 +300,29 @@ typedef struct
union src_data_u src;
} BruteForceMatch;

static void BruteForceBuffered(RMF_builder* const tbl,
static void RMF_bruteForceBuffered(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
size_t index,
size_t list_count,
size_t const list_count,
size_t const slot,
size_t const depth,
size_t const max_depth)
{
BruteForceMatch buffer[MAX_BRUTE_FORCE_LIST_SIZE + 1];
const BYTE* data_src = data_block + depth;
size_t limit = max_depth - depth;
const BYTE* start = data_src + block_start;
const BYTE* const data_src = data_block + depth;
size_t const limit = max_depth - depth;
const BYTE* const start = data_src + block_start;
size_t i = 0;
for (;;) {
/* Load all locations from the match buffer */
buffer[i].index = index;
buffer[i].data_src = data_src + tbl->match_buffer[index].from;
buffer[i].src.u32 = tbl->match_buffer[index].src.u32;
if (++i >= list_count) {

if (++i >= list_count)
break;
}

index = tbl->match_buffer[index].next & 0xFFFFFF;
}
i = 0;
@@ -289,28 +330,29 @@ static void BruteForceBuffered(RMF_builder* const tbl,
size_t longest = 0;
size_t j = i + 1;
size_t longest_index = j;
const BYTE* data = buffer[i].data_src;
const BYTE* const data = buffer[i].data_src;
do {
/* Begin with the remaining chars pulled from the match buffer */
size_t len_test = slot;
while (len_test < 4 && buffer[i].src.chars[len_test] == buffer[j].src.chars[len_test] && len_test - slot < limit) {
while (len_test < 4 && buffer[i].src.chars[len_test] == buffer[j].src.chars[len_test] && len_test - slot < limit)
++len_test;
}

len_test -= slot;
if (len_test) {
/* Complete the match length count in the raw input buffer */
const BYTE* data_2 = buffer[j].data_src;
while (data[len_test] == data_2[len_test] && len_test < limit) {
while (data[len_test] == data_2[len_test] && len_test < limit)
++len_test;
}
}
if (len_test > longest) {
longest_index = j;
longest = len_test;
if (len_test >= limit) {
if (len_test >= limit)
break;
}
}
} while (++j < list_count);
if (longest > 0) {
/* If the existing match was extended, store the new link and length info in the match buffer */
index = buffer[i].index;
tbl->match_buffer[index].next = (U32)(buffer[longest_index].index | ((depth + longest) << 24));
}
@@ -318,17 +360,19 @@ static void BruteForceBuffered(RMF_builder* const tbl,
} while (i < list_count - 1 && buffer[i].data_src >= start);
}

/* Lengthen and divide buffered chains into smaller chains, save them on a stack and process in turn.
 * The match finder spends most of its time here.
 */
FORCE_INLINE_TEMPLATE
void RMF_recurseListChunk_generic(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
BYTE depth,
BYTE const max_depth,
U32 depth,
U32 const max_depth,
U32 list_count,
size_t const stack_base)
{
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE base_depth = depth;
U32 const base_depth = depth;
size_t st_index = stack_base;
size_t index = 0;
++depth;
@@ -338,10 +382,11 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
size_t const radix_8 = tbl->match_buffer[index].src.chars[0];
/* Seen this char before? */
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
/* Link the previous occurrence to this one and record the new length */
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -351,7 +396,6 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
++index;
} while (index < list_count);

@@ -361,7 +405,7 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
}
/* Convert radix values on the stack to counts and reset any used tail slots */
@@ -370,11 +414,6 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[j].count = (U32)tbl->tails_8[tbl->stack[j].count].list_count;
}
while (st_index > stack_base) {
const BYTE* data_src;
size_t link;
size_t slot;
U32 test;

/* Pop an item off the stack */
--st_index;
list_count = tbl->stack[st_index].count;
@@ -383,7 +422,7 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
continue;
}
index = tbl->stack[st_index].head;
link = tbl->match_buffer[index].from;
size_t link = tbl->match_buffer[index].from;
if (link < block_start) {
/* Chain starts in the overlap region which is already encoded */
continue;
@@ -396,10 +435,11 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
continue;
}
depth = tbl->match_buffer[index].next >> 24;
slot = (depth - base_depth) & 3;
/* Index into the 4-byte pre-loaded input char cache */
size_t slot = (depth - base_depth) & 3;
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE) {
/* Quicker to use brute force, each string compared with all previous strings */
BruteForceBuffered(tbl,
RMF_bruteForceBuffered(tbl,
data_block,
block_start,
index,
@@ -409,35 +449,41 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
max_depth);
continue;
}
/* check for repeats at depth 4,8,16,32 etc */
test = max_depth != 6 && ((depth & 3) == 0) && ((REPEAT_CHECK_TABLE >> ((depth >> 2) & 31)) & 1) && (max_depth >= depth + (depth >> 1));
/* check for repeats at depth 4,8,16,32 etc unless depth is near max_depth */
U32 const test = max_depth != 6 && ((depth & 3) == 0)
&& (depth & (depth - 1)) == 0
&& (max_depth >= depth + (depth >> 1));
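/* Illustrative note (not from the source): (depth & (depth - 1)) == 0 is the
 * usual power-of-two test, so combined with (depth & 3) == 0 it fires at depths
 * 4, 8, 16, 32, 64, ... For depths below 128 this selects the same values the
 * old REPEAT_CHECK_TABLE bitmask did, e.g. for depth == 16:
 *     (REPEAT_CHECK_TABLE >> ((16 >> 2) & 31)) & 1   -> bit 4 is set -> 1
 *     (16 & (16 - 1)) == 0                           -> 1
 * without needing the 64-bit table constant. */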
++depth;
/* Update the offset data buffer pointer */
data_src = data_block + depth;
/* Create an offset data buffer pointer for reading the next bytes */
const BYTE* const data_src = data_block + depth;
/* Last pass is done separately */
if (!test && depth < max_depth) {
size_t const prev_st_index = st_index;
/* Last element done separately */
--list_count;
/* slot is the char cache index. If 3 then chars need to be loaded. */
/* If slot is 3 then chars need to be loaded. */
if (slot == 3 && max_depth != 6) do {
size_t const radix_8 = tbl->match_buffer[index].src.chars[3];
size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
/* Pre-load the next link and data bytes to avoid waiting for RAM access */
/* Pre-load the next link and data bytes. On some hardware execution can continue
 * ahead while the data is retrieved if no operations except move are done on the data. */
tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link);
size_t const next_link = tbl->match_buffer[next_index].from;
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev!=RADIX_NULL_LINK) {
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
/* This char has occurred before in the chain. Link the previous (> index) occurrence with this */
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
/* First occurrence in the chain */
tbl->tails_8[radix_8].list_count = 1;
tbl->stack[st_index].head = (U32)index;
/* Save the char as a reference to load the count at the end */
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
} while (--list_count != 0);
@@ -447,9 +493,10 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
/* Pre-load the next link to avoid waiting for RAM access */
size_t const next_link = tbl->match_buffer[next_index].from;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -457,20 +504,18 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
} while (--list_count != 0);

{ size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
if (slot == 3) {
if (slot == 3)
tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link);
}

++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
}
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
for (size_t j = prev_st_index; j < st_index; ++j) {
tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK;
@@ -490,14 +535,15 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
size_t const next_index = tbl->match_buffer[index].next & BUFFER_LINK_MASK;
size_t const next_link = tbl->match_buffer[next_index].from;
if ((link - next_link) > rpt_depth) {
if (rpt > 0) {
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
}
if (rpt > 0)
RMF_handleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);

rpt = -1;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -505,23 +551,23 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
}
else {
U32 const dist = (U32)(link - next_link);
if (rpt < 0 || dist != rpt_dist) {
if (rpt > 0) {
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
}
if (rpt > 0)
RMF_handleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);

rpt = 0;
rpt_head_next = next_index;
rpt_dist = dist;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->tails_8[radix_8].list_count = 1;
@@ -529,7 +575,6 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
}
else {
++rpt;
@@ -538,19 +583,18 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
link = next_link;
}
} while (--list_count != 0);
if (rpt > 0) {
HandleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);
}

{ size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
if (rpt > 0)
RMF_handleRepeat(tbl->match_buffer, data_block, rpt_head_next, rpt, rpt_dist, rpt_depth, tbl->max_len);

size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
U32 const prev = tbl->tails_8[radix_8].prev_index;
if (prev != RADIX_NULL_LINK) {
if (slot == 3) {
tbl->match_buffer[index].src.u32 = MEM_read32(data_src + link);
}
++tbl->tails_8[radix_8].list_count;
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
}
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
for (size_t j = prev_st_index; j < st_index; ++j) {
tbl->tails_8[tbl->stack[j].count].prev_index = RADIX_NULL_LINK;
@@ -558,7 +602,7 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
}
}
else {
size_t prev_st_index = st_index;
size_t const prev_st_index = st_index;
/* The last pass at max_depth */
do {
size_t const radix_8 = tbl->match_buffer[index].src.chars[slot];
@@ -567,14 +611,14 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
/* The last element in tbl->match_buffer is circular so this is never an access violation. */
size_t const next_link = tbl->match_buffer[next_index].from;
U32 const prev = tbl->tails_8[radix_8].prev_index;
tbl->tails_8[radix_8].prev_index = (U32)index;
if (prev != RADIX_NULL_LINK) {
tbl->match_buffer[prev].next = (U32)index | ((U32)depth << 24);
tbl->match_buffer[prev].next = (U32)index | (depth << 24);
}
else {
tbl->stack[st_index].count = (U32)radix_8;
++st_index;
}
tbl->tails_8[radix_8].prev_index = (U32)index;
index = next_index;
link = next_link;
} while (--list_count != 0);
@@ -589,84 +633,81 @@ void RMF_recurseListChunk_generic(RMF_builder* const tbl,
void RMF_recurseListChunk(RMF_builder* const tbl,
const BYTE* const data_block,
size_t const block_start,
BYTE const depth,
BYTE const max_depth,
U32 const depth,
U32 const max_depth,
U32 const list_count,
size_t const stack_base)
{
if (max_depth > 6) {
if (list_count < 2)
return;
/* Template-like inline functions */
if (list_count <= MAX_BRUTE_FORCE_LIST_SIZE)
RMF_bruteForceBuffered(tbl, data_block, block_start, 0, list_count, 0, depth, max_depth);
else if (max_depth > 6)
RMF_recurseListChunk_generic(tbl, data_block, block_start, depth, max_depth, list_count, stack_base);
}
else {
else
RMF_recurseListChunk_generic(tbl, data_block, block_start, depth, 6, list_count, stack_base);
}
}

/* Iterate the head table concurrently with other threads, and recurse each list until max_depth is reached */
int RMF_buildTable(FL2_matchTable* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done)
FL2_dataBlock const block)
{
DEBUGLOG(5, "RMF_buildTable : thread %u", (U32)job);
if (tbl->isStruct) {
return RMF_structuredBuildTable(tbl, job, multi_thread, block, progress, opaque, weight, init_done);
}
else {
return RMF_bitpackBuildTable(tbl, job, multi_thread, block, progress, opaque, weight, init_done);

if (tbl->is_struct)
RMF_structuredBuildTable(tbl, job, multi_thread, block);
else
RMF_bitpackBuildTable(tbl, job, multi_thread, block);

if (job == 0 && tbl->st_index >= RADIX_CANCEL_INDEX) {
RMF_initListHeads(tbl);
return 1;
}
return 0;
}

void RMF_cancelBuild(FL2_matchTable * const tbl)
{
if(tbl != NULL)
FL2_atomic_add(tbl->st_index, RADIX_CANCEL_INDEX - ATOMIC_INITIAL_VALUE);
}

void RMF_resetIncompleteBuild(FL2_matchTable * const tbl)
{
RMF_initListHeads(tbl);
}

int RMF_integrityCheck(const FL2_matchTable* const tbl, const BYTE* const data, size_t const index, size_t const end, unsigned const max_depth)
{
if (tbl->isStruct) {
if (tbl->is_struct)
return RMF_structuredIntegrityCheck(tbl, data, index, end, max_depth);
}
else {
else
return RMF_bitpackIntegrityCheck(tbl, data, index, end, max_depth);
}
}

size_t RMF_getMatch(FL2_matchTable* const tbl,
const BYTE* const data,
size_t const index,
size_t const limit,
unsigned max_depth,
size_t* const offset_ptr)
{
if (tbl->isStruct) {
return RMF_structuredGetMatch(tbl, data, index, limit, max_depth, offset_ptr);
}
else {
return RMF_bitpackGetMatch(tbl, data, index, limit, max_depth, offset_ptr);
}
}

void RMF_limitLengths(FL2_matchTable* const tbl, size_t const index)
{
if (tbl->isStruct) {
if (tbl->is_struct)
RMF_structuredLimitLengths(tbl, index);
}
else {
else
RMF_bitpackLimitLengths(tbl, index);
}
}

BYTE* RMF_getTableAsOutputBuffer(FL2_matchTable* const tbl, size_t const index)
{
if (tbl->isStruct) {
if (tbl->is_struct)
return RMF_structuredAsOutputBuffer(tbl, index);
}
else {
else
return RMF_bitpackAsOutputBuffer(tbl, index);
}
}

size_t RMF_memoryUsage(unsigned const dict_log, unsigned const buffer_log, unsigned const depth, unsigned thread_count)
size_t RMF_memoryUsage(size_t const dict_size, unsigned const buffer_log, unsigned const thread_count)
{
size_t size = (size_t)(4U + RMF_isStruct(dict_log, depth)) << dict_log;
U32 buf_size = (U32)1 << (dict_log - buffer_log);
size_t size = (size_t)(4U + RMF_isStruct(dict_size)) * dict_size;
size_t const buf_size = dict_size >> buffer_log;
size += ((buf_size - 1) * sizeof(RMF_buildMatch) + sizeof(RMF_builder)) * thread_count;
return size;
}
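
/* Usage sketch (illustrative): rough footprint for a 64 MiB dictionary with the
 * base buffer log and two threads, e.g.
 *     size_t need = RMF_memoryUsage((size_t)1 << 26, RMF_BUFFER_LOG_BASE, 2);
 * The exact figure depends on sizeof(RMF_buildMatch) and sizeof(RMF_builder),
 * so treat the result as an estimate, not a guarantee. */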

@@ -20,16 +20,19 @@ extern "C" {

typedef struct FL2_matchTable_s FL2_matchTable;

#define OVERLAP_FROM_DICT_LOG(d, o) (((size_t)1 << ((d) - 4)) * (o))
#define OVERLAP_FROM_DICT_SIZE(d, o) (((d) >> 4) * (o))

#define RMF_MIN_BYTES_PER_THREAD 1024

#define RMF_BUFFER_LOG_BASE 12
#define RMF_BUFFER_LOG_MIN 6
#define RMF_BUFFER_LOG_MAX 12

typedef struct
{
unsigned dictionary_log;
size_t dictionary_size;
unsigned match_buffer_log;
unsigned overlap_fraction;
unsigned block_size_log;
unsigned divide_and_conquer;
unsigned depth;
#ifdef RMF_REFERENCE
@@ -42,16 +45,18 @@ void RMF_freeMatchTable(FL2_matchTable* const tbl);
BYTE RMF_compatibleParameters(const FL2_matchTable* const tbl, const RMF_parameters* const params, size_t const dict_reduce);
size_t RMF_applyParameters(FL2_matchTable* const tbl, const RMF_parameters* const params, size_t const dict_reduce);
size_t RMF_threadCount(const FL2_matchTable * const tbl);
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const start, size_t const end);
void RMF_initProgress(FL2_matchTable * const tbl);
size_t RMF_initTable(FL2_matchTable* const tbl, const void* const data, size_t const end);
int RMF_buildTable(FL2_matchTable* const tbl,
size_t const job,
unsigned const multi_thread,
FL2_dataBlock const block,
FL2_progressFn progress, void* opaque, U32 weight, size_t init_done);
FL2_dataBlock const block);
void RMF_cancelBuild(FL2_matchTable* const tbl);
void RMF_resetIncompleteBuild(FL2_matchTable* const tbl);
int RMF_integrityCheck(const FL2_matchTable* const tbl, const BYTE* const data, size_t const index, size_t const end, unsigned const max_depth);
void RMF_limitLengths(FL2_matchTable* const tbl, size_t const index);
BYTE* RMF_getTableAsOutputBuffer(FL2_matchTable* const tbl, size_t const index);
size_t RMF_memoryUsage(unsigned const dict_log, unsigned const buffer_log, unsigned const depth, unsigned thread_count);
size_t RMF_memoryUsage(size_t const dict_size, unsigned const buffer_log, unsigned const thread_count);

#if defined (__cplusplus)
}

@@ -9,7 +9,7 @@
*/

#include "mem.h" /* U32, U64 */
#include "fl2threading.h"
#include "fl2_threading.h"
#include "fl2_internal.h"
#include "radix_internal.h"

@@ -34,7 +34,7 @@ typedef struct FL2_matchTable_s FL2_matchTable;

#define SetMatchLength(index, link, length) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].lengths[(index) & UNIT_MASK] = (BYTE)(length)

#define SetMatchLinkAndLength(index, link, length) { size_t i_ = (index) >> UNIT_BITS, u_ = (index) & UNIT_MASK; ((RMF_unit*)tbl->table)[i_].links[u_] = (U32)(link); ((RMF_unit*)tbl->table)[i_].lengths[u_] = (BYTE)(length); }
#define SetMatchLinkAndLength(index, link, length) do { size_t i_ = (index) >> UNIT_BITS, u_ = (index) & UNIT_MASK; ((RMF_unit*)tbl->table)[i_].links[u_] = (U32)(link); ((RMF_unit*)tbl->table)[i_].lengths[u_] = (BYTE)(length); } while(0)

#define SetNull(index) ((RMF_unit*)tbl->table)[(index) >> UNIT_BITS].links[(index) & UNIT_MASK] = RADIX_NULL_LINK


@@ -7,84 +7,194 @@

#include "fl2_internal.h"
#include "mem.h"
#include "platform.h"
#include "range_enc.h"

const unsigned price_table[kBitModelTotal >> kNumMoveReducingBits] = {
128, 103, 91, 84, 78, 73, 69, 66,
63, 61, 58, 56, 54, 52, 51, 49,
48, 46, 45, 44, 43, 42, 41, 40,
39, 38, 37, 36, 35, 34, 34, 33,
32, 31, 31, 30, 29, 29, 28, 28,
27, 26, 26, 25, 25, 24, 24, 23,
23, 22, 22, 22, 21, 21, 20, 20,
19, 19, 19, 18, 18, 17, 17, 17,
16, 16, 16, 15, 15, 15, 14, 14,
14, 13, 13, 13, 12, 12, 12, 11,
11, 11, 11, 10, 10, 10, 10, 9,
9, 9, 9, 8, 8, 8, 8, 7,
7, 7, 7, 6, 6, 6, 6, 5,
5, 5, 5, 5, 4, 4, 4, 4,
3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 1, 1, 1, 1, 1
};
/* The first and last elements of these tables are never used */
BYTE price_table[2][kPriceTableSize] = { {
0, 193, 182, 166, 154, 145, 137, 131,
125, 120, 115, 111, 107, 103, 100, 97,
94, 91, 89, 86, 84, 82, 80, 78,
76, 74, 72, 71, 69, 67, 66, 64,
63, 61, 60, 59, 57, 56, 55, 54,
53, 52, 50, 49, 48, 47, 46, 45,
44, 43, 42, 42, 41, 40, 39, 38,
37, 36, 36, 35, 34, 33, 33, 32,
31, 30, 30, 29, 28, 28, 27, 26,
26, 25, 25, 24, 23, 23, 22, 21,
21, 20, 20, 19, 19, 18, 18, 17,
17, 16, 16, 15, 15, 14, 14, 13,
13, 12, 12, 11, 11, 10, 10, 9,
9, 8, 8, 8, 7, 7, 6, 6,
5, 5, 5, 4, 4, 3, 3, 3,
2, 2, 2, 1, 1, 0, 0, 0
}, {
0, 0, 0, 1, 1, 2, 2, 2,
3, 3, 3, 4, 4, 5, 5, 5,
6, 6, 7, 7, 8, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12, 13,
13, 13, 14, 14, 15, 15, 16, 17,
17, 18, 18, 19, 19, 20, 20, 21,
21, 22, 23, 23, 24, 24, 25, 26,
26, 27, 28, 28, 29, 30, 30, 31,
32, 33, 33, 34, 35, 36, 36, 37,
38, 39, 40, 41, 41, 42, 43, 44,
45, 46, 47, 48, 49, 50, 51, 53,
54, 55, 56, 57, 59, 60, 61, 63,
64, 66, 67, 69, 70, 72, 74, 76,
78, 80, 82, 84, 86, 89, 91, 94,
97, 100, 103, 107, 111, 115, 119, 125,
130, 137, 145, 154, 165, 181, 192, 0
} };

void SetOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size)
#if 0

#include <stdio.h>

/* Generates price_table */
void RC_printPriceTable()
{
static const unsigned test_size = 0x4000;
const unsigned test_div = test_size >> 8;
BYTE buf[0x3062];
unsigned table0[kPriceTableSize];
unsigned table1[kPriceTableSize];
unsigned count[kPriceTableSize];
memset(table0, 0, sizeof(table0));
memset(table1, 0, sizeof(table1));
memset(count, 0, sizeof(count));
for (Probability i = 31; i <= kBitModelTotal - 31; ++i) {
RangeEncoder rc;
RC_reset(&rc);
RC_setOutputBuffer(&rc, buf, sizeof(buf));
for (unsigned j = 0; j < test_size; ++j) {
Probability prob = i;
RC_encodeBit0(&rc, &prob);
}
RC_flush(&rc);
table0[i >> kNumMoveReducingBits] += (unsigned)rc.out_index - 5;
RC_reset(&rc);
RC_setOutputBuffer(&rc, buf, sizeof(buf));
for (unsigned j = 0; j < test_size; ++j) {
Probability prob = i;
RC_encodeBit1(&rc, &prob);
}
RC_flush(&rc);
table1[i >> kNumMoveReducingBits] += (unsigned)rc.out_index - 5;
++count[i >> kNumMoveReducingBits];
}
for (int i = 0; i < kPriceTableSize; ++i) if (count[i]) {
table0[i] = (table0[i] / count[i]) / test_div;
table1[i] = (table1[i] / count[i]) / test_div;
}
fputs("const BYTE price_table[2][kPriceTableSize] = {\r\n", stdout);
for (int i = 0; i < kPriceTableSize;) {
for (int j = 0; j < 8; ++j, ++i)
printf("%4d,", table0[i]);
fputs("\r\n", stdout);
}
fputs("}, {\r\n", stdout);
for (int i = 0; i < kPriceTableSize;) {
for (int j = 0; j < 8; ++j, ++i)
printf("%4d,", table1[i]);
fputs("\r\n", stdout);
}
fputs("} };\r\n", stdout);
}

#endif

void RC_setOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size)
{
rc->out_buffer = out_buffer;
rc->chunk_size = chunk_size;
rc->out_index = 0;
}

void RangeEncReset(RangeEncoder* const rc)
void RC_reset(RangeEncoder* const rc)
{
rc->low = 0;
rc->range = (U32)-1;
rc->cache_size = 1;
rc->cache_size = 0;
rc->cache = 0;
}

void ShiftLow(RangeEncoder* const rc)
#ifdef __64BIT__

void FORCE_NOINLINE RC_shiftLow(RangeEncoder* const rc)
{
if (rc->low < 0xFF000000 || rc->low > 0xFFFFFFFF)
{
BYTE temp = rc->cache;
U64 low = rc->low;
rc->low = (U32)(low << 8);
if (low < 0xFF000000 || low > 0xFFFFFFFF) {
BYTE high = (BYTE)(low >> 32);
rc->out_buffer[rc->out_index++] = rc->cache + high;
rc->cache = (BYTE)(low >> 24);
if (rc->cache_size != 0) {
high += 0xFF;
do {
assert (rc->out_index < rc->chunk_size - 4096);
rc->out_buffer[rc->out_index++] = temp + (BYTE)(rc->low >> 32);
temp = 0xFF;
rc->out_buffer[rc->out_index++] = high;
} while (--rc->cache_size != 0);
rc->cache = (BYTE)(rc->low >> 24);
}
++rc->cache_size;
rc->low = (rc->low << 8) & 0xFFFFFFFF;
}
else {
rc->cache_size++;
}
}

void EncodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
#else

void FORCE_NOINLINE RC_shiftLow(RangeEncoder* const rc)
{
size_t tree_index = 1;
assert(bit_count > 0);
U32 low = (U32)rc->low;
unsigned high = (unsigned)(rc->low >> 32);
rc->low = low << 8;
if (low < (U32)0xFF000000 || high != 0) {
rc->out_buffer[rc->out_index++] = rc->cache + (BYTE)high;
rc->cache = (BYTE)(low >> 24);
if (rc->cache_size != 0) {
high += 0xFF;
do {
rc->out_buffer[rc->out_index++] = (BYTE)high;
} while (--rc->cache_size != 0);
}
}
else {
rc->cache_size++;
}
}

#endif
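
/* Worked example of the carry path above (illustrative, not from the source):
 * with cache = 0x7F and cache_size = 2 (two 0xFF bytes queued), an addition
 * that pushes low past 2^32 gives high = 1, so 0x7F + 1 = 0x80 is written and
 * each queued byte goes out as (0xFF + 1) & 0xFF = 0x00. The low < 0xFF000000
 * test keeps bytes pending while a future carry could still reach them. */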

void RC_encodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
{
assert(bit_count > 1);
--bit_count;
unsigned bit = symbol >> bit_count;
RC_encodeBit(rc, &probs[1], bit);
size_t tree_index = 1;
do {
unsigned bit;
--bit_count;
bit = (symbol >> bit_count) & 1;
EncodeBit(rc, &probs[tree_index], bit);
tree_index = (tree_index << 1) | bit;
bit = (symbol >> bit_count) & 1;
RC_encodeBit(rc, &probs[tree_index], bit);
} while (bit_count != 0);
}
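
/* Usage sketch (illustrative, not from the source): encode a 6-bit value
 * MSB-first through a 64-entry probability tree. probs[0] is unused; the walk
 * mirrors the decoder via tree_index = (tree_index << 1) | bit. */
static void RC_encodeBitTree_example(RangeEncoder* const rc)
{
    Probability probs[1 << 6];
    for (size_t i = 0; i < (1 << 6); ++i)
        probs[i] = kProbInitValue;            /* every context starts at p = 0.5 */
    RC_encodeBitTree(rc, probs, 6, 0x2A);     /* emits bits 1,0,1,0,1,0 */
}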

void EncodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
void RC_encodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol)
{
unsigned tree_index = 1;
assert(bit_count != 0);
do {
unsigned bit = symbol & 1;
EncodeBit(rc, &probs[tree_index], bit);
RC_encodeBit(rc, &probs[1], bit);
unsigned tree_index = 1;
while (--bit_count != 0) {
tree_index = (tree_index << 1) + bit;
symbol >>= 1;
} while (--bit_count != 0);
bit = symbol & 1;
RC_encodeBit(rc, &probs[tree_index], bit);
}
}

void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count)
void FORCE_NOINLINE RC_encodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count)
{
assert(bit_count > 0);
do {
@@ -93,7 +203,7 @@ void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count)
rc->low += rc->range & -((int)(value >> bit_count) & 1);
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
} while (bit_count != 0);
}

@@ -28,9 +28,13 @@ typedef U16 Probability;
#define kNumMoveBits 5U
#define kProbInitValue (kBitModelTotal >> 1U)
#define kNumMoveReducingBits 4U
#define kNumBitPriceShiftBits 4U
#define kNumBitPriceShiftBits 5U
#define kPriceTableSize (kBitModelTotal >> kNumMoveReducingBits)

extern const unsigned price_table[kBitModelTotal >> kNumMoveReducingBits];
extern BYTE price_table[2][kPriceTableSize];
#if 0
void RC_printPriceTable();
#endif

typedef struct
{
@@ -43,22 +47,20 @@ typedef struct
BYTE cache;
} RangeEncoder;

void RangeEncReset(RangeEncoder* const rc);
void RC_reset(RangeEncoder* const rc);

void SetOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size);
void RC_setOutputBuffer(RangeEncoder* const rc, BYTE *const out_buffer, size_t chunk_size);

void RangeEncReset(RangeEncoder* const rc);
void FORCE_NOINLINE RC_shiftLow(RangeEncoder* const rc);

void ShiftLow(RangeEncoder* const rc);
void RC_encodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);

void EncodeBitTree(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);
void RC_encodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);

void EncodeBitTreeReverse(RangeEncoder* const rc, Probability *const probs, unsigned bit_count, unsigned symbol);

void EncodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count);
void FORCE_NOINLINE RC_encodeDirect(RangeEncoder* const rc, unsigned value, unsigned bit_count);

HINT_INLINE
void EncodeBit0(RangeEncoder* const rc, Probability *const rprob)
void RC_encodeBit0(RangeEncoder* const rc, Probability *const rprob)
{
unsigned prob = *rprob;
rc->range = (rc->range >> kNumBitModelTotalBits) * prob;
@@ -66,12 +68,12 @@ void EncodeBit0(RangeEncoder* const rc, Probability *const rprob)
*rprob = (Probability)prob;
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
}

HINT_INLINE
void EncodeBit1(RangeEncoder* const rc, Probability *const rprob)
void RC_encodeBit1(RangeEncoder* const rc, Probability *const rprob)
{
unsigned prob = *rprob;
U32 new_bound = (rc->range >> kNumBitModelTotalBits) * prob;
@@ -81,16 +83,16 @@ void EncodeBit1(RangeEncoder* const rc, Probability *const rprob)
*rprob = (Probability)prob;
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
}

HINT_INLINE
void EncodeBit(RangeEncoder* const rc, Probability *const rprob, unsigned const bit)
void RC_encodeBit(RangeEncoder* const rc, Probability *const rprob, unsigned const bit)
{
unsigned prob = *rprob;
if (bit != 0) {
U32 new_bound = (rc->range >> kNumBitModelTotalBits) * prob;
U32 const new_bound = (rc->range >> kNumBitModelTotalBits) * prob;
rc->low += new_bound;
rc->range -= new_bound;
prob -= prob >> kNumMoveBits;
@@ -102,52 +104,56 @@ void EncodeBit(RangeEncoder* const rc, Probability *const rprob, unsigned const
*rprob = (Probability)prob;
if (rc->range < kTopValue) {
rc->range <<= 8;
ShiftLow(rc);
RC_shiftLow(rc);
}
}

#define GET_PRICE(rc, prob, symbol) \
price_table[((prob) ^ ((-(int)(symbol)) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
#define GET_PRICE(prob, symbol) \
price_table[symbol][(prob) >> kNumMoveReducingBits]

#define GET_PRICE_0(rc, prob) price_table[(prob) >> kNumMoveReducingBits]
#define GET_PRICE_0(prob) price_table[0][(prob) >> kNumMoveReducingBits]

#define GET_PRICE_1(rc, prob) price_table[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
#define GET_PRICE_1(prob) price_table[1][(prob) >> kNumMoveReducingBits]
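
/* Usage sketch (illustrative), assuming the usual LZMA convention that a
 * Probability holds the chance of bit 0 out of kBitModelTotal: */
static unsigned RC_bitPrice_example(void)
{
    Probability const p = 123;              /* low value : bit 0 is the rare outcome */
    unsigned const cost0 = GET_PRICE_0(p);  /* high price for the unlikely bit 0 */
    unsigned const cost1 = GET_PRICE_1(p);  /* low price for the likely bit 1 */
    return cost0 + cost1 + GET_PRICE(p, 1); /* GET_PRICE(p, 1) == GET_PRICE_1(p) */
}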

#define kMinLitPrice 8U

HINT_INLINE
unsigned GetTreePrice(RangeEncoder* const rc, const Probability* const prob_table, unsigned const bit_count, size_t symbol)
unsigned RC_getTreePrice(const Probability* const prob_table, unsigned bit_count, size_t symbol)
{
unsigned price = 0;
symbol |= ((size_t)1 << bit_count);
while (symbol != 1) {
size_t next_symbol = symbol >> 1;
do {
size_t const next_symbol = symbol >> 1;
unsigned prob = prob_table[next_symbol];
unsigned bit = (unsigned)symbol & 1;
price += GET_PRICE(rc, prob, bit);
size_t bit = symbol & 1;
price += GET_PRICE(prob, bit);
symbol = next_symbol;
}
} while (symbol != 1);
return price;
}

HINT_INLINE
unsigned GetReverseTreePrice(RangeEncoder* const rc, const Probability* const prob_table, unsigned const bit_count, size_t symbol)
unsigned RC_getReverseTreePrice(const Probability* const prob_table, unsigned bit_count, size_t symbol)
{
unsigned price = 0;
unsigned prob = prob_table[1];
size_t bit = symbol & 1;
unsigned price = GET_PRICE(prob, bit);
size_t m = 1;
for (unsigned i = bit_count; i != 0; --i) {
unsigned prob = prob_table[m];
unsigned bit = symbol & 1;
symbol >>= 1;
price += GET_PRICE(rc, prob, bit);
while (--bit_count != 0) {
m = (m << 1) | bit;
symbol >>= 1;
prob = prob_table[m];
bit = symbol & 1;
price += GET_PRICE(prob, bit);
}
return price;
}

HINT_INLINE
void Flush(RangeEncoder* const rc)
void RC_flush(RangeEncoder* const rc)
{
for (int i = 0; i < 5; ++i)
ShiftLow(rc);
RC_shiftLow(rc);
}

#if defined (__cplusplus)

707
C/fast-lzma2/util.c
Normal file
@@ -0,0 +1,707 @@
/*
* Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

#if defined (__cplusplus)
extern "C" {
#endif


/*-****************************************
* Dependencies
******************************************/
#include "util.h" /* note : ensure that platform.h is included first ! */
#include <errno.h>
#include <assert.h>


int UTIL_fileExist(const char* filename)
{
stat_t statbuf;
#if defined(_MSC_VER)
int const stat_error = _stat64(filename, &statbuf);
#else
int const stat_error = stat(filename, &statbuf);
#endif
return !stat_error;
}

int UTIL_isRegularFile(const char* infilename)
{
stat_t statbuf;
return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */
}

int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
{
int r;
#if defined(_MSC_VER)
r = _stat64(infilename, statbuf);
if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */
#else
r = stat(infilename, statbuf);
if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */
#endif
return 1;
}

int UTIL_setFileStat(const char *filename, stat_t *statbuf)
{
int res = 0;
struct utimbuf timebuf;

if (!UTIL_isRegularFile(filename))
return -1;

timebuf.actime = time(NULL);
timebuf.modtime = statbuf->st_mtime;
res += utime(filename, &timebuf); /* set access and modification times */

#if !defined(_WIN32)
res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */
#endif

res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */

errno = 0;
return -res; /* number of errors is returned */
}

U32 UTIL_isDirectory(const char* infilename)
{
int r;
stat_t statbuf;
#if defined(_MSC_VER)
r = _stat64(infilename, &statbuf);
if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
#else
r = stat(infilename, &statbuf);
if (!r && S_ISDIR(statbuf.st_mode)) return 1;
#endif
return 0;
}

U32 UTIL_isLink(const char* infilename)
{
/* macro guards, as defined in : https://linux.die.net/man/2/lstat */
#ifndef __STRICT_ANSI__
#if defined(_BSD_SOURCE) \
|| (defined(_XOPEN_SOURCE) && (_XOPEN_SOURCE >= 500)) \
|| (defined(_XOPEN_SOURCE) && defined(_XOPEN_SOURCE_EXTENDED)) \
|| (defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)) \
|| (defined(__APPLE__) && defined(__MACH__)) \
|| defined(__OpenBSD__) \
|| defined(__FreeBSD__)
int r;
stat_t statbuf;
r = lstat(infilename, &statbuf);
if (!r && S_ISLNK(statbuf.st_mode)) return 1;
#endif
#endif
(void)infilename;
return 0;
}

U64 UTIL_getFileSize(const char* infilename)
{
if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN;
{ int r;
#if defined(_MSC_VER)
struct __stat64 statbuf;
r = _stat64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#elif defined(__MINGW32__) && defined (__MSVCRT__)
struct _stati64 statbuf;
r = _stati64(infilename, &statbuf);
if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#else
struct stat statbuf;
r = stat(infilename, &statbuf);
if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN;
#endif
return (U64)statbuf.st_size;
}
}
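
/* Usage sketch (illustrative): UTIL_FILESIZE_UNKNOWN doubles as the error
 * value, so callers must test for it before trusting or summing sizes. */
static void UTIL_fileSize_example(const char* fname)    /* fname is hypothetical */
{
    U64 const fsize = UTIL_getFileSize(fname);
    if (fsize == UTIL_FILESIZE_UNKNOWN)
        UTIL_DISPLAYLEVEL(1, "%s : not a regular file\n", fname);
}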
|
||||
|
||||
|
||||
U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles)
|
||||
{
|
||||
U64 total = 0;
|
||||
int error = 0;
|
||||
unsigned n;
|
||||
for (n=0; n<nbFiles; n++) {
|
||||
U64 const size = UTIL_getFileSize(fileNamesTable[n]);
|
||||
error |= (size == UTIL_FILESIZE_UNKNOWN);
|
||||
total += size;
|
||||
}
|
||||
return error ? UTIL_FILESIZE_UNKNOWN : total;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
|
||||
{
|
||||
char* path;
|
||||
int dirLength, fnameLength, pathLength, nbFiles = 0;
|
||||
WIN32_FIND_DATAA cFile;
|
||||
HANDLE hFile;
|
||||
|
||||
dirLength = (int)strlen(dirName);
|
||||
path = (char*) malloc(dirLength + 3);
|
||||
if (!path) return 0;
|
||||
|
||||
memcpy(path, dirName, dirLength);
|
||||
path[dirLength] = '\\';
|
||||
path[dirLength+1] = '*';
|
||||
path[dirLength+2] = 0;
|
||||
|
||||
hFile=FindFirstFileA(path, &cFile);
|
||||
if (hFile == INVALID_HANDLE_VALUE) {
|
||||
UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
|
||||
return 0;
|
||||
}
|
||||
free(path);
|
||||
|
||||
do {
|
||||
fnameLength = (int)strlen(cFile.cFileName);
|
||||
path = (char*) malloc(dirLength + fnameLength + 2);
|
||||
if (!path) { FindClose(hFile); return 0; }
|
||||
memcpy(path, dirName, dirLength);
|
||||
path[dirLength] = '\\';
|
||||
memcpy(path+dirLength+1, cFile.cFileName, fnameLength);
|
||||
pathLength = dirLength+1+fnameLength;
|
||||
path[pathLength] = 0;
|
||||
        if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
            if ( strcmp (cFile.cFileName, "..") == 0
              || strcmp (cFile.cFileName, ".") == 0 ) {
                free(path);  /* path is not reused; free it before skipping this entry */
                continue;
            }
            /* Recursively call "UTIL_prepareFileList" with the new path. */
            nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks);
            if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
        } else if ( (cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL)
                 || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE)
                 || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED) ) {
            if (*bufStart + *pos + pathLength >= *bufEnd) {
                ptrdiff_t const newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
                *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
                if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
                *bufEnd = *bufStart + newListSize;
            }
            if (*bufStart + *pos + pathLength < *bufEnd) {
                memcpy(*bufStart + *pos, path, pathLength+1 /* include final \0 */);
                *pos += pathLength + 1;
                nbFiles++;
            }
        }
        free(path);
    } while (FindNextFileA(hFile, &cFile));

    FindClose(hFile);
    return nbFiles;
}

#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L)  /* opendir, readdir require POSIX.1-2001 */

int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
    DIR *dir;
    struct dirent *entry;
    char* path;
    int dirLength, fnameLength, pathLength, nbFiles = 0;

    if (!(dir = opendir(dirName))) {
        UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
        return 0;
    }

    dirLength = (int)strlen(dirName);
    errno = 0;
    while ((entry = readdir(dir)) != NULL) {
        if (strcmp (entry->d_name, "..") == 0 ||
            strcmp (entry->d_name, ".") == 0) continue;
        fnameLength = (int)strlen(entry->d_name);
        path = (char*) malloc(dirLength + fnameLength + 2);
        if (!path) { closedir(dir); return 0; }
        memcpy(path, dirName, dirLength);

        path[dirLength] = '/';
        memcpy(path+dirLength+1, entry->d_name, fnameLength);
        pathLength = dirLength+1+fnameLength;
        path[pathLength] = 0;

        if (!followLinks && UTIL_isLink(path)) {
            UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path);
            free(path);  /* avoid leaking the entry path when skipping symlinks */
            continue;
        }

        if (UTIL_isDirectory(path)) {
            nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks);  /* Recursively call "UTIL_prepareFileList" with the new path. */
            if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
        } else {
            if (*bufStart + *pos + pathLength >= *bufEnd) {
                ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
                *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
                if (*bufStart == NULL) { free(path); closedir(dir); return 0; }  /* check before computing *bufEnd */
                *bufEnd = *bufStart + newListSize;
            }
            if (*bufStart + *pos + pathLength < *bufEnd) {
                memcpy(*bufStart + *pos, path, pathLength + 1);  /* with final \0 */
                *pos += pathLength + 1;
                nbFiles++;
            }
        }
        free(path);
        errno = 0;  /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
    }

    if (errno != 0) {
        UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
        free(*bufStart);
        *bufStart = NULL;
    }
    closedir(dir);
    return nbFiles;
}

#else

int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
    (void)bufStart; (void)bufEnd; (void)pos; (void)followLinks;
    UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
    return 0;
}

#endif /* #ifdef _WIN32 */

/*
 * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories,
 * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb).
 * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
 * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
 */
const char**
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
                    char** allocatedBuffer, unsigned* allocatedNamesNb,
                    int followLinks)
{
    size_t pos;
    unsigned i, nbFiles;
    char* buf = (char*)malloc(LIST_SIZE_INCREASE);
    char* bufend = buf + LIST_SIZE_INCREASE;
    const char** fileTable;

    if (!buf) return NULL;

    for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
        if (!UTIL_isDirectory(inputNames[i])) {
            size_t const len = strlen(inputNames[i]);
            if (buf + pos + len >= bufend) {
                ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
                buf = (char*)UTIL_realloc(buf, newListSize);
                if (!buf) return NULL;  /* check before computing bufend */
                bufend = buf + newListSize;
            }
            if (buf + pos + len < bufend) {
                memcpy(buf+pos, inputNames[i], len+1);  /* with final \0 */
                pos += len + 1;
                nbFiles++;
            }
        } else {
            nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
            if (buf == NULL) return NULL;
    } }

    if (nbFiles == 0) { free(buf); return NULL; }

    fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
    if (!fileTable) { free(buf); return NULL; }

    for (i=0, pos=0; i<nbFiles; i++) {
        fileTable[i] = buf + pos;
        pos += strlen(fileTable[i]) + 1;
    }

    if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }

    *allocatedBuffer = buf;
    *allocatedNamesNb = nbFiles;

    return fileTable;
}
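
/* Illustrative usage sketch of the file-list API above (not part of the
 * original source; the input names are hypothetical, and error handling is
 * reduced to the documented rule that UTIL_freeFileList() must not be
 * called when UTIL_createFileList() returns NULL): */
#if 0
static void UTIL_exampleListFiles(void)
{
    const char* inputs[2] = { "some_dir", "some_file.bin" };  /* hypothetical inputs */
    char* buffer = NULL;
    unsigned nbFiles = 0;
    unsigned n;
    const char** const files = UTIL_createFileList(inputs, 2, &buffer, &nbFiles, 0 /* skip symlinks */);
    if (files == NULL) return;  /* error : nothing to free */
    for (n = 0; n < nbFiles; n++)
        UTIL_DISPLAY("%s : %llu bytes\n", files[n], (unsigned long long)UTIL_getFileSize(files[n]));
    UTIL_freeFileList(files, buffer);
}
#endif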

/*-****************************************
*  Console log
******************************************/
int g_utilDisplayLevel;


/*-****************************************
*  Time functions
******************************************/
#if defined(_WIN32)   /* Windows */

UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }

U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static LARGE_INTEGER ticksPerSecond;
    static int init = 0;
    if (!init) {
        if (!QueryPerformanceFrequency(&ticksPerSecond))
            UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
        init = 1;
    }
    return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}

U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static LARGE_INTEGER ticksPerSecond;
    static int init = 0;
    if (!init) {
        if (!QueryPerformanceFrequency(&ticksPerSecond))
            UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
        init = 1;
    }
    return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}

#elif defined(__APPLE__) && defined(__MACH__)

UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }

U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static mach_timebase_info_data_t rate;
    static int init = 0;
    if (!init) {
        mach_timebase_info(&rate);
        init = 1;
    }
    return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
}

U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static mach_timebase_info_data_t rate;
    static int init = 0;
    if (!init) {
        mach_timebase_info(&rate);
        init = 1;
    }
    return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
}

#elif (PLATFORM_POSIX_VERSION >= 200112L) \
    && (defined(__UCLIBC__) \
        || (defined(__GLIBC__) \
            && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
                || (__GLIBC__ > 2))))

UTIL_time_t UTIL_getTime(void)
{
    UTIL_time_t time;
    if (clock_gettime(CLOCK_MONOTONIC, &time))
        UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n");  /* we could also exit() */
    return time;
}

UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
{
    UTIL_time_t diff;
    if (end.tv_nsec < begin.tv_nsec) {
        diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
        diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
    } else {
        diff.tv_sec = end.tv_sec - begin.tv_sec;
        diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
    }
    return diff;
}

U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
{
    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
    U64 micro = 0;
    micro += 1000000ULL * diff.tv_sec;
    micro += diff.tv_nsec / 1000ULL;
    return micro;
}

U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
{
    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
    U64 nano = 0;
    nano += 1000000000ULL * diff.tv_sec;
    nano += diff.tv_nsec;
    return nano;
}

#else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */

UTIL_time_t UTIL_getTime(void) { return clock(); }
U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }

#endif

/* returns time span in microseconds */
U64 UTIL_clockSpanMicro(UTIL_time_t clockStart )
{
    UTIL_time_t const clockEnd = UTIL_getTime();
    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
}

/* returns time span in nanoseconds */
U64 UTIL_clockSpanNano(UTIL_time_t clockStart )
{
    UTIL_time_t const clockEnd = UTIL_getTime();
    return UTIL_getSpanTimeNano(clockStart, clockEnd);
}

void UTIL_waitForNextTick(void)
{
    UTIL_time_t const clockStart = UTIL_getTime();
    UTIL_time_t clockEnd;
    do {
        clockEnd = UTIL_getTime();
    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
}
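
/* Illustrative sketch (not in the original source): how the helpers above are
 * typically combined to time a work item. UTIL_waitForNextTick() aligns the
 * start of a short measurement with a fresh clock tick. */
#if 0
static U64 UTIL_exampleMeasureMicro(void (*work)(void))
{
    UTIL_time_t start;
    UTIL_waitForNextTick();
    start = UTIL_getTime();
    work();
    return UTIL_clockSpanMicro(start);  /* elapsed time in microseconds */
}
#endif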

/* count the number of physical cores */
#if defined(_WIN32) || defined(WIN32)

#include <windows.h>

typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);

int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;
    if (numPhysicalCores != 0) return numPhysicalCores;

    {   LPFN_GLPI glpi;
        BOOL done = FALSE;
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL;
        DWORD returnLength = 0;
        size_t byteOffset = 0;

        glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
                                         "GetLogicalProcessorInformation");

        if (glpi == NULL) {
            goto failed;
        }

        while(!done) {
            DWORD rc = glpi(buffer, &returnLength);
            if (FALSE == rc) {
                if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
                    if (buffer)
                        free(buffer);
                    buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength);

                    if (buffer == NULL) {
                        perror("zstd");
                        exit(1);
                    }
                } else {
                    /* some other error */
                    goto failed;
                }
            } else {
                done = TRUE;
            }
        }

        ptr = buffer;

        while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {

            if (ptr->Relationship == RelationProcessorCore) {
                numPhysicalCores++;
            }

            ptr++;
            byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
        }

        free(buffer);

        return numPhysicalCores;
    }

failed:
    /* try to fall back on GetSystemInfo */
    {   SYSTEM_INFO sysinfo;
        GetSystemInfo(&sysinfo);
        numPhysicalCores = sysinfo.dwNumberOfProcessors;
        if (numPhysicalCores == 0) numPhysicalCores = 1;  /* just in case */
    }
    return numPhysicalCores;
}

#elif defined(__APPLE__)

#include <sys/sysctl.h>

/* Use apple-provided syscall
 * see: man 3 sysctl */
int UTIL_countPhysicalCores(void)
{
    static S32 numPhysicalCores = 0;  /* apple specifies int32_t */
    if (numPhysicalCores != 0) return numPhysicalCores;

    {   size_t size = sizeof(S32);
        int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0);
        if (ret != 0) {
            if (errno == ENOENT) {
                /* entry not present, fall back on 1 */
                numPhysicalCores = 1;
            } else {
                perror("zstd: can't get number of physical cpus");
                exit(1);
            }
        }

        return numPhysicalCores;
    }
}

#elif defined(__linux__)

/* parse /proc/cpuinfo
 * siblings / cpu cores should give hyperthreading ratio
 * otherwise fall back on sysconf */
int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;

    if (numPhysicalCores != 0) return numPhysicalCores;

    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (numPhysicalCores == -1) {
        /* value not queryable, fall back on 1 */
        return numPhysicalCores = 1;
    }

    /* try to determine if there's hyperthreading */
    {   FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
#define BUF_SIZE 80
        char buff[BUF_SIZE];

        int siblings = 0;
        int cpu_cores = 0;
        int ratio = 1;

        if (cpuinfo == NULL) {
            /* fall back on the sysconf value */
            return numPhysicalCores;
        }

        /* assume the cpu cores/siblings values will be constant across all
         * present processors */
        while (!feof(cpuinfo)) {
            if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) {
if (strncmp(buff, "siblings", 8) == 0) {
|
||||
const char* const sep = strchr(buff, ':');
|
||||
if (*sep == '\0') {
|
||||
/* formatting was broken? */
|
||||
goto failed;
|
||||
}
|
||||
|
||||
siblings = atoi(sep + 1);
|
||||
}
|
||||
if (strncmp(buff, "cpu cores", 9) == 0) {
|
||||
const char* const sep = strchr(buff, ':');
|
||||
if (*sep == '\0') {
|
||||
/* formatting was broken? */
|
||||
goto failed;
|
||||
}
|
||||
|
||||
cpu_cores = atoi(sep + 1);
|
||||
}
|
||||
            } else if (ferror(cpuinfo)) {
                /* fall back on the sysconf value */
                goto failed;
            }
        }
        if (siblings && cpu_cores) {
            ratio = siblings / cpu_cores;
        }
failed:
        fclose(cpuinfo);
        return numPhysicalCores = numPhysicalCores / ratio;
    }
}
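
/* Worked example of the ratio logic above: on a 4-core/8-thread CPU,
 * /proc/cpuinfo reports "siblings : 8" and "cpu cores : 4", so
 * ratio = 8 / 4 = 2, and the 8 online processors returned by sysconf()
 * reduce to 8 / 2 = 4 physical cores. Without hyperthreading,
 * siblings == cpu cores, ratio stays 1, and the sysconf() value is kept. */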

#elif defined(__FreeBSD__)

#include <sys/param.h>
#include <sys/sysctl.h>

/* Use physical core sysctl when available
 * see: man 4 smp, man 3 sysctl */
int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;  /* freebsd sysctl is native int sized */
    if (numPhysicalCores != 0) return numPhysicalCores;

#if __FreeBSD_version >= 1300008
    {   size_t size = sizeof(numPhysicalCores);
        int ret = sysctlbyname("kern.smp.cores", &numPhysicalCores, &size, NULL, 0);
        if (ret == 0) return numPhysicalCores;
        if (errno != ENOENT) {
            perror("zstd: can't get number of physical cpus");
            exit(1);
        }
        /* sysctl not present, fall through to older sysconf method */
    }
#endif

    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (numPhysicalCores == -1) {
        /* value not queryable, fall back on 1 */
        numPhysicalCores = 1;
    }
    return numPhysicalCores;
}

#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)

/* Use POSIX sysconf
 * see: man 3 sysconf */
int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;

    if (numPhysicalCores != 0) return numPhysicalCores;

    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (numPhysicalCores == -1) {
        /* value not queryable, fall back on 1 */
        return numPhysicalCores = 1;
    }
    return numPhysicalCores;
}

#else

int UTIL_countPhysicalCores(void)
{
    /* assume 1 */
    return 1;
}

#endif

#if defined (__cplusplus)
}
#endif
@@ -16,17 +16,15 @@ extern "C" {
#endif



/*-****************************************
*  Dependencies
******************************************/
#include "platform.h"     /* PLATFORM_POSIX_VERSION */
#include <stdlib.h>       /* malloc */
#include "platform.h"     /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */
#include <stdlib.h>       /* malloc, realloc, free */
#include <stddef.h>       /* size_t, ptrdiff_t */
#include <stdio.h>        /* fprintf */
#include <string.h>       /* strncmp */
#include <sys/types.h>    /* stat, utime */
#include <sys/stat.h>     /* stat */
#include <sys/stat.h>     /* stat, chmod */
#if defined(_MSC_VER)
#  include <sys/utime.h>  /* utime */
#  include <io.h>         /* _chmod */
@@ -34,13 +32,12 @@ extern "C" {
#  include <unistd.h>     /* chown, stat */
#  include <utime.h>      /* utime */
#endif
#include <time.h>         /* time */
#include <errno.h>
#include <time.h>         /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */
#include "mem.h"          /* U32, U64 */


/* ************************************************************
*  Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
/*-************************************************************
*  Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#  define UTIL_fseek _fseeki64
@@ -53,37 +50,38 @@ extern "C" {
#endif


/*-****************************************
*  Sleep functions: Windows - Posix - others
******************************************/
/*-*************************************************
*  Sleep & priority functions: Windows - Posix - others
***************************************************/
#if defined(_WIN32)
#  include <windows.h>
#  define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)
#  define UTIL_sleep(s) Sleep(1000*s)
#  define UTIL_sleepMilli(milli) Sleep(milli)
#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */
#  include <unistd.h>
#  include <sys/resource.h>  /* setpriority */
#  include <time.h>          /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */
#  if defined(PRIO_PROCESS)
#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
#  else
#    define SET_REALTIME_PRIORITY /* disabled */
#  endif

#elif PLATFORM_POSIX_VERSION > 0 /* Unix-like operating system */
#  include <unistd.h>  /* sleep */
#  define UTIL_sleep(s) sleep(s)
#  if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L)  /* nanosleep requires POSIX.1-2001 */
#  if ZSTD_NANOSLEEP_SUPPORT  /* necessarily defined in platform.h */
#    define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); }
#  else
#    define UTIL_sleepMilli(milli) /* disabled */
#  endif
#else
#  if ZSTD_SETPRIORITY_SUPPORT
#    include <sys/resource.h>  /* setpriority */
#    define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20)
#  else
#    define SET_REALTIME_PRIORITY /* disabled */
#  endif

#else /* unknown non-unix operating system */
#  define UTIL_sleep(s)          /* disabled */
#  define UTIL_sleepMilli(milli) /* disabled */
#  define SET_REALTIME_PRIORITY  /* disabled */
#endif


/* *************************************
/*-*************************************
*  Constants
***************************************/
#define LIST_SIZE_INCREASE   (8*1024)
@@ -101,8 +99,6 @@ extern "C" {
#  define UTIL_STATIC static inline
#elif defined(_MSC_VER)
#  define UTIL_STATIC static __inline
#  pragma warning(disable : 4996)  /* disable: C4996: 'strncpy': This function or variable may be unsafe. */
#  pragma warning(disable : 4389)  /* disable: C4389: '==' : signed/unsigned mismatch */
#else
#  define UTIL_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif
@@ -111,7 +107,7 @@ extern "C" {
/*-****************************************
*  Console log
******************************************/
static int g_utilDisplayLevel;
extern int g_utilDisplayLevel;
#define UTIL_DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } }

@@ -120,119 +116,47 @@ static int g_utilDisplayLevel;
*  Time functions
******************************************/
#if defined(_WIN32)   /* Windows */

#define UTIL_TIME_INITIALIZER { { 0, 0 } }
typedef LARGE_INTEGER UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; }
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static LARGE_INTEGER ticksPerSecond;
    static int init = 0;
    if (!init) {
        if (!QueryPerformanceFrequency(&ticksPerSecond))
            UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
        init = 1;
    }
    return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static LARGE_INTEGER ticksPerSecond;
    static int init = 0;
    if (!init) {
        if (!QueryPerformanceFrequency(&ticksPerSecond))
            UTIL_DISPLAYLEVEL(1, "ERROR: QueryPerformanceFrequency() failure\n");
        init = 1;
    }
    return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
}

#elif defined(__APPLE__) && defined(__MACH__)

#include <mach/mach_time.h>
#define UTIL_TIME_INITIALIZER 0
typedef U64 UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); }
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static mach_timebase_info_data_t rate;
    static int init = 0;
    if (!init) {
        mach_timebase_info(&rate);
        init = 1;
    }
    return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom))/1000ULL;
}
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd)
{
    static mach_timebase_info_data_t rate;
    static int init = 0;
    if (!init) {
        mach_timebase_info(&rate);
        init = 1;
    }
    return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
}
#elif (PLATFORM_POSIX_VERSION >= 200112L)
#include <time.h>

#elif (PLATFORM_POSIX_VERSION >= 200112L) \
    && (defined(__UCLIBC__) \
        || (defined(__GLIBC__) \
            && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) \
                || (__GLIBC__ > 2))))

#define UTIL_TIME_INITIALIZER { 0, 0 }
typedef struct timespec UTIL_freq_t;
typedef struct timespec UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void)
{
    UTIL_time_t time;
    if (clock_gettime(CLOCK_MONOTONIC, &time))
        UTIL_DISPLAYLEVEL(1, "ERROR: Failed to get time\n");  /* we could also exit() */
    return time;
}
UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end)
{
    UTIL_time_t diff;
    if (end.tv_nsec < begin.tv_nsec) {
        diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec;
        diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec;
    } else {
        diff.tv_sec = end.tv_sec - begin.tv_sec;
        diff.tv_nsec = end.tv_nsec - begin.tv_nsec;
    }
    return diff;
}
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end)
{
    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
    U64 micro = 0;
    micro += 1000000ULL * diff.tv_sec;
    micro += diff.tv_nsec / 1000ULL;
    return micro;
}
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end)
{
    UTIL_time_t const diff = UTIL_getSpanTime(begin, end);
    U64 nano = 0;
    nano += 1000000000ULL * diff.tv_sec;
    nano += diff.tv_nsec;
    return nano;
}

UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end);

#else   /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */

typedef clock_t UTIL_time_t;
UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); }
UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; }
#define UTIL_TIME_INITIALIZER 0

#endif

UTIL_time_t UTIL_getTime(void);
U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd);
U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd);

#define SEC_TO_MICRO 1000000

/* returns time span in microseconds */
UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart )
{
    UTIL_time_t const clockEnd = UTIL_getTime();
    return UTIL_getSpanTimeMicro(clockStart, clockEnd);
}


UTIL_STATIC void UTIL_waitForNextTick(void)
{
    UTIL_time_t const clockStart = UTIL_getTime();
    UTIL_time_t clockEnd;
    do {
        clockEnd = UTIL_getTime();
    } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0);
}

U64 UTIL_clockSpanMicro(UTIL_time_t clockStart);

/* returns time span in nanoseconds */
U64 UTIL_clockSpanNano(UTIL_time_t clockStart);
void UTIL_waitForNextTick(void);

/*-****************************************
*  File functions
@@ -245,118 +169,23 @@ UTIL_STATIC void UTIL_waitForNextTick(void)
#endif


UTIL_STATIC int UTIL_setFileStat(const char *filename, stat_t *statbuf)
{
    int res = 0;
    struct utimbuf timebuf;

    timebuf.actime = time(NULL);
    timebuf.modtime = statbuf->st_mtime;
    res += utime(filename, &timebuf);  /* set access and modification times */

#if !defined(_WIN32)
    res += chown(filename, statbuf->st_uid, statbuf->st_gid);  /* Copy ownership */
#endif

    res += chmod(filename, statbuf->st_mode & 07777);  /* Copy file permissions */

    errno = 0;
    return -res;  /* number of errors is returned */
}


UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf)
{
    int r;
#if defined(_MSC_VER)
    r = _stat64(infilename, statbuf);
    if (r || !(statbuf->st_mode & S_IFREG)) return 0;  /* No good... */
#else
    r = stat(infilename, statbuf);
    if (r || !S_ISREG(statbuf->st_mode)) return 0;  /* No good... */
#endif
    return 1;
}


UTIL_STATIC int UTIL_isRegularFile(const char* infilename)
{
    stat_t statbuf;
    return UTIL_getFileStat(infilename, &statbuf);  /* Only need to know whether it is a regular file */
}


UTIL_STATIC U32 UTIL_isDirectory(const char* infilename)
{
    int r;
    stat_t statbuf;
#if defined(_MSC_VER)
    r = _stat64(infilename, &statbuf);
    if (!r && (statbuf.st_mode & _S_IFDIR)) return 1;
#else
    r = stat(infilename, &statbuf);
    if (!r && S_ISDIR(statbuf.st_mode)) return 1;
#endif
    return 0;
}

UTIL_STATIC U32 UTIL_isLink(const char* infilename)
{
#if defined(_WIN32)
    /* no symlinks on windows */
    (void)infilename;
#else
    int r;
    stat_t statbuf;
    r = lstat(infilename, &statbuf);
    if (!r && S_ISLNK(statbuf.st_mode)) return 1;
#endif
    return 0;
}

int UTIL_fileExist(const char* filename);
int UTIL_isRegularFile(const char* infilename);
int UTIL_setFileStat(const char* filename, stat_t* statbuf);
U32 UTIL_isDirectory(const char* infilename);
int UTIL_getFileStat(const char* infilename, stat_t* statbuf);

U32 UTIL_isLink(const char* infilename);
#define UTIL_FILESIZE_UNKNOWN  ((U64)(-1))
UTIL_STATIC U64 UTIL_getFileSize(const char* infilename)
{
    if (!UTIL_isRegularFile(infilename)) return UTIL_FILESIZE_UNKNOWN;
    {   int r;
#if defined(_MSC_VER)
        struct __stat64 statbuf;
        r = _stat64(infilename, &statbuf);
        if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#elif defined(__MINGW32__) && defined (__MSVCRT__)
        struct _stati64 statbuf;
        r = _stati64(infilename, &statbuf);
        if (r || !(statbuf.st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN;
#else
        struct stat statbuf;
        r = stat(infilename, &statbuf);
        if (r || !S_ISREG(statbuf.st_mode)) return UTIL_FILESIZE_UNKNOWN;
#endif
        return (U64)statbuf.st_size;
    }
}


UTIL_STATIC U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles)
{
    U64 total = 0;
    int error = 0;
    unsigned n;
    for (n=0; n<nbFiles; n++) {
        U64 const size = UTIL_getFileSize(fileNamesTable[n]);
        error |= (size == UTIL_FILESIZE_UNKNOWN);
        total += size;
    }
    return error ? UTIL_FILESIZE_UNKNOWN : total;
}
U64 UTIL_getFileSize(const char* infilename);

U64 UTIL_getTotalFileSize(const char* const * const fileNamesTable, unsigned nbFiles);

/*
 * A modified version of realloc().
 * If UTIL_realloc() fails the original block is freed.
 */
UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size)
{
    void *newptr = realloc(ptr, size);
    if (newptr) return newptr;
@@ -364,143 +193,14 @@ UTIL_STATIC void *UTIL_realloc(void *ptr, size_t size)
    return NULL;
}

int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks);
#ifdef _WIN32
#  define UTIL_HAS_CREATEFILELIST

UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
    char* path;
    int dirLength, fnameLength, pathLength, nbFiles = 0;
    WIN32_FIND_DATAA cFile;
    HANDLE hFile;

    dirLength = (int)strlen(dirName);
    path = (char*) malloc(dirLength + 3);
    if (!path) return 0;

    memcpy(path, dirName, dirLength);
    path[dirLength] = '\\';
    path[dirLength+1] = '*';
    path[dirLength+2] = 0;

    hFile=FindFirstFileA(path, &cFile);
    if (hFile == INVALID_HANDLE_VALUE) {
        UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName);
        return 0;
    }
    free(path);

    do {
        fnameLength = (int)strlen(cFile.cFileName);
        path = (char*) malloc(dirLength + fnameLength + 2);
        if (!path) { FindClose(hFile); return 0; }
        memcpy(path, dirName, dirLength);
        path[dirLength] = '\\';
        memcpy(path+dirLength+1, cFile.cFileName, fnameLength);
        pathLength = dirLength+1+fnameLength;
        path[pathLength] = 0;
        if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
            if (strcmp (cFile.cFileName, "..") == 0 ||
                strcmp (cFile.cFileName, ".") == 0) continue;

            nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks);  /* Recursively call "UTIL_prepareFileList" with the new path. */
            if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
        }
        else if ((cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE) || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED)) {
            if (*bufStart + *pos + pathLength >= *bufEnd) {
                ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
                *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
                *bufEnd = *bufStart + newListSize;
                if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; }
            }
            if (*bufStart + *pos + pathLength < *bufEnd) {
                strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos));
                *pos += pathLength + 1;
                nbFiles++;
            }
        }
        free(path);
    } while (FindNextFileA(hFile, &cFile));

    FindClose(hFile);
    return nbFiles;
}

#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L)  /* opendir, readdir require POSIX.1-2001 */
#  define UTIL_HAS_CREATEFILELIST
#  include <dirent.h>  /* opendir, readdir */
#  include <string.h>  /* strerror, memcpy */

UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
    DIR *dir;
    struct dirent *entry;
    char* path;
    int dirLength, fnameLength, pathLength, nbFiles = 0;

    if (!(dir = opendir(dirName))) {
        UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno));
        return 0;
    }

    dirLength = (int)strlen(dirName);
    errno = 0;
    while ((entry = readdir(dir)) != NULL) {
        if (strcmp (entry->d_name, "..") == 0 ||
            strcmp (entry->d_name, ".") == 0) continue;
        fnameLength = (int)strlen(entry->d_name);
        path = (char*) malloc(dirLength + fnameLength + 2);
        if (!path) { closedir(dir); return 0; }
        memcpy(path, dirName, dirLength);

        path[dirLength] = '/';
        memcpy(path+dirLength+1, entry->d_name, fnameLength);
        pathLength = dirLength+1+fnameLength;
        path[pathLength] = 0;

        if (!followLinks && UTIL_isLink(path)) {
            UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path);
            continue;
        }

        if (UTIL_isDirectory(path)) {
            nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks);  /* Recursively call "UTIL_prepareFileList" with the new path. */
            if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
        } else {
            if (*bufStart + *pos + pathLength >= *bufEnd) {
                ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE;
                *bufStart = (char*)UTIL_realloc(*bufStart, newListSize);
                *bufEnd = *bufStart + newListSize;
                if (*bufStart == NULL) { free(path); closedir(dir); return 0; }
            }
            if (*bufStart + *pos + pathLength < *bufEnd) {
                strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos));
                *pos += pathLength + 1;
                nbFiles++;
            }
        }
        free(path);
        errno = 0;  /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */
    }

    if (errno != 0) {
        UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s\n", dirName, strerror(errno));
        free(*bufStart);
        *bufStart = NULL;
    }
    closedir(dir);
    return nbFiles;
}

#else

UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd, int followLinks)
{
    (void)bufStart; (void)bufEnd; (void)pos;
    UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName);
    return 0;
}

#endif /* #ifdef _WIN32 */

/*
@@ -509,53 +209,10 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_
 * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer)
 * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called.
 */
UTIL_STATIC const char** UTIL_createFileList(const char **inputNames, unsigned inputNamesNb, char** allocatedBuffer, unsigned* allocatedNamesNb, int followLinks)
{
    size_t pos;
    unsigned i, nbFiles;
    char* buf = (char*)malloc(LIST_SIZE_INCREASE);
    char* bufend = buf + LIST_SIZE_INCREASE;
    const char** fileTable;

    if (!buf) return NULL;

    for (i=0, pos=0, nbFiles=0; i<inputNamesNb; i++) {
        if (!UTIL_isDirectory(inputNames[i])) {
            size_t const len = strlen(inputNames[i]);
            if (buf + pos + len >= bufend) {
                ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE;
                buf = (char*)UTIL_realloc(buf, newListSize);
                bufend = buf + newListSize;
                if (!buf) return NULL;
            }
            if (buf + pos + len < bufend) {
                strncpy(buf + pos, inputNames[i], bufend - (buf + pos));
                pos += len + 1;
                nbFiles++;
            }
        } else {
            nbFiles += UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend, followLinks);
            if (buf == NULL) return NULL;
    } }

    if (nbFiles == 0) { free(buf); return NULL; }

    fileTable = (const char**)malloc((nbFiles+1) * sizeof(const char*));
    if (!fileTable) { free(buf); return NULL; }

    for (i=0, pos=0; i<nbFiles; i++) {
        fileTable[i] = buf + pos;
        pos += strlen(fileTable[i]) + 1;
    }

    if (buf + pos > bufend) { free(buf); free((void*)fileTable); return NULL; }

    *allocatedBuffer = buf;
    *allocatedNamesNb = nbFiles;

    return fileTable;
}

const char**
UTIL_createFileList(const char **inputNames, unsigned inputNamesNb,
                    char** allocatedBuffer, unsigned* allocatedNamesNb,
                    int followLinks);

UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer)
{
@@ -563,201 +220,7 @@ UTIL_STATIC void UTIL_freeFileList(const char** filenameTable, char* allocatedBu
    if (filenameTable) free((void*)filenameTable);
}

/* count the number of physical cores */
#if defined(_WIN32) || defined(WIN32)

#include <windows.h>

typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);

UTIL_STATIC int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;
    if (numPhysicalCores != 0) return numPhysicalCores;

    {   LPFN_GLPI glpi;
        BOOL done = FALSE;
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL;
        DWORD returnLength = 0;
        size_t byteOffset = 0;

        glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")),
                                         "GetLogicalProcessorInformation");

        if (glpi == NULL) {
            goto failed;
        }

        while(!done) {
            DWORD rc = glpi(buffer, &returnLength);
            if (FALSE == rc) {
                if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
                    if (buffer)
                        free(buffer);
                    buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength);

                    if (buffer == NULL) {
                        perror("zstd");
                        exit(1);
                    }
                } else {
                    /* some other error */
                    goto failed;
                }
            } else {
                done = TRUE;
            }
        }

        ptr = buffer;

        while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) {

            if (ptr->Relationship == RelationProcessorCore) {
                numPhysicalCores++;
            }

            ptr++;
            byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
        }

        free(buffer);

        return numPhysicalCores;
    }

failed:
    /* try to fall back on GetSystemInfo */
    {   SYSTEM_INFO sysinfo;
        GetSystemInfo(&sysinfo);
        numPhysicalCores = sysinfo.dwNumberOfProcessors;
        if (numPhysicalCores == 0) numPhysicalCores = 1;  /* just in case */
    }
    return numPhysicalCores;
}

#elif defined(__APPLE__)

#include <sys/sysctl.h>

/* Use apple-provided syscall
 * see: man 3 sysctl */
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
    static S32 numPhysicalCores = 0;  /* apple specifies int32_t */
    if (numPhysicalCores != 0) return numPhysicalCores;

    {   size_t size = sizeof(S32);
        int const ret = sysctlbyname("hw.physicalcpu", &numPhysicalCores, &size, NULL, 0);
        if (ret != 0) {
            if (errno == ENOENT) {
                /* entry not present, fall back on 1 */
                numPhysicalCores = 1;
            } else {
                perror("zstd: can't get number of physical cpus");
                exit(1);
            }
        }

        return numPhysicalCores;
    }
}

#elif defined(__linux__)

/* parse /proc/cpuinfo
 * siblings / cpu cores should give hyperthreading ratio
 * otherwise fall back on sysconf */
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;

    if (numPhysicalCores != 0) return numPhysicalCores;

    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (numPhysicalCores == -1) {
        /* value not queryable, fall back on 1 */
        return numPhysicalCores = 1;
    }

    /* try to determine if there's hyperthreading */
    {   FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
#define BUF_SIZE 80
        char buff[BUF_SIZE];

        int siblings = 0;
        int cpu_cores = 0;
        int ratio = 1;

        if (cpuinfo == NULL) {
            /* fall back on the sysconf value */
            return numPhysicalCores;
        }

        /* assume the cpu cores/siblings values will be constant across all
         * present processors */
        while (!feof(cpuinfo)) {
            if (fgets(buff, BUF_SIZE, cpuinfo) != NULL) {
                if (strncmp(buff, "siblings", 8) == 0) {
                    const char* const sep = strchr(buff, ':');
                    if (*sep == '\0') {
                        /* formatting was broken? */
                        goto failed;
                    }

                    siblings = atoi(sep + 1);
                }
                if (strncmp(buff, "cpu cores", 9) == 0) {
                    const char* const sep = strchr(buff, ':');
                    if (*sep == '\0') {
                        /* formatting was broken? */
                        goto failed;
                    }

                    cpu_cores = atoi(sep + 1);
                }
            } else if (ferror(cpuinfo)) {
                /* fall back on the sysconf value */
                goto failed;
            }
        }
        if (siblings && cpu_cores) {
            ratio = siblings / cpu_cores;
        }
failed:
        fclose(cpuinfo);
        return numPhysicalCores = numPhysicalCores / ratio;
    }
}

#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)

/* Use apple-provided syscall
 * see: man 3 sysctl */
UTIL_STATIC int UTIL_countPhysicalCores(void)
{
    static int numPhysicalCores = 0;

    if (numPhysicalCores != 0) return numPhysicalCores;

    numPhysicalCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (numPhysicalCores == -1) {
        /* value not queryable, fall back on 1 */
        return numPhysicalCores = 1;
    }
    return numPhysicalCores;
}

#else

UTIL_STATIC int UTIL_countPhysicalCores(void)
{
    /* assume 1 */
    return 1;
}

#endif
int UTIL_countPhysicalCores(void);

#if defined (__cplusplus)
}

@@ -212,7 +212,7 @@ $(ZSTDMT_OBJS): ../../../../C/zstdmt/$(*B).c
|
||||
|
||||
!IFDEF FASTLZMA2_OBJS
|
||||
$(FASTLZMA2_OBJS): ../../../../C/fast-lzma2/$(*B).c
|
||||
$(COMPL_O2) -DNO_XXHASH
|
||||
$(COMPL_O2) -DNO_XXHASH -DFL2_7ZIP_BUILD
|
||||
!ENDIF
|
||||
|
||||
|
||||
@@ -298,7 +298,7 @@ $(FASTLZMA2_OBJS): ../../../../C/fast-lzma2/$(*B).c
|
||||
-I ../../../../C/lz5 \
|
||||
-I ../../../../C/zstd
|
||||
{../../../../C/fast-lzma2}.c{$O}.obj::
|
||||
$(COMPLB_O2) -DNO_XXHASH
|
||||
$(COMPLB_O2) -DNO_XXHASH -DFL2_7ZIP_BUILD
|
||||
|
||||
!ENDIF
|
||||
|
||||
|
||||
@@ -322,16 +322,17 @@ ZSTDMT_OBJS = \
|
||||
$O\zstd-mt_threading.obj \
|
||||
|
||||
FASTLZMA2_OBJS = \
|
||||
$O\fl2_error_private.obj \
|
||||
$O\fl2pool.obj \
|
||||
$O\fl2threading.obj \
|
||||
$O\dict_buffer.obj \
|
||||
$O\fl2_common.obj \
|
||||
$O\fl2_compress.obj \
|
||||
$O\fl2_pool.obj \
|
||||
$O\fl2_threading.obj \
|
||||
$O\lzma2_enc.obj \
|
||||
$O\radix_bitpack.obj \
|
||||
$O\radix_mf.obj \
|
||||
$O\radix_struct.obj \
|
||||
$O\range_enc.obj \
|
||||
$O\util.obj \
|
||||
|
||||
!include "../../UI/Console/Console.mak"
|
||||
|
||||
|
||||
@@ -36,15 +36,16 @@ COMPRESS_OBJS = $(COMPRESS_OBJS) \
|
||||
$O\FastLzma2Register.obj \
|
||||
|
||||
FASTLZMA2_OBJS = \
|
||||
$O\fl2_error_private.obj \
|
||||
$O\fl2pool.obj \
|
||||
$O\fl2threading.obj \
|
||||
$O\dict_buffer.obj \
|
||||
$O\fl2_common.obj \
|
||||
$O\fl2_compress.obj \
|
||||
$O\fl2_pool.obj \
|
||||
$O\fl2_threading.obj \
|
||||
$O\lzma2_enc.obj \
|
||||
$O\radix_bitpack.obj \
|
||||
$O\radix_mf.obj \
|
||||
$O\radix_struct.obj \
|
||||
$O\range_enc.obj \
|
||||
$O\util.obj \
|
||||
|
||||
!include "../../7zip.mak"
|
||||
|
||||
@@ -244,16 +244,17 @@ ZSTDMT_OBJS = \
|
||||
$O\zstd-mt_threading.obj \
|
||||
|
||||
FASTLZMA2_OBJS = \
|
||||
$O\fl2_error_private.obj \
|
||||
$O\fl2pool.obj \
|
||||
$O\fl2threading.obj \
|
||||
$O\dict_buffer.obj \
|
||||
$O\fl2_common.obj \
|
||||
$O\fl2_compress.obj \
|
||||
$O\fl2_pool.obj \
|
||||
$O\fl2_threading.obj \
|
||||
$O\lzma2_enc.obj \
|
||||
$O\radix_bitpack.obj \
|
||||
$O\radix_mf.obj \
|
||||
$O\radix_struct.obj \
|
||||
$O\range_enc.obj \
|
||||
$O\util.obj \
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -119,15 +119,16 @@ ZSTDMT_OBJS = \
|
||||
$O\zstd-mt_threading.obj \
|
||||
|
||||
FASTLZMA2_OBJS = \
|
||||
$O\fl2_error_private.obj \
|
||||
$O\fl2pool.obj \
|
||||
$O\fl2threading.obj \
|
||||
$O\dict_buffer.obj \
|
||||
$O\fl2_common.obj \
|
||||
$O\fl2_compress.obj \
|
||||
$O\fl2_pool.obj \
|
||||
$O\fl2_threading.obj \
|
||||
$O\lzma2_enc.obj \
|
||||
$O\radix_bitpack.obj \
|
||||
$O\radix_mf.obj \
|
||||
$O\radix_struct.obj \
|
||||
$O\range_enc.obj \
|
||||
$O\util.obj \
|
||||
|
||||
!include "../../7zip.mak"
|
||||
|
||||
@@ -119,15 +119,16 @@ ZSTDMT_OBJS = \
|
||||
$O\zstd-mt_threading.obj \
|
||||
|
||||
FASTLZMA2_OBJS = \
|
||||
$O\fl2_error_private.obj \
|
||||
$O\fl2pool.obj \
|
||||
$O\fl2threading.obj \
|
||||
$O\dict_buffer.obj \
|
||||
$O\fl2_common.obj \
|
||||
$O\fl2_compress.obj \
|
||||
$O\fl2_pool.obj \
|
||||
$O\fl2_threading.obj \
|
||||
$O\lzma2_enc.obj \
|
||||
$O\radix_bitpack.obj \
|
||||
$O\radix_mf.obj \
|
||||
$O\radix_struct.obj \
|
||||
$O\range_enc.obj \
|
||||
$O\util.obj \
|
||||
|
||||
!include "../../7zip.mak"
|
||||
|
||||
@@ -236,15 +236,16 @@ ZSTDMT_OBJS = \
|
||||
$O\zstd-mt_threading.obj \
|
||||
|
||||
FASTLZMA2_OBJS = \
|
||||
$O\fl2_error_private.obj \
|
||||
$O\fl2pool.obj \
|
||||
$O\fl2threading.obj \
|
||||
$O\dict_buffer.obj \
|
||||
$O\fl2_common.obj \
|
||||
$O\fl2_compress.obj \
|
||||
$O\fl2_pool.obj \
|
||||
$O\fl2_threading.obj \
|
||||
$O\lzma2_enc.obj \
|
||||
$O\radix_bitpack.obj \
|
||||
$O\radix_mf.obj \
|
||||
$O\radix_struct.obj \
|
||||
$O\range_enc.obj \
|
||||
$O\util.obj \
|
||||
|
||||
!include "../../7zip.mak"
|
||||
|
||||
@@ -121,23 +121,39 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, ISequentialOutStream
|
||||
return SResToHRESULT(res);
|
||||
}
|
||||
|
||||
CFastEncoder::CFastEncoder()
|
||||
static HRESULT TranslateError(size_t res)
|
||||
{
|
||||
_encoder = NULL;
|
||||
reduceSize = 0;
|
||||
if (FL2_getErrorCode(res) == FL2_error_memory_allocation)
|
||||
return E_OUTOFMEMORY;
|
||||
return S_FALSE;
|
||||
}
|
||||
|
||||
CFastEncoder::~CFastEncoder()
|
||||
#define CHECK_S(f_) do { \
|
||||
size_t r_ = f_; \
|
||||
if (FL2_isError(r_)) \
|
||||
return TranslateError(r_); \
|
||||
} while (false)
|
||||
|
||||
#define CHECK_H(f_) do { \
|
||||
HRESULT r_ = f_; \
|
||||
if (r_ != S_OK) \
|
||||
return r_; \
|
||||
} while (false)
|
||||
|
||||
#define CHECK_P(f) if (FL2_isError(f)) return E_INVALIDARG; /* check and convert error code */
|
||||
|
||||
CFastEncoder::FastLzma2::FastLzma2()
|
||||
: fcs(NULL),
|
||||
dict_pos(0)
|
||||
{
|
||||
if (_encoder)
|
||||
FL2_freeCCtx(_encoder);
|
||||
}
|
||||
|
||||
CFastEncoder::FastLzma2::~FastLzma2()
|
||||
{
|
||||
FL2_freeCCtx(fcs);
|
||||
}
|
||||
|
||||
#define CHECK_F(f) if (FL2_isError(f)) return E_INVALIDARG; /* check and convert error code */
|
||||
|
||||
STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs,
|
||||
const PROPVARIANT *coderProps, UInt32 numProps)
|
||||
HRESULT CFastEncoder::FastLzma2::SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *coderProps, UInt32 numProps)
|
||||
{
|
||||
CLzma2EncProps lzma2Props;
|
||||
Lzma2EncProps_Init(&lzma2Props);
|
||||
@@ -146,56 +162,165 @@ STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs,
|
||||
{
|
||||
RINOK(SetLzma2Prop(propIDs[i], coderProps[i], lzma2Props));
|
||||
}
|
||||
if (_encoder == NULL) {
|
||||
_encoder = FL2_createCCtxMt(lzma2Props.numTotalThreads);
|
||||
if (_encoder == NULL)
|
||||
if (fcs == NULL) {
|
||||
fcs = FL2_createCStreamMt(lzma2Props.numTotalThreads, 1);
|
||||
if (fcs == NULL)
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
if (lzma2Props.lzmaProps.algo > 2) {
|
||||
if (lzma2Props.lzmaProps.algo > 3)
|
||||
return E_INVALIDARG;
|
||||
lzma2Props.lzmaProps.algo = 2;
|
||||
FL2_CCtx_setParameter(_encoder, FL2_p_highCompression, 1);
|
||||
FL2_CCtx_setParameter(_encoder, FL2_p_compressionLevel, lzma2Props.lzmaProps.level);
|
||||
FL2_CCtx_setParameter(fcs, FL2_p_highCompression, 1);
|
||||
FL2_CCtx_setParameter(fcs, FL2_p_compressionLevel, lzma2Props.lzmaProps.level);
|
||||
}
|
||||
else {
|
||||
FL2_CCtx_setParameter(_encoder, FL2_p_7zLevel, lzma2Props.lzmaProps.level);
|
||||
FL2_CCtx_setParameter(fcs, FL2_p_compressionLevel, lzma2Props.lzmaProps.level);
|
||||
}
|
||||
dictSize = lzma2Props.lzmaProps.dictSize;
|
||||
size_t dictSize = lzma2Props.lzmaProps.dictSize;
|
||||
if (!dictSize) {
|
||||
dictSize = (UInt32)1 << FL2_CCtx_setParameter(_encoder, FL2_p_dictionaryLog, 0);
|
||||
dictSize = (UInt32)FL2_CCtx_getParameter(fcs, FL2_p_dictionarySize);
|
||||
}
|
||||
reduceSize = lzma2Props.lzmaProps.reduceSize;
|
||||
size_t reduceSize = lzma2Props.lzmaProps.reduceSize;
|
||||
reduceSize += (reduceSize < (UInt64)-1); /* prevent extra buffer shift after read */
|
||||
dictSize = (UInt32)min(dictSize, reduceSize);
|
||||
unsigned dictLog = FL2_DICTLOG_MIN;
|
||||
while (((UInt32)1 << dictLog) < dictSize)
|
||||
++dictLog;
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_dictionaryLog, dictLog));
|
||||
dictSize = max(dictSize, FL2_DICTSIZE_MIN);
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_dictionarySize, dictSize));
|
||||
if (lzma2Props.lzmaProps.algo >= 0) {
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_strategy, (unsigned)lzma2Props.lzmaProps.algo));
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_strategy, (unsigned)lzma2Props.lzmaProps.algo));
|
||||
}
|
||||
if (lzma2Props.lzmaProps.fb > 0)
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_fastLength, lzma2Props.lzmaProps.fb));
|
||||
if (lzma2Props.lzmaProps.mc) {
|
||||
unsigned ml = 0;
|
||||
while (((UInt32)1 << ml) < lzma2Props.lzmaProps.mc)
|
||||
++ml;
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_searchLog, ml));
|
||||
}
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_fastLength, lzma2Props.lzmaProps.fb));
|
||||
if (lzma2Props.lzmaProps.mc > 0)
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_hybridCycles, lzma2Props.lzmaProps.mc));
|
||||
if (lzma2Props.lzmaProps.lc >= 0)
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_literalCtxBits, lzma2Props.lzmaProps.lc));
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_literalCtxBits, lzma2Props.lzmaProps.lc));
|
||||
if (lzma2Props.lzmaProps.lp >= 0)
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_literalPosBits, lzma2Props.lzmaProps.lp));
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_literalPosBits, lzma2Props.lzmaProps.lp));
|
||||
if (lzma2Props.lzmaProps.pb >= 0)
|
||||
CHECK_F(FL2_CCtx_setParameter(_encoder, FL2_p_posBits, lzma2Props.lzmaProps.pb));
|
||||
FL2_CCtx_setParameter(_encoder, FL2_p_omitProperties, 1);
|
||||
#ifndef NO_XXHASH
|
||||
FL2_CCtx_setParameter(_encoder, FL2_p_doXXHash, 0);
|
||||
#endif
|
||||
CHECK_P(FL2_CCtx_setParameter(fcs, FL2_p_posBits, lzma2Props.lzmaProps.pb));
|
||||
FL2_CCtx_setParameter(fcs, FL2_p_omitProperties, 1);
|
||||
FL2_setCStreamTimeout(fcs, 500);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
size_t CFastEncoder::FastLzma2::GetDictSize() const
|
||||
{
|
||||
return FL2_CCtx_getParameter(fcs, FL2_p_dictionarySize);
|
||||
}
|
||||
|
||||
HRESULT CFastEncoder::FastLzma2::Begin()
|
||||
{
|
||||
CHECK_S(FL2_initCStream(fcs, 0));
|
||||
CHECK_S(FL2_getDictionaryBuffer(fcs, &dict));
|
||||
dict_pos = 0;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
BYTE* CFastEncoder::FastLzma2::GetAvailableBuffer(unsigned long& size)
|
||||
{
|
||||
size = static_cast<unsigned long>(dict.size - dict_pos);
|
||||
return reinterpret_cast<BYTE*>(dict.dst) + dict_pos;
|
||||
}
|
||||
|
||||
HRESULT CFastEncoder::FastLzma2::WaitAndReport(size_t& res, ICompressProgressInfo *progress)
|
||||
{
|
||||
while (FL2_isTimedOut(res)) {
|
||||
if (!UpdateProgress(progress))
|
||||
return S_FALSE;
|
||||
res = FL2_waitCStream(fcs);
|
||||
}
|
||||
CHECK_S(res);
|
||||
return S_OK;
|
||||
}

HRESULT CFastEncoder::FastLzma2::AddByteCount(size_t count, ISequentialOutStream *outStream, ICompressProgressInfo *progress)
{
    dict_pos += count;
    if (dict_pos == dict.size) {
        size_t res = FL2_updateDictionary(fcs, dict_pos);
        CHECK_H(WaitAndReport(res, progress));
        if (res != 0)
            CHECK_H(WriteBuffers(outStream));
        do {
            res = FL2_getDictionaryBuffer(fcs, &dict);
        } while (FL2_isTimedOut(res));
        CHECK_S(res);
        dict_pos = 0;
    }
    if (!UpdateProgress(progress))
        return S_FALSE;
    return S_OK;
}

bool CFastEncoder::FastLzma2::UpdateProgress(ICompressProgressInfo *progress)
{
    if (progress) {
        UInt64 outProcessed;
        UInt64 inProcessed = FL2_getCStreamProgress(fcs, &outProcessed);
        HRESULT err = progress->SetRatioInfo(&inProcessed, &outProcessed);
        if (err != S_OK) {
            FL2_cancelCStream(fcs);
            return false;
        }
    }
    return true;
}

HRESULT CFastEncoder::FastLzma2::WriteBuffers(ISequentialOutStream *outStream)
{
    size_t csize;
    for (;;) {
        FL2_cBuffer cbuf;
        // Waits if compression is in progress
        csize = FL2_getNextCStreamBuffer(fcs, &cbuf);
        CHECK_S(csize);
        if (csize == 0)
            break;
        HRESULT err = WriteStream(outStream, cbuf.src, cbuf.size);
        if (err != S_OK)
            return err;
    }
    return S_OK;
}
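
// WriteStream() here is 7-Zip's StreamUtils helper, which loops until the
// whole cbuf.src block has been written or the stream reports an error.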

HRESULT CFastEncoder::FastLzma2::End(ISequentialOutStream *outStream, ICompressProgressInfo *progress)
{
    if (dict_pos) {
        size_t res = FL2_updateDictionary(fcs, dict_pos);
        CHECK_H(WaitAndReport(res, progress));
    }

    size_t res = FL2_endStream(fcs, nullptr);
    CHECK_H(WaitAndReport(res, progress));
    while (res) {
        WriteBuffers(outStream);
        res = FL2_endStream(fcs, nullptr);
        CHECK_H(WaitAndReport(res, progress));
    }
    return S_OK;
}

void CFastEncoder::FastLzma2::Cancel()
{
    FL2_cancelCStream(fcs);
}

CFastEncoder::CFastEncoder()
{
}

CFastEncoder::~CFastEncoder()
{
}


STDMETHODIMP CFastEncoder::SetCoderProperties(const PROPID *propIDs,
    const PROPVARIANT *coderProps, UInt32 numProps)
{
    return _encoder.SetCoderProperties(propIDs, coderProps, numProps);
}


#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))

@@ -203,6 +328,7 @@ STDMETHODIMP CFastEncoder::WriteCoderProperties(ISequentialOutStream *outStream)
{
    Byte prop;
    unsigned i;
    size_t dictSize = _encoder.GetDictSize();
    for (i = 0; i < 40; i++)
        if (dictSize <= LZMA2_DIC_SIZE_FROM_PROP(i))
            break;
@@ -211,79 +337,29 @@ STDMETHODIMP CFastEncoder::WriteCoderProperties(ISequentialOutStream *outStream)
}
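
// Worked example for the macro and the scan above: LZMA2_DIC_SIZE_FROM_PROP
// alternates 2 << k and 3 << k, giving 4 KiB, 6 KiB, 8 KiB, 12 KiB, ... for
// p = 0, 1, 2, 3. A 64 MiB dictionary (1 << 26) stops the loop at i = 28,
// since 2 << (28/2 + 11) = 1 << 26 is the first entry large enough to fit.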


typedef struct
{
    ISequentialOutStream* outStream;
    ICompressProgressInfo* progress;
    UInt64 in_processed;
    UInt64 out_processed;
    HRESULT res;
} EncodingObjects;

static int FL2LIB_CALL Progress(size_t done, void* opaque)
{
    EncodingObjects* p = (EncodingObjects*)opaque;
    if (p && p->progress) {
        UInt64 in_processed = p->in_processed + done;
        p->res = p->progress->SetRatioInfo(&in_processed, &p->out_processed);
        return p->res != S_OK;
    }
    return 0;
}

static int FL2LIB_CALL Write(const void* src, size_t srcSize, void* opaque)
{
    EncodingObjects* p = (EncodingObjects*)opaque;
    p->res = WriteStream(p->outStream, src, srcSize);
    return p->res != S_OK;
}
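
// Both callbacks follow the _toFn convention used in Code() below: a nonzero
// return aborts the FL2 call, and the failing HRESULT is parked in
// EncodingObjects::res so the caller can tell a stream-write error apart
// from a progress-driven cancellation.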

STDMETHODIMP CFastEncoder::Code(ISequentialInStream *inStream, ISequentialOutStream *outStream,
    const UInt64 * /* inSize */, const UInt64 * /* outSize */, ICompressProgressInfo *progress)
{
    HRESULT err = S_OK;
    inBuffer.AllocAtLeast(dictSize);
    EncodingObjects objs = { outStream, progress, 0, 0, S_OK };
    FL2_blockBuffer block = { inBuffer, 0, 0, dictSize };
    CHECK_H(_encoder.Begin());
    size_t inSize;
    unsigned long dSize;
    do
    {
        FL2_shiftBlock(_encoder, &block);
        size_t inSize = dictSize - block.start;
        err = ReadStream(inStream, inBuffer + block.start, &inSize);
        if (err != S_OK)
            break;
        block.end += inSize;
        if (inSize) {
            size_t cSize = FL2_compressCCtxBlock_toFn(_encoder, Write, &objs, &block, Progress);
            if (FL2_isError(cSize)) {
                if (FL2_getErrorCode(cSize) == FL2_error_memory_allocation)
                    return E_OUTOFMEMORY;
                return objs.res != S_OK ? objs.res : S_FALSE;
            }
            if (objs.res != S_OK)
                return objs.res;
            objs.out_processed += cSize;
            objs.in_processed += inSize;
            if (progress) {
                err = progress->SetRatioInfo(&objs.in_processed, &objs.out_processed);
                if (err != S_OK)
                    break;
            }
            if (block.end < dictSize)
                break;
        }
        else break;
        BYTE* dict = _encoder.GetAvailableBuffer(dSize);

    } while (err == S_OK);

    if (err == S_OK) {
        size_t cSize = FL2_endFrame_toFn(_encoder, Write, &objs);
        if (FL2_isError(cSize))
            return S_FALSE;
        objs.out_processed += cSize;
        err = objs.res;
    }
        inSize = dSize;
        HRESULT err = ReadStream(inStream, dict, &inSize);
        if (err != S_OK) {
            _encoder.Cancel();
            return err;
        }
        CHECK_H(_encoder.AddByteCount(inSize, outStream, progress));

    } while (inSize == dSize);

    CHECK_H(_encoder.End(outStream, progress));

    return S_OK;
}
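
// The streaming path through Code() reduces to this loop over the wrapper
// methods defined above (a sketch; error paths abbreviated):
//
//   CHECK_H(_encoder.Begin());
//   do {
//       BYTE* dict = _encoder.GetAvailableBuffer(dSize);  // room in dict
//       inSize = dSize;
//       CHECK_H(ReadStream(inStream, dict, &inSize));     // fill it
//       CHECK_H(_encoder.AddByteCount(inSize, outStream, progress));
//   } while (inSize == dSize);                            // short read = EOF
//   CHECK_H(_encoder.End(outStream, progress));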

}}

@@ -45,10 +45,33 @@ class CFastEncoder :
  public ICompressWriteCoderProperties,
  public CMyUnknownImp
{
  FL2_CCtx* _encoder;
  CByteBuffer inBuffer;
  UInt64 reduceSize;
  UInt32 dictSize;
  class FastLzma2
  {
  public:
    FastLzma2();
    ~FastLzma2();
    HRESULT SetCoderProperties(const PROPID *propIDs, const PROPVARIANT *props, UInt32 numProps);
    size_t GetDictSize() const;
    HRESULT Begin();
    BYTE* GetAvailableBuffer(unsigned long& size);
    HRESULT AddByteCount(size_t count, ISequentialOutStream *outStream, ICompressProgressInfo *progress);
    HRESULT End(ISequentialOutStream *outStream, ICompressProgressInfo *progress);
    void Cancel();

  private:
    bool UpdateProgress(ICompressProgressInfo *progress);
    HRESULT WaitAndReport(size_t& res, ICompressProgressInfo *progress);
    HRESULT WriteBuffers(ISequentialOutStream *outStream);

    FL2_CStream* fcs;
    FL2_dictBuffer dict;
    size_t dict_pos;

    FastLzma2(const FastLzma2&) = delete;
    FastLzma2& operator=(const FastLzma2&) = delete;
  };

  FastLzma2 _encoder;

public:
  MY_UNKNOWN_IMP3(

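// Design note: FastLzma2 is deliberately noncopyable (deleted copy ops),
// since it owns the FL2_CStream and the in-flight dictionary buffer;
// CFastEncoder holds it by value as _encoder.
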
@@ -1410,7 +1410,7 @@ typedef enum {
} FL2_strategy;

typedef struct {
    unsigned dictionaryLog;   /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory, slower */
    UInt32 dictionarySize;    /* largest match distance : larger == more compression, more memory needed during decompression; >= 27 == more memory per byte, slower */
    unsigned overlapFraction; /* overlap between consecutive blocks in 1/16 units: larger == more compression, slower */
    unsigned chainLog;        /* fully searched segment : larger == more compression, slower, more memory; hybrid mode only (ultra) */
    unsigned searchLog;       /* nb of searches : larger == more compression, slower; hybrid mode only (ultra) */
@@ -1424,19 +1424,23 @@ typedef struct {

#define FL2_MAX_7Z_CLEVEL 9

#define MB *(1U<<20)

static const FL2_compressionParameters FL2_7zCParameters[FL2_MAX_7Z_CLEVEL + 1] = {
    { 0,0,0,0,0,0,0 },
    { 20, 1, 7, 0, 6, 32, 1, 8, FL2_fast },       /* 1 */
    { 20, 2, 7, 0, 12, 32, 1, 8, FL2_fast },      /* 2 */
    { 21, 2, 7, 0, 16, 32, 1, 8, FL2_fast },      /* 3 */
    { 20, 2, 7, 0, 16, 32, 1, 8, FL2_opt },       /* 4 */
    { 24, 2, 9, 0, 40, 48, 1, 8, FL2_ultra },     /* 5 */
    { 25, 2, 10, 0, 48, 64, 1, 8, FL2_ultra },    /* 6 */
    { 26, 2, 11, 1, 60, 96, 1, 9, FL2_ultra },    /* 7 */
    { 27, 2, 12, 2, 128, 128, 1, 10, FL2_ultra }, /* 8 */
    { 27, 3, 14, 3, 252, 160, 0, 10, FL2_ultra }  /* 9 */
    { 0,0,0,0,0,0,0,0,FL2_fast },
    { 1 MB, 1, 7, 0, 6, 32, 1, 4, FL2_fast },       /* 1 */
    { 2 MB, 2, 7, 0, 10, 32, 1, 4, FL2_fast },      /* 2 */
    { 2 MB, 2, 7, 0, 10, 32, 1, 4, FL2_opt },       /* 3 */
    { 4 MB, 2, 7, 0, 14, 32, 1, 4, FL2_opt },       /* 4 */
    { 16 MB, 2, 9, 0, 42, 48, 1, 4, FL2_ultra },    /* 5 */
    { 32 MB, 2, 10, 0, 50, 64, 1, 4, FL2_ultra },   /* 6 */
    { 64 MB, 2, 11, 1, 62, 96, 1, 3, FL2_ultra },   /* 7 */
    { 64 MB, 4, 12, 2, 90, 273, 1, 3, FL2_ultra },  /* 8 */
    { 128 MB, 2, 14, 3, 254, 273, 0, 2, FL2_ultra } /* 9 */
};
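
// Note on the table: MB is a postfix macro, so "16 MB" expands to
// 16 * (1U << 20) bytes; the first column is now an absolute dictionary size
// rather than the log2 value used by the previous parameter set.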

#undef MB

#define RMF_BUILDER_SIZE (8 * 0x40100U)

void CCompressDialog::SetDictionary()
@@ -1512,7 +1516,7 @@ void CCompressDialog::SetDictionary()
    if (level > FL2_MAX_7Z_CLEVEL)
        level = FL2_MAX_7Z_CLEVEL;
    if (defaultDict == (UInt32)(Int32)-1)
        defaultDict = (UInt32)1 << FL2_7zCParameters[level].dictionaryLog;
        defaultDict = FL2_7zCParameters[level].dictionarySize;

    m_Dictionary.SetCurSel(0);

@@ -2020,11 +2024,11 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory)
{
    if (level > FL2_MAX_7Z_CLEVEL)
        level = FL2_MAX_7Z_CLEVEL;
    size += dict * 5 + (1UL << 18) * numThreads;
    unsigned depth = FL2_7zCParameters[level].searchDepth;
    UInt32 bufSize = UInt32(1) << (FL2_7zCParameters[level].dictionaryLog - FL2_7zCParameters[level].bufferLog);
    /* dual buffer is enabled in Lzma2Encoder.cpp so size is dict * 6 */
    size += dict * 6 + (1UL << 18) * numThreads;
    UInt32 bufSize = dict >> (12 - FL2_7zCParameters[level].bufferLog);
    size += (bufSize * 12 + RMF_BUILDER_SIZE) * numThreads;
    if (dict > (UInt32(1) << 26) || depth > 63)
    if (dict > (UInt32(1) << 26))
        size += dict;
    if (FL2_7zCParameters[level].strategy == FL2_ultra)
        size += (UInt32(4) << 14) + (UInt32(4) << FL2_7zCParameters[level].chainLog);

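// Illustrative arithmetic for the dict * 6 formula (an assumed example, not
// from the source: one thread, dict = 64 MB, bufferLog = 3, chainLog = 12,
// FL2_ultra):
//   dict * 6                           = 402,653,184
//   (1UL << 18) * numThreads           =     262,144
//   bufSize = dict >> (12 - 3)         =     131,072
//   (bufSize * 12 + RMF_BUILDER_SIZE)  =   3,672,064  (RMF_BUILDER_SIZE = 2,099,200)
//   ultra: (4 << 14) + (4 << 12)       =      81,920
// No extra dict term applies, since 64 MB is not > (1 << 26); the FL2 terms
// total roughly 406.7 MB.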