Update to 7-Zip Version 21.03

This commit is contained in:
Tino Reichardt
2021-08-25 22:33:02 +02:00
parent 27d965fd99
commit df06f31a42
90 changed files with 6003 additions and 1882 deletions

View File

@@ -1,5 +1,5 @@
/* 7zTypes.h -- Basic types
2021-04-25 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
@@ -62,6 +62,8 @@ typedef int SRes;
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32
// #define ENV_HAVE_LSTAT
@@ -95,6 +97,7 @@ typedef int WRes;
#define ERROR_DIRECTORY 267L
#define ERROR_TOO_MANY_POSTS 298L
#define ERROR_INTERNAL_ERROR 1359L
#define ERROR_INVALID_REPARSE_DATA 4392L
#define ERROR_REPARSE_TAG_INVALID 4393L
#define ERROR_REPARSE_TAG_MISMATCH 4394L
@@ -206,6 +209,8 @@ typedef size_t SIZE_T;
#endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64

View File

@@ -1,7 +1,7 @@
#define MY_VER_MAJOR 21
#define MY_VER_MINOR 02
#define MY_VER_MINOR 03
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "21.02 ZS v1.5.0 R1"
#define MY_VERSION_NUMBERS "21.03 ZS v1.5.0 R1"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@@ -10,7 +10,7 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2021-05-16"
#define MY_DATE "2021-08-26"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov, Tino Reichardt"

10
C/Aes.c
View File

@@ -1,5 +1,5 @@
/* Aes.c -- AES encryption / decryption
2021-04-01 : Igor Pavlov : Public domain */
2021-05-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -365,10 +365,10 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
#ifdef MY_CPU_LE_UNALIGN
*((UInt32 *)(void *)data) ^= t;
#else
data[0] ^= (t & 0xFF);
data[1] ^= ((t >> 8) & 0xFF);
data[2] ^= ((t >> 16) & 0xFF);
data[3] ^= ((t >> 24));
data[0] = (Byte)(data[0] ^ (t & 0xFF));
data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF));
data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF));
data[3] = (Byte)(data[3] ^ ((t >> 24)));
#endif
}
}

View File

@@ -1,5 +1,5 @@
/* Alloc.c -- Memory allocation functions
2020-10-29 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -247,14 +247,14 @@ static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
#ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif
/*
uintptr_t : <stdint.h> C99 (optional)

View File

@@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions
2021-02-08 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
@@ -30,8 +30,15 @@ void BigFree(void *address);
#endif
extern const ISzAlloc g_Alloc;
#ifdef _WIN32
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
#else
#define g_BigAlloc g_AlignedAlloc
#define g_MidAlloc g_AlignedAlloc
#endif
extern const ISzAlloc g_AlignedAlloc;

View File

@@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
2021-04-28 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -278,6 +278,30 @@ BoolInt CPU_IsSupported_SHA()
#include <Windows.h>
#endif
BoolInt CPU_IsSupported_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False;
#endif
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5); // avx2
}
}
BoolInt CPU_IsSupported_VAES_AVX2()
{
Cx86cpuid p;
@@ -329,10 +353,9 @@ BoolInt CPU_IsSupported_PageGB()
#include <Windows.h>
BoolInt CPU_IsSupported_CRC32()
{ return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO()
{ return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else
@@ -356,17 +379,27 @@ static void Print_sysctlbyname(const char *name)
}
*/
BoolInt CPU_IsSupported_CRC32(void)
static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
{
UInt32 val = 0;
if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
return 1;
return 0;
}
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
UInt32 val = 0;
if (My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &val) == 0 && val == 1)
return 1;
return 0;
BoolInt CPU_IsSupported_CRC32(void)
{
return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}
BoolInt CPU_IsSupported_NEON(void)
{
return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
}
#ifdef MY_CPU_ARM64
@@ -390,18 +423,25 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#include <asm/hwcap.h>
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name)) ? 1 : 0; }
MY_HWCAP_CHECK_FUNC_2(name, name)
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
MY_HWCAP_CHECK_FUNC(NEON)
#endif // USE_HWCAP

View File

@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
2021-04-25 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@@ -225,7 +225,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#else
#ifdef __xlC__
// for XLC compiler:
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
#define MY_CPU_pragma_pop _Pragma("pack()")
#else
@@ -253,8 +252,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) \
|| defined(__ARM_FEATURE_UNALIGNED)
|| defined(MY_CPU_ARM64)
#define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
@@ -264,11 +267,15 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else
@@ -282,8 +289,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
@@ -294,12 +299,22 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
#endif
#ifndef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
@@ -392,6 +407,7 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
@@ -401,6 +417,7 @@ BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
BoolInt CPU_IsSupported_CRC32(void);
BoolInt CPU_IsSupported_NEON(void);
#if defined(_WIN32)
BoolInt CPU_IsSupported_CRYPTO(void);

1000
C/LzFind.c
View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
2021-02-09 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
@@ -15,7 +15,7 @@ typedef struct _CMatchFinder
Byte *buffer;
UInt32 pos;
UInt32 posLimit;
UInt32 streamPos;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
@@ -51,17 +51,19 @@ typedef struct _CMatchFinder
UInt64 expectedDataSize;
} CMatchFinder;
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
*/
int MatchFinder_NeedMove(CMatchFinder *p);
// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
@@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/*
#define Inline_MatchFinder_InitPos(p, val) \
(p)->pos = (val); \
(p)->streamPos = (val);
*/
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
(p)->pos -= (subValue); \
(p)->streamPos -= (subValue);
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
@@ -91,7 +104,7 @@ Conditions:
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
@@ -101,21 +114,23 @@ typedef struct _IMatchFinder
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
} IMatchFinder;
} IMatchFinder2;
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_3(CMatchFinder *p, int readData);
void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void LzFindPrepare(void);
EXTERN_C_END
#endif

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2019-11-05 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@@ -11,22 +11,24 @@ EXTERN_C_BEGIN
typedef struct _CMtSync
{
UInt32 numProcessedBlocks;
CThread thread;
UInt64 affinity;
BoolInt wasCreated;
BoolInt needStart;
BoolInt csWasInitialized;
BoolInt csWasEntered;
BoolInt exit;
BoolInt stopWriting;
CThread thread;
CAutoResetEvent canStart;
CAutoResetEvent wasStarted;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
BoolInt csWasInitialized;
BoolInt csWasEntered;
CCriticalSection cs;
UInt32 numProcessedBlocks;
UInt64 affinity;
// UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@@ -42,8 +44,8 @@ typedef struct _CMatchFinderMt
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
UInt32 btBufPos;
UInt32 btBufPosLimit;
const UInt32 *btBufPos;
const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
@@ -54,6 +56,10 @@ typedef struct _CMatchFinderMt
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
UInt32 failure_LZ_BT; // failure in BT transfered to LZ
// UInt32 failure_LZ_LZ; // failure in LZ tables
UInt32 failureBuf[1];
// UInt32 crc[256];
/* LZ + BT */
CMtSync btSync;
@@ -64,6 +70,8 @@ typedef struct _CMatchFinderMt
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
UInt32 failure_BT;
CLzRef *son;
UInt32 matchMaxLen;
@@ -71,7 +79,7 @@ typedef struct _CMatchFinderMt
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be historySize + 1 */
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
@@ -81,13 +89,19 @@ typedef struct _CMatchFinderMt
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
// CMatchFinder MatchFinder;
} CMatchFinderMt;
// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END

578
C/LzFindOpt.c Normal file
View File

@@ -0,0 +1,578 @@
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "LzFind.h"
// #include "LzFindMt.h"
// #define LOG_ITERS
// #define LOG_THREAD
#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
// #define PRF(x)
#endif
#ifdef LOG_ITERS
#include <stdio.h>
UInt64 g_NumIters_Tree;
UInt64 g_NumIters_Loop;
UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif
// ---------- BT THREAD ----------
#define USE_SON_PREFETCH
#define USE_LONG_MATCH_OPT
#define kEmptyHashValue 0
// #define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
/*
MY_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
{
do
{
UInt32 delta;
if (hash == size)
break;
delta = *hash++;
if (delta == 0 || delta > (UInt32)pos)
return NULL;
lenLimit++;
if (delta == (UInt32)pos)
{
CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
const Byte *len0 = cur, *len1 = cur;
UInt32 cutValue = _cutValue;
const Byte *maxLen = cur + _maxLen;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
hash++;
pos++;
cur++;
lenLimit++;
{
CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
#else
const UInt32 p0 = ptr[0 + (diff * 2)];
const UInt32 p1 = ptr[1 + (diff * 2)];
ptr[0] = p0;
ptr[1] = p1;
// ptr[0] = ptr[0 + (diff * 2)];
// ptr[1] = ptr[1 + (diff * 2)];
#endif
}
// PrintSon(son + 2, pos - 1);
// printf("\npos = %x delta = %x\n", pos, delta);
len++;
*d++ = 2;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
if (delta >= curMatch)
return NULL;
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
if (delta >= curMatch)
return NULL;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= pos)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/
/* define cbs if you use 2 functions.
GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
do not define cbs if you use 1 function:
GetMatchesSpecN_2()
*/
// #define cbs _cyclicBufferSize
/*
we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes);
MY_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
lenLimit++;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 cutValue = _cutValue;
const Byte *len0 = cur, *len1 = cur;
const Byte *maxLen = cur + _maxLen;
// if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// SPEC code
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
*d++ = 2;
*d++ = (UInt32)(lenLimit - cur);
*d++ = delta - 1;
cur++;
lenLimit++;
// SPEC
_cyclicBufferPos++;
{
// SPEC code
CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
const CLzRef *src = dest + ((diff
+ (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
// CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
pos++;
hash++;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
} // for() end for long matches
}
#endif
break; // break from TREE iterations
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
if (delta >= curMatch)
return NULL;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
if (delta >= curMatch)
return NULL;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= cbs)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
/*
typedef UInt32 uint32plus; // size_t
UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 *_distances = ++d;
uint32plus len0 = 0, len1 = 0;
UInt32 cutValue = _cutValue;
uint32plus maxLen = _maxLen;
// lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const Byte *pb = cur - delta;
uint32plus len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
while (++len != lenLimit)
if (pb[len] != cur[len])
break;
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)len;
*d++ = delta - 1;
if (len == lenLimit)
{
{
const UInt32 pair1 = pair[1];
*ptr0 = pair1;
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
}
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
for (;;)
{
*d++ = 2;
*d++ = (UInt32)lenLimit;
*d++ = delta - 1;
_cyclicBufferPos++;
{
CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
const CLzRef *src = dest + ((diff +
(ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
hash++;
pos++;
cur++;
pb++;
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta;
if (pb[len] < cur[len])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
{
if (delta >= curMatch)
return NULL;
delta = (UInt32)pos - delta;
if (delta >= cbs
// delta >= _cyclicBufferSize || delta >= pos
|| --cutValue == 0)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/

View File

@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
2021-04-01: Igor Pavlov : Public domain */
2021-07-10: Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,6 +12,7 @@
#include <stdio.h>
#endif
#include "CpuArch.h"
#include "LzmaEnc.h"
#include "LzFind.h"
@@ -36,8 +37,8 @@ void LzmaEnc_RestoreState(CLzmaEncHandle pp);
static unsigned g_STAT_OFFSET = 0;
#endif
#define kLzmaMaxHistorySize ((UInt32)3 << 29)
/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
/* for good normalization speed we still reserve 256 MB before 4 GB range */
#define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
@@ -78,13 +79,12 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->dictSize > p->reduceSize)
{
unsigned i;
UInt32 reduceSize = (UInt32)p->reduceSize;
for (i = 11; i <= 30; i++)
{
if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
}
UInt32 v = (UInt32)p->reduceSize;
const UInt32 kReduceMin = ((UInt32)1 << 12);
if (v < kReduceMin)
v = kReduceMin;
if (p->dictSize > v)
p->dictSize = v;
}
if (p->lc < 0) p->lc = 3;
@@ -113,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
/* BSR code is fast for some new CPUs */
/* #define LZMA_LOG_BSR */
/*
x86/x64:
BSR:
IF (SRC == 0) ZF = 1, DEST is undefined;
AMD : DEST is unchanged;
IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
BSR is slow in some processors
LZCNT:
IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
IF (DEST == 0) ZF = 1;
LZCNT works only in new processors starting from Haswell.
if LZCNT is not supported by processor, then it's executed as BSR.
LZCNT can be faster than BSR, if supported.
*/
// #define LZMA_LOG_BSR
#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
#if (defined(__clang__) && (__clang_major__ >= 6)) \
|| (defined(__GNUC__) && (__GNUC__ >= 6))
#define LZMA_LOG_BSR
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
// #if defined(MY_CPU_ARM_OR_ARM64)
#define LZMA_LOG_BSR
// #endif
#endif
#endif
// #include <intrin.h>
#ifdef LZMA_LOG_BSR
#define kDicLogSizeMaxCompress 32
#if defined(__clang__) \
|| defined(__GNUC__)
#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
/*
C code: : (30 - __builtin_clz(x))
gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
clang10 for x64 : 31 + (bsr(x) xor -32)
*/
static unsigned GetPosSlot1(UInt32 pos)
#define MY_clz(x) ((unsigned)__builtin_clz(x))
// __lzcnt32
// __builtin_ia32_lzcnt_u32
#else // #if defined(_MSC_VER)
#ifdef MY_CPU_ARM_OR_ARM64
#define MY_clz _CountLeadingZeros
#else // if defined(MY_CPU_X86_OR_AMD64)
// #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
// _BitScanReverse code is not optimal for some MSVC compilers
#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
res = (zz + zz) + (pos >> zz); }
#endif // MY_CPU_X86_OR_AMD64
#endif // _MSC_VER
#ifndef BSR2_RET
#define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
res = (zz + zz) + (pos >> zz); }
#endif
unsigned GetPosSlot1(UInt32 pos);
unsigned GetPosSlot1(UInt32 pos)
{
unsigned res;
BSR2_RET(pos, res);
@@ -133,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
#else
#define kNumLogBits (9 + sizeof(size_t) / 2)
/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
#else // ! LZMA_LOG_BSR
#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
@@ -183,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
#endif
#endif // LZMA_LOG_BSR
#define LZMA_NUM_REPS 4
@@ -319,7 +386,7 @@ typedef UInt32 CProbPrice;
typedef struct
{
void *matchFinderObj;
IMatchFinder matchFinder;
IMatchFinder2 matchFinder;
unsigned optCur;
unsigned optEnd;
@@ -364,10 +431,14 @@ typedef struct
// begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
// #else
// CMatchFinder matchFinderBase;
#endif
CMatchFinder matchFinderBase;
// we suppose that we have 8-bytes alignment after CMatchFinder
#ifndef _7ZIP_ST
Byte pad[128];
#endif
@@ -375,8 +446,10 @@ typedef struct
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
// we want {len , dist} pairs to be 8-bytes aligned in matches array
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
// we want 8-bytes alignment here
UInt32 alignPrices[kAlignTableSize];
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
@@ -405,12 +478,19 @@ typedef struct
CSaveState saveState;
// BoolInt mf_Failure;
#ifndef _7ZIP_ST
Byte pad2[128];
#endif
} CLzmaEnc;
#define MFB (p->matchFinderBase)
/*
#ifndef _7ZIP_ST
#define MFB (p->matchFinderMt.MatchFinder)
#endif
*/
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
@@ -475,11 +555,21 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.lc > LZMA_LC_MAX
|| props.lp > LZMA_LP_MAX
|| props.pb > LZMA_PB_MAX
|| props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
|| props.dictSize > kLzmaMaxHistorySize)
|| props.pb > LZMA_PB_MAX)
return SZ_ERROR_PARAM;
if (props.dictSize > kLzmaMaxHistorySize)
props.dictSize = kLzmaMaxHistorySize;
#ifndef LZMA_LOG_BSR
{
const UInt64 dict64 = props.dictSize;
if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
return SZ_ERROR_PARAM;
}
#endif
p->dictSize = props.dictSize;
{
unsigned fb = (unsigned)props.fb;
@@ -494,7 +584,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
p->pb = (unsigned)props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
MFB.btMode = (Byte)(props.btMode ? 1 : 0);
{
unsigned numHashBytes = 4;
if (props.btMode)
@@ -504,10 +594,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
}
if (props.numHashBytes >= 5) numHashBytes = 5;
p->matchFinderBase.numHashBytes = numHashBytes;
MFB.numHashBytes = numHashBytes;
}
p->matchFinderBase.cutValue = props.mc;
MFB.cutValue = props.mc;
p->writeEndMark = (BoolInt)props.writeEndMark;
@@ -531,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
p->matchFinderBase.expectedDataSize = expectedDataSiize;
MFB.expectedDataSize = expectedDataSiize;
}
@@ -1007,7 +1097,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
p->additionalOffset++;
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
{
const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
// if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
numPairs = (unsigned)(d - p->matches);
}
*numPairsRes = numPairs;
#ifdef SHOW_STAT
@@ -1023,7 +1117,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
if (numPairs == 0)
return 0;
{
unsigned len = p->matches[(size_t)numPairs - 2];
const unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
{
@@ -1033,7 +1127,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
@@ -1189,6 +1283,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
break;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
@@ -1201,10 +1297,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
matches = p->matches;
#define MATCHES matches
// #define MATCHES p->matches
if (mainLen >= p->numFastBytes)
{
p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
return mainLen;
}
@@ -1298,13 +1396,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (len < 2)
len = 2;
else
while (len > matches[offs])
while (len > MATCHES[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
UInt32 dist = matches[(size_t)offs + 1];
UInt32 dist = MATCHES[(size_t)offs + 1];
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
unsigned lenToPosState = GetLenToPosState(len);
@@ -1328,7 +1426,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->extra = 0;
}
if (len == matches[offs])
if (len == MATCHES[offs])
{
offs += 2;
if (offs == numPairs)
@@ -1749,8 +1847,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (newLen > numAvail)
{
newLen = numAvail;
for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
matches[numPairs] = (UInt32)newLen;
for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
MATCHES[numPairs] = (UInt32)newLen;
numPairs += 2;
}
@@ -1769,9 +1867,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
offs = 0;
while (startLen > matches[offs])
while (startLen > MATCHES[offs])
offs += 2;
dist = matches[(size_t)offs + 1];
dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
@@ -1798,7 +1896,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
if (len == matches[offs])
if (len == MATCHES[offs])
{
// if (p->_maxMode) {
// MATCH : LIT : REP_0
@@ -1863,7 +1961,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
offs += 2;
if (offs == numPairs)
break;
dist = matches[(size_t)offs + 1];
dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
}
@@ -2081,8 +2179,23 @@ static SRes CheckErrors(CLzmaEnc *p)
return p->result;
if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE;
if (p->matchFinderBase.result != SZ_OK)
#ifndef _7ZIP_ST
if (
// p->mf_Failure ||
(p->mtMode &&
( // p->matchFinderMt.failure_LZ_LZ ||
p->matchFinderMt.failure_LZ_BT))
)
{
p->result = MY_HRES_ERROR__INTERNAL_ERROR;
// printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
}
#endif
if (MFB.result != SZ_OK)
p->result = SZ_ERROR_READ;
if (p->result != SZ_OK)
p->finished = True;
return p->result;
@@ -2223,11 +2336,11 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
static void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
MatchFinder_Construct(&p->matchFinderBase);
MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST
p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt);
p->matchFinderMt.MatchFinder = &p->matchFinderBase;
#endif
{
@@ -2243,7 +2356,6 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
LzmaEnc_InitPriceTables(p->ProbPrices);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
}
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
@@ -2269,7 +2381,7 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif
MatchFinder_Free(&p->matchFinderBase, allocBig);
MatchFinder_Free(&MFB, allocBig);
LzmaEnc_FreeLits(p, alloc);
RangeEnc_Free(&p->rc, alloc);
}
@@ -2287,6 +2399,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
UInt32 nowPos32, startPos32;
if (p->needInit)
{
#ifndef _7ZIP_ST
if (p->mtMode)
{
RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
}
#endif
p->matchFinder.Init(p->matchFinderObj);
p->needInit = 0;
}
@@ -2582,11 +2700,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
UInt32 beforeSize = kNumOpts;
UInt32 dictSize;
if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM;
#ifndef _7ZIP_ST
p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif
{
@@ -2605,30 +2725,50 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
}
}
p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
if (beforeSize + p->dictSize < keepWindowSize)
beforeSize = keepWindowSize - p->dictSize;
dictSize = p->dictSize;
if (dictSize == ((UInt32)2 << 30) ||
dictSize == ((UInt32)3 << 30))
{
/* 21.03 : here we reduce the dictionary for 2 reasons:
1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,
where data size is aligned for 1 GB: 5/6/8 GB.
That reducing must be >= 1 for such corner cases. */
dictSize -= 1;
}
if (beforeSize + dictSize < keepWindowSize)
beforeSize = keepWindowSize - dictSize;
/* in worst case we can look ahead for
max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
we send larger value for (keepAfter) to MantchFinder_Create():
(numFastBytes + LZMA_MATCH_LEN_MAX + 1)
*/
#ifndef _7ZIP_ST
if (p->mtMode)
{
RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
LZMA_MATCH_LEN_MAX
+ 1 /* 18.04 */
RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig));
p->matchFinderObj = &p->matchFinderMt;
p->matchFinderBase.bigHash = (Byte)(
(p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
MFB.bigHash = (Byte)(
(p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
}
else
#endif
{
if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
, allocBig))
return SZ_ERROR_MEM;
p->matchFinderObj = &p->matchFinderBase;
MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
p->matchFinderObj = &MFB;
MatchFinder_CreateVTable(&MFB, &p->matchFinder);
}
return SZ_OK;
@@ -2700,6 +2840,8 @@ static void LzmaEnc_Init(CLzmaEnc *p)
p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
// p->mf_Failure = False;
}
@@ -2742,7 +2884,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
p->matchFinderBase.stream = inStream;
MFB.stream = inStream;
p->needInit = 1;
p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
@@ -2753,16 +2895,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
p->matchFinderBase.stream = inStream;
MFB.stream = inStream;
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
{
p->matchFinderBase.directInput = 1;
p->matchFinderBase.bufferBase = (Byte *)src;
p->matchFinderBase.directInputRem = srcLen;
MFB.directInput = 1;
MFB.bufferBase = (Byte *)src;
MFB.directInputRem = srcLen;
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
@@ -2895,7 +3037,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
LzmaEnc_Finish(p);
/*
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
res = SZ_ERROR_FAIL;
}
*/
@@ -2914,29 +3056,37 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
unsigned i;
UInt32 dictSize = p->dictSize;
if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
if (dictSize >= ((UInt32)1 << 22))
{
const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
dictSize = (dictSize + kDictMask) & ~kDictMask;
}
else for (i = 11; i <= 30; i++)
{
if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; }
if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; }
}
const CLzmaEnc *p = (const CLzmaEnc *)pp;
const UInt32 dictSize = p->dictSize;
UInt32 v;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
// we write aligned dictionary value to properties for lzma decoder
if (dictSize >= ((UInt32)1 << 21))
{
const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
v = (dictSize + kDictMask) & ~kDictMask;
if (v < dictSize)
v = dictSize;
}
else
{
unsigned i = 11 * 2;
do
{
v = (UInt32)(2 + (i & 1)) << (i >> 1);
i++;
}
while (v < dictSize);
}
for (i = 0; i < 4; i++)
props[1 + i] = (Byte)(dictSize >> (8 * i));
return SZ_OK;
SetUi32(props + 1, v);
return SZ_OK;
}
}

View File

@@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
2021-02-09 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -495,12 +495,7 @@ SRes MtCoder_Code(CMtCoder *p)
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
if (Semaphore_IsCreated(&p->blocksSemaphore))
{
RINOK_THREAD(Semaphore_Close(&p->blocksSemaphore));
}
RINOK_THREAD(Semaphore_Create(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
}
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)

View File

@@ -1,5 +1,5 @@
/* Sha1.c -- SHA-1 Hash
2021-04-01 : Igor Pavlov : Public domain
2021-07-13 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -34,7 +34,7 @@ This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ l
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910
#if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
@@ -435,7 +435,37 @@ void Sha1Prepare()
#endif
{
// printf("\n========== HW SHA1 ======== \n");
f = f_hw = Sha1_UpdateBlocks_HW;
#if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
/* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
It generated incorrect SHA-1 code.
21.03 : we test sha1-hardware code at runtime initialization */
#pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
Byte data[64];
unsigned i;
for (i = 0; i < sizeof(data); i += 2)
{
data[i ] = (Byte)(i);
data[i + 1] = (Byte)(i + 1);
}
Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
if ( state[0] != 0x9acd7297
|| state[1] != 0x4624d898
|| state[2] != 0x0bf079f0
|| state[3] != 0x031e61b3
|| state[4] != 0x8323fe20)
{
// printf("\n========== SHA-1 hardware version failure ======== \n");
}
else
#endif
{
f = f_hw = Sha1_UpdateBlocks_HW;
}
}
g_FUNC_UPDATE_BLOCKS = f;
g_FUNC_UPDATE_BLOCKS_HW = f_hw;

View File

@@ -1,5 +1,5 @@
/* Threads.c -- multithreading library
2021-04-25 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -150,6 +150,17 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
return HandleToWRes(*p);
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// if (Semaphore_IsCreated(p))
{
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
}
return Semaphore_Create(p, initCount, maxCount);
}
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
@@ -158,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
/* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
/* InitializeCriticalSection() can raise exception:
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
Windows Vista+ : no exceptions */
#ifdef _MSC_VER
__try
#endif
@@ -167,7 +180,7 @@ WRes CriticalSection_Init(CCriticalSection *p)
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
__except (EXCEPTION_EXECUTE_HANDLER) { return 1; }
__except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
@@ -406,6 +419,27 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
return 0;
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (Semaphore_IsCreated(p))
{
/*
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
*/
if (initCount > maxCount || maxCount < 1)
return EINVAL;
// return EINVAL; // for debug
p->_count = initCount;
p->_maxCount = maxCount;
return 0;
}
return Semaphore_Create(p, initCount, maxCount);
}
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
{
UInt32 newCount;

View File

@@ -1,5 +1,5 @@
/* Threads.h -- multithreading library
2021-04-25 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H
#define __7Z_THREADS_H
@@ -8,14 +8,18 @@
#include <Windows.h>
#else
#if !defined(__APPLE__) && !defined(_AIX)
#if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef _7ZIP_AFFINITY_DISABLE
#define _7ZIP_AFFINITY_SUPPORTED
// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE
#endif
#endif
#endif
#include <pthread.h>
#endif
#include "7zTypes.h"
@@ -122,6 +126,7 @@ typedef HANDLE CSemaphore;
#define Semaphore_Close(p) HandlePtr_Close(p)
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
WRes Semaphore_Release1(CSemaphore *p);
@@ -172,6 +177,7 @@ typedef struct _CSemaphore
#define Semaphore_IsCreated(p) ((p)->_created)
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
WRes Semaphore_Wait(CSemaphore *p);

View File

@@ -136,6 +136,10 @@ SOURCE=..\..\LzFindMt.h
# End Source File
# Begin Source File
SOURCE=..\..\LzFindOpt.c
# End Source File
# Begin Source File
SOURCE=..\..\LzHash.h
# End Source File
# Begin Source File

View File

@@ -13,6 +13,7 @@ C_OBJS = \
$O\Alloc.obj \
$O\LzFind.obj \
$O\LzFindMt.obj \
$O\LzFindOpt.obj \
$O\LzmaDec.obj \
$O\LzmaEnc.obj \
$O\LzmaLib.obj \

View File

@@ -9,4 +9,3 @@ USE_ASM=1
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

View File

@@ -9,4 +9,3 @@ USE_ASM=1
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

View File

@@ -8,4 +8,3 @@ MY_ARCH=-m32
USE_ASM=1
CC=$(CROSS_COMPILE)gcc
CXX=$(CROSS_COMPILE)g++

View File

@@ -49,5 +49,3 @@ CFLAGS_WARN_GCC_PPMD_UNALIGNED = \
CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \
# $(CFLAGS_WARN_GCC_PPMD_UNALIGNED)