This commit is contained in:
Igor Pavlov
2021-07-22 23:00:14 +01:00
committed by Kornel
parent 4a960640a3
commit 585698650f
619 changed files with 34904 additions and 10859 deletions

181
Asm/arm64/7zAsm.S Normal file
View File

@@ -0,0 +1,181 @@
// 7zAsm.S -- ASM macros for arm64
// 2021-04-25 : Igor Pavlov : Public domain
// x86-style numbered register names r0..r30, each mapped by the C preprocessor
// onto the 64-bit arm64 register with the same number (x0..x30).
#define r0 x0
#define r1 x1
#define r2 x2
#define r3 x3
#define r4 x4
#define r5 x5
#define r6 x6
#define r7 x7
#define r8 x8
#define r9 x9
#define r10 x10
#define r11 x11
#define r12 x12
#define r13 x13
#define r14 x14
#define r15 x15
#define r16 x16
#define r17 x17
#define r18 x18
#define r19 x19
#define r20 x20
#define r21 x21
#define r22 x22
#define r23 x23
#define r24 x24
#define r25 x25
#define r26 x26
#define r27 x27
#define r28 x28
#define r29 x29
#define r30 x30
// Registers that carry the first three function parameters (x0, x1, x2).
#define REG_ABI_PARAM_0 r0
#define REG_ABI_PARAM_1 r1
#define REG_ABI_PARAM_2 r2
// Two-operand (x86-style) wrappers around three-operand arm64 instructions:
// each macro uses \reg as both the destination and the first source operand.

// reg = reg + param
.macro p2_add reg:req, param:req
add \reg, \reg, \param
.endm
// reg = reg - param
.macro p2_sub reg:req, param:req
sub \reg, \reg, \param
.endm
// reg = reg - param, using the flag-setting form (subs)
.macro p2_sub_s reg:req, param:req
subs \reg, \reg, \param
.endm
// reg = reg & param
.macro p2_and reg:req, param:req
and \reg, \reg, \param
.endm
// reg = reg ^ param (x86 "xor" spelled with arm64 eor)
.macro xor reg:req, param:req
eor \reg, \reg, \param
.endm
// reg = reg | param (x86 "or" spelled with arm64 orr)
.macro or reg:req, param:req
orr \reg, \reg, \param
.endm
// reg = reg << param (logical shift left)
.macro shl reg:req, param:req
lsl \reg, \reg, \param
.endm
// reg = reg >> param (logical shift right)
.macro shr reg:req, param:req
lsr \reg, \reg, \param
.endm
// reg = reg >> param (arithmetic shift right)
.macro sar reg:req, param:req
asr \reg, \reg, \param
.endm
// reg = -reg
.macro p1_neg reg:req
neg \reg, \reg
.endm
// reg = reg - 1
.macro dec reg:req
sub \reg, \reg, 1
.endm
// reg = reg - 1, using the flag-setting form (subs)
.macro dec_s reg:req
subs \reg, \reg, 1
.endm
// reg = reg + 1
.macro inc reg:req
add \reg, \reg, 1
.endm
// reg = reg + 1, using the flag-setting form (adds)
.macro inc_s reg:req
adds \reg, \reg, 1
.endm
// reg = reg * param
.macro imul reg:req, param:req
mul \reg, \reg, \param
.endm
/*
arm64 and arm use an inverted carry flag (compared with x86) after subs/cmp
instructions, so the x86 unsigned conditions map to arm conditions as follows:
arm64-arm : x86
b.lo / b.cc : jb / jc
b.hs / b.cs : jae / jnc
*/
// x86-style jump mnemonics implemented with arm64 branch instructions.
// \lab is the branch target label.

// unconditional jump
.macro jmp lab:req
b \lab
.endm
// jump if equal
.macro je lab:req
b.eq \lab
.endm
// jump if zero (same condition as je)
.macro jz lab:req
b.eq \lab
.endm
// jump if not zero (same condition as jne)
.macro jnz lab:req
b.ne \lab
.endm
// jump if not equal
.macro jne lab:req
b.ne \lab
.endm
// jump if below (unsigned <)
.macro jb lab:req
b.lo \lab
.endm
// jump if below or equal (unsigned <=)
.macro jbe lab:req
b.ls \lab
.endm
// jump if above (unsigned >)
.macro ja lab:req
b.hi \lab
.endm
// jump if above or equal (unsigned >=)
.macro jae lab:req
b.hs \lab
.endm
// x86-style conditional moves implemented with csel:
// dest = (condition holds) ? srcTrue : dest
// The flags must already have been set by an earlier flag-setting instruction.

// move if equal
.macro cmove dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, eq
.endm
// move if not equal
.macro cmovne dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, ne
.endm
// move if sign flag set (negative result)
.macro cmovs dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, mi
.endm
// move if sign flag clear (non-negative result)
.macro cmovns dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, pl
.endm
// move if below (unsigned <); uses "lo", the same mapping as the jb macro
.macro cmovb dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, lo
.endm
// move if above or equal (unsigned >=); uses "hs", the same mapping as the jae macro
.macro cmovae dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, hs
.endm
// Alignment helpers: align the location counter to 16 / 32 / 64 bytes.
// The third .p2align operand is the maximum number of padding bytes allowed;
// it is set to (alignment - 1), so the alignment is always performed.
// NOTE(review): each macro declares one parameter that is literally named
// "macro" and is never referenced in the body. It looks like a leftover of the
// MASM "NAME macro" syntax - confirm that no caller passes an argument before
// removing it.
.macro MY_ALIGN_16 macro
.p2align 4,, (1 << 4) - 1
.endm
.macro MY_ALIGN_32 macro
.p2align 5,, (1 << 5) - 1
.endm
.macro MY_ALIGN_64 macro
.p2align 6,, (1 << 6) - 1
.endm

1487
Asm/arm64/LzmaDecOpt.S Normal file
View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,32 @@
; 7zAsm.asm -- ASM macros
; 2018-02-03 : Igor Pavlov : Public domain
; 2021-02-07 : Igor Pavlov : Public domain
; ---------- target / ABI selection flags ----------
; x64 is defined automatically when the symbol RAX is known to the assembler
; (presumably this is only the case when assembling in 64-bit mode - confirm).
ifdef RAX
x64 equ 1
endif
; IS_X64 : numeric (0 / 1) form of the x64 symbol, usable in "if" expressions
ifdef x64
IS_X64 equ 1
else
IS_X64 equ 0
endif
; IS_LINUX : 1 if ABI_LINUX was defined by the build, else 0
ifdef ABI_LINUX
IS_LINUX equ 1
else
IS_LINUX equ 0
endif
; IS_CDECL is defined for 32-bit builds only; it does not exist when x64 is
; defined, so it must only be tested on paths that are assembled for x86.
ifndef x64
; Use ABI_CDECL for x86 (32-bit) only
; if ABI_CDECL is not defined, we use fastcall abi
ifdef ABI_CDECL
IS_CDECL equ 1
else
IS_CDECL equ 0
endif
endif
MY_ASM_START macro
ifdef x64
@@ -14,8 +41,12 @@ endm
MY_PROC macro name:req, numParams:req
align 16
proc_numParams = numParams
ifdef x64
if (IS_X64 gt 0)
proc_name equ name
elseif (IS_LINUX gt 0)
proc_name equ name
elseif (IS_CDECL gt 0)
proc_name equ @CatStr(_,name)
else
proc_name equ @CatStr(@,name,@, %numParams * 4)
endif
@@ -23,18 +54,19 @@ MY_PROC macro name:req, numParams:req
endm
MY_ENDP macro
ifdef x64
if (IS_X64 gt 0)
ret
else
if proc_numParams LT 3
elseif (IS_CDECL gt 0)
ret
elseif (proc_numParams LT 3)
ret
else
ret (proc_numParams - 2) * 4
endif
endif
proc_name ENDP
endm
ifdef x64
REG_SIZE equ 8
REG_LOGAR_SIZE equ 3
@@ -103,6 +135,24 @@ else
r7 equ x7
endif
; Push / pop pair for r3 and r5, defined only for x64 builds with ABI_LINUX.
; MY_POP_2_REGS pops in the reverse order of MY_PUSH_2_REGS, so the two macros
; must be used as a matched pair.
ifdef x64
ifdef ABI_LINUX
MY_PUSH_2_REGS macro
push r3
push r5
endm
MY_POP_2_REGS macro
pop r5
pop r3
endm
endif
endif
MY_PUSH_4_REGS macro
push r3
push r5
@@ -118,17 +168,74 @@ MY_POP_4_REGS macro
endm
ifdef x64
; for WIN64-x64 ABI:
; for fastcall and for WIN-x64
REG_PARAM_0_x equ x1
REG_PARAM_0 equ r1
REG_PARAM_1 equ r2
ifndef x64
; for x86-fastcall
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
REG_ABI_PARAM_0 equ REG_PARAM_0
REG_ABI_PARAM_1 equ REG_PARAM_1
else
; x64
if (IS_LINUX eq 0)
; for WIN-x64:
REG_PARAM_2 equ r8
REG_PARAM_3 equ r9
MY_PUSH_PRESERVED_REGS macro
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
REG_ABI_PARAM_0 equ REG_PARAM_0
REG_ABI_PARAM_1 equ REG_PARAM_1
REG_ABI_PARAM_2 equ REG_PARAM_2
REG_ABI_PARAM_3 equ REG_PARAM_3
else
; for LINUX-x64:
REG_LINUX_PARAM_0_x equ x7
REG_LINUX_PARAM_0 equ r7
REG_LINUX_PARAM_1 equ r6
REG_LINUX_PARAM_2 equ r2
REG_LINUX_PARAM_3 equ r1
REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x
REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0
REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1
REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2
REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3
; MY_ABI_LINUX_TO_WIN_n: copy the first n parameters from the LINUX-x64
; parameter registers (r7, r6, r2, r1) to the WIN-x64 parameter registers
; (r1, r2, r8, r9), so code written for the WIN-x64 register layout can be
; reused. The order of the moves matters: r2 and r1 are both a LINUX source and
; a WIN destination, so each is read (copied to r8 / r9) before it is
; overwritten.

; 2 parameters: r2 = r6, r1 = r7
MY_ABI_LINUX_TO_WIN_2 macro
mov r2, r6
mov r1, r7
endm
; 3 parameters: r8 = r2 first, then as for 2 parameters
MY_ABI_LINUX_TO_WIN_3 macro
mov r8, r2
mov r2, r6
mov r1, r7
endm
; 4 parameters: r9 = r1 first, then as for 3 parameters
MY_ABI_LINUX_TO_WIN_4 macro
mov r9, r1
mov r8, r2
mov r2, r6
mov r1, r7
endm
endif ; IS_LINUX
MY_PUSH_PRESERVED_ABI_REGS macro
if (IS_LINUX gt 0)
MY_PUSH_2_REGS
else
MY_PUSH_4_REGS
endif
push r12
push r13
push r14
@@ -136,12 +243,16 @@ MY_PUSH_PRESERVED_REGS macro
endm
MY_POP_PRESERVED_REGS macro
MY_POP_PRESERVED_ABI_REGS macro
pop r15
pop r14
pop r13
pop r12
if (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
endif
endif ; x64

View File

@@ -1,5 +1,5 @@
; 7zCrcOpt.asm -- CRC32 calculation : optimized version
; 2009-12-12 : Igor Pavlov : Public domain
; 2021-02-07 : Igor Pavlov : Public domain
include 7zAsm.asm
@@ -7,21 +7,28 @@ MY_ASM_START
rD equ r2
rN equ r7
rT equ r5
ifdef x64
num_VAR equ r8
table_VAR equ r9
else
data_size equ (REG_SIZE * 5)
crc_table equ (REG_SIZE + data_size)
num_VAR equ [r4 + data_size]
table_VAR equ [r4 + crc_table]
if (IS_CDECL gt 0)
crc_OFFS equ (REG_SIZE * 5)
data_OFFS equ (REG_SIZE + crc_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
else
size_OFFS equ (REG_SIZE * 5)
endif
table_OFFS equ (REG_SIZE + size_OFFS)
num_VAR equ [r4 + size_OFFS]
table_VAR equ [r4 + table_OFFS]
endif
SRCDAT equ rN + rD + 4 *
SRCDAT equ rD + rN * 1 + 4 *
CRC macro op:req, dest:req, src:req, t:req
op dest, DWORD PTR [r5 + src * 4 + 0400h * t]
op dest, DWORD PTR [rT + src * 4 + 0400h * t]
endm
CRC_XOR macro dest:req, src:req, t:req
@@ -43,11 +50,33 @@ CRC1b macro
endm
MY_PROLOG macro crc_end:req
MY_PUSH_4_REGS
mov x0, x1
ifdef x64
if (IS_LINUX gt 0)
MY_PUSH_2_REGS
mov x0, REG_ABI_PARAM_0_x ; x0 = x7
mov rT, REG_ABI_PARAM_3 ; r5 = r1
mov rN, REG_ABI_PARAM_2 ; r7 = r2
mov rD, REG_ABI_PARAM_1 ; r2 = r6
else
MY_PUSH_4_REGS
mov x0, REG_ABI_PARAM_0_x ; x0 = x1
mov rT, REG_ABI_PARAM_3 ; r5 = r9
mov rN, REG_ABI_PARAM_2 ; r7 = r8
; mov rD, REG_ABI_PARAM_1 ; r2 = r2
endif
else
MY_PUSH_4_REGS
if (IS_CDECL gt 0)
mov x0, [r4 + crc_OFFS]
mov rD, [r4 + data_OFFS]
else
mov x0, REG_ABI_PARAM_0_x
endif
mov rN, num_VAR
mov r5, table_VAR
mov rT, table_VAR
endif
test rN, rN
jz crc_end
@@:
@@ -77,7 +106,11 @@ MY_EPILOG macro crc_end:req
CRC1b
jmp crc_end
@@:
if (IS_X64 gt 0) and (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
MY_PROC CrcUpdateT8, 4

View File

@@ -1,237 +1,734 @@
; AesOpt.asm -- Intel's AES.
; 2009-12-12 : Igor Pavlov : Public domain
; AesOpt.asm -- AES optimized code for x86 AES hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
include 7zAsm.asm
ifdef ymm0
use_vaes_256 equ 1
ECHO "++ VAES 256"
else
ECHO "-- NO VAES 256"
endif
ifdef x64
ECHO "x86-64"
else
ECHO "x86"
if (IS_CDECL gt 0)
ECHO "ABI : CDECL"
else
ECHO "ABI : no CDECL : FASTCALL"
endif
endif
if (IS_LINUX gt 0)
ECHO "ABI : LINUX"
else
ECHO "ABI : WINDOWS"
endif
MY_ASM_START
ifndef x64
.686
.xmm
endif
ifdef x64
num equ r8
else
num equ [r4 + REG_SIZE * 4]
; MY_ALIGN EQU ALIGN(64)
MY_ALIGN EQU
SEG_ALIGN EQU MY_ALIGN
MY_SEG_PROC macro name:req, numParams:req
; seg_name equ @CatStr(_TEXT$, name)
; seg_name SEGMENT SEG_ALIGN 'CODE'
MY_PROC name, numParams
endm
MY_SEG_ENDP macro
; seg_name ENDS
endm
NUM_AES_KEYS_MAX equ 15
; the number of push operators in function PROLOG
if (IS_LINUX eq 0) or (IS_X64 eq 0)
num_regs_push equ 2
stack_param_offset equ (REG_SIZE * (1 + num_regs_push))
endif
rD equ r2
ifdef x64
num_param equ REG_ABI_PARAM_2
else
if (IS_CDECL gt 0)
; size_t size
; void * data
; UInt32 * aes
; ret-ip <- (r4)
aes_OFFS equ (stack_param_offset)
data_OFFS equ (REG_SIZE + aes_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
num_param equ [r4 + size_OFFS]
else
num_param equ [r4 + stack_param_offset]
endif
endif
keys equ REG_PARAM_0 ; r1
rD equ REG_PARAM_1 ; r2
rN equ r0
MY_PROLOG macro reg:req
ifdef x64
movdqa [r4 + 8], xmm6
movdqa [r4 + 8 + 16], xmm7
endif
koffs_x equ x7
koffs_r equ r7
push r3
push r5
push r6
ksize_x equ x6
ksize_r equ r6
mov rN, num
mov x6, [r1 + 16]
shl x6, 5
keys2 equ r3
movdqa reg, [r1]
add r1, 32
endm
state equ xmm0
key equ xmm0
key_ymm equ ymm0
key_ymm_n equ 0
MY_EPILOG macro
pop r6
pop r5
pop r3
ifdef x64
ways = 11
else
ways = 4
endif
ifdef x64
movdqa xmm6, [r4 + 8]
movdqa xmm7, [r4 + 8 + 16]
endif
ways_start_reg equ 1
MY_ENDP
endm
iv equ @CatStr(xmm, %(ways_start_reg + ways))
iv_ymm equ @CatStr(ymm, %(ways_start_reg + ways))
ways equ 4
ways16 equ (ways * 16)
OP_W macro op, op2
WOP macro op, op2
i = 0
rept ways
op @CatStr(xmm,%i), op2
op @CatStr(xmm, %(ways_start_reg + i)), op2
i = i + 1
endm
endm
LOAD_OP macro op:req, offs:req
op xmm0, [r1 + r3 offs]
ifndef ABI_LINUX
ifdef x64
; we use 32 bytes of home space in stack in WIN64-x64
NUM_HOME_MM_REGS equ (32 / 16)
; we preserve xmm registers starting from xmm6 in WIN64-x64
MM_START_SAVE_REG equ 6
SAVE_XMM macro num_used_mm_regs:req
num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG
if num_save_mm_regs GT 0
num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS
; RSP is (16*x + 8) after entering the function in WIN64-x64
stack_offset = 16 * num_save_mm_regs2 + (stack_param_offset mod 16)
i = 0
rept num_save_mm_regs
if i eq NUM_HOME_MM_REGS
sub r4, stack_offset
endif
if i lt NUM_HOME_MM_REGS
movdqa [r4 + stack_param_offset + i * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
else
movdqa [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
endif
i = i + 1
endm
endif
endm
LOAD_OP_W macro op:req, offs:req
movdqa xmm7, [r1 + r3 offs]
OP_W op, xmm7
RESTORE_XMM macro num_used_mm_regs:req
if num_save_mm_regs GT 0
i = 0
if num_save_mm_regs2 GT 0
rept num_save_mm_regs2
movdqa @CatStr(xmm, %(MM_START_SAVE_REG + NUM_HOME_MM_REGS + i)), [r4 + i * 16]
i = i + 1
endm
add r4, stack_offset
endif
num_low_regs = num_save_mm_regs - i
i = 0
rept num_low_regs
movdqa @CatStr(xmm, %(MM_START_SAVE_REG + i)), [r4 + stack_param_offset + i * 16]
i = i + 1
endm
endif
endm
endif ; x64
endif ; ABI_LINUX
MY_PROLOG macro num_used_mm_regs:req
; num_regs_push: must be equal to the number of push operators
; push r3
; push r5
if (IS_LINUX eq 0) or (IS_X64 eq 0)
push r6
push r7
endif
mov rN, num_param ; don't move it; num_param can use stack pointer (r4)
if (IS_X64 eq 0)
if (IS_CDECL gt 0)
mov rD, [r4 + data_OFFS]
mov keys, [r4 + aes_OFFS]
endif
elseif (IS_LINUX gt 0)
MY_ABI_LINUX_TO_WIN_2
endif
ifndef ABI_LINUX
ifdef x64
SAVE_XMM num_used_mm_regs
endif
endif
mov ksize_x, [keys + 16]
shl ksize_x, 5
endm
MY_EPILOG macro
ifndef ABI_LINUX
ifdef x64
RESTORE_XMM num_save_mm_regs
endif
endif
if (IS_LINUX eq 0) or (IS_X64 eq 0)
pop r7
pop r6
endif
; pop r5
; pop r3
MY_ENDP
endm
OP_KEY macro op:req, offs:req
op state, [keys + offs]
endm
WOP_KEY macro op:req, offs:req
movdqa key, [keys + offs]
WOP op, key
endm
; ---------- AES-CBC Decode ----------
CBC_DEC_UPDATE macro reg, offs
pxor reg, xmm6
movdqa xmm6, [rD + offs]
movdqa [rD + offs], reg
XOR_WITH_DATA macro reg, _ppp_
pxor reg, [rD + i * 16]
endm
DECODE macro op:req
op aesdec, +16
@@:
op aesdec, +0
op aesdec, -16
sub x3, 32
jnz @B
op aesdeclast, +0
WRITE_TO_DATA macro reg, _ppp_
movdqa [rD + i * 16], reg
endm
MY_PROC AesCbc_Decode_Intel, 3
MY_PROLOG xmm6
sub x6, 32
; state0 equ @CatStr(xmm, %(ways_start_reg))
key0 equ @CatStr(xmm, %(ways_start_reg + ways + 1))
key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))
key_last equ @CatStr(xmm, %(ways_start_reg + ways + 2))
key_last_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))
key_last_ymm_n equ (ways_start_reg + ways + 2)
NUM_CBC_REGS equ (ways_start_reg + ways + 3)
MY_SEG_PROC AesCbc_Decode_HW, 3
AesCbc_Decode_HW_start::
MY_PROLOG NUM_CBC_REGS
AesCbc_Decode_HW_start_2::
movdqa iv, [keys]
add keys, 32
movdqa key0, [keys + 1 * ksize_r]
movdqa key_last, [keys]
sub ksize_x, 16
jmp check2
align 16
nextBlocks2:
mov x3, x6
OP_W movdqa, [rD + i * 16]
LOAD_OP_W pxor, +32
DECODE LOAD_OP_W
OP_W CBC_DEC_UPDATE, i * 16
add rD, ways16
WOP movdqa, [rD + i * 16]
mov koffs_x, ksize_x
; WOP_KEY pxor, ksize_r + 16
WOP pxor, key0
; align 16
@@:
WOP_KEY aesdec, 1 * koffs_r
sub koffs_r, 16
jnz @B
; WOP_KEY aesdeclast, 0
WOP aesdeclast, key_last
pxor @CatStr(xmm, %(ways_start_reg)), iv
i = 1
rept ways - 1
pxor @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]
i = i + 1
endm
movdqa iv, [rD + ways * 16 - 16]
WOP WRITE_TO_DATA
add rD, ways * 16
AesCbc_Decode_HW_start_3::
check2:
sub rN, ways
jnc nextBlocks2
add rN, ways
jmp check
sub ksize_x, 16
jmp check
nextBlock:
mov x3, x6
movdqa xmm1, [rD]
LOAD_OP movdqa, +32
pxor xmm0, xmm1
DECODE LOAD_OP
pxor xmm0, xmm6
movdqa [rD], xmm0
movdqa xmm6, xmm1
movdqa state, [rD]
mov koffs_x, ksize_x
; OP_KEY pxor, 1 * ksize_r + 32
pxor state, key0
; movdqa state0, [rD]
; movdqa state, key0
; pxor state, state0
@@:
OP_KEY aesdec, 1 * koffs_r + 16
OP_KEY aesdec, 1 * koffs_r
sub koffs_r, 32
jnz @B
OP_KEY aesdec, 16
; OP_KEY aesdeclast, 0
aesdeclast state, key_last
pxor state, iv
movdqa iv, [rD]
; movdqa iv, state0
movdqa [rD], state
add rD, 16
check:
sub rN, 1
jnc nextBlock
movdqa [r1 - 32], xmm6
MY_EPILOG
movdqa [keys - 32], iv
MY_EPILOG
; ---------- AVX ----------
AVX__WOP_n macro op
i = 0
rept ways
op (ways_start_reg + i)
i = i + 1
endm
endm
AVX__WOP macro op
i = 0
rept ways
op @CatStr(ymm, %(ways_start_reg + i))
i = i + 1
endm
endm
AVX__WOP_KEY macro op:req, offs:req
vmovdqa key_ymm, ymmword ptr [keys2 + offs]
AVX__WOP_n op
endm
AVX__CBC_START macro reg
; vpxor reg, key_ymm, ymmword ptr [rD + 32 * i]
vpxor reg, key0_ymm, ymmword ptr [rD + 32 * i]
endm
AVX__CBC_END macro reg
if i eq 0
vpxor reg, reg, iv_ymm
else
vpxor reg, reg, ymmword ptr [rD + i * 32 - 16]
endif
endm
AVX__WRITE_TO_DATA macro reg
vmovdqu ymmword ptr [rD + 32 * i], reg
endm
AVX__XOR_WITH_DATA macro reg
vpxor reg, reg, ymmword ptr [rD + 32 * i]
endm
AVX__CTR_START macro reg
vpaddq iv_ymm, iv_ymm, one_ymm
; vpxor reg, iv_ymm, key_ymm
vpxor reg, iv_ymm, key0_ymm
endm
; Emits one 256-bit VAES instruction as raw bytes (register-register form only).
;   cmd  : opcode byte
;   dest : number of the destination ymm register
;   a1   : number of the first source ymm register
;   a2   : number of the second source ymm register
; All register arguments are plain numbers (0..15), not register names.
; Byte layout, as computed below:
;   0c4H                 : 3-byte VEX prefix
;   2nd byte             : opcode map 2, bit 040H always set, bit 020H set when
;                          a2 < 8, bit 080H set when dest < 8 (these VEX bits are
;                          stored inverted)
;   3rd byte             : 5 = (256-bit length bit) + (66H prefix selector);
;                          bits 3..6 hold the inverted number of a1
;   cmd                  : opcode
;   last byte            : ModRM with mod = 11b, reg = dest (low 3 bits),
;                          rm = a2 (low 3 bits)
MY_VAES_INSTR_2 macro cmd, dest, a1, a2
db 0c4H
db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)
db 5 + 8 * ((not (a1)) and 15)
db cmd
db 0c0H + 8 * ((dest) and 7) + ((a2) and 7)
endm
; Two-operand form: dest is also used as the first source (dest = op(dest, a)).
MY_VAES_INSTR macro cmd, dest, a
MY_VAES_INSTR_2 cmd, dest, dest, a
endm
; vaesenc ymm(dest), ymm(dest), ymm(a)
MY_vaesenc macro dest, a
MY_VAES_INSTR 0dcH, dest, a
endm
; vaesenclast ymm(dest), ymm(dest), ymm(a)
MY_vaesenclast macro dest, a
MY_VAES_INSTR 0ddH, dest, a
endm
; vaesdec ymm(dest), ymm(dest), ymm(a)
MY_vaesdec macro dest, a
MY_VAES_INSTR 0deH, dest, a
endm
; vaesdeclast ymm(dest), ymm(dest), ymm(a)
MY_vaesdeclast macro dest, a
MY_VAES_INSTR 0dfH, dest, a
endm
; Per-register callbacks: reg is a ymm register NUMBER.
; Decrypt round with the round key held in register number key_ymm_n.
AVX__VAES_DEC macro reg
MY_vaesdec reg, key_ymm_n
endm
; Final decrypt round with the key held in register number key_last_ymm_n.
AVX__VAES_DEC_LAST_key_last macro reg
; MY_vaesdeclast reg, key_ymm_n
MY_vaesdeclast reg, key_last_ymm_n
endm
; Encrypt round with the round key held in register number key_ymm_n.
AVX__VAES_ENC macro reg
MY_vaesenc reg, key_ymm_n
endm
; Final encrypt round with the round key held in register number key_ymm_n.
AVX__VAES_ENC_LAST macro reg
MY_vaesenclast reg, key_ymm_n
endm
; Writes the 128-bit src into the high half of dest, keeping dest's low half.
AVX__vinserti128_TO_HIGH macro dest, src
vinserti128 dest, dest, src, 1
endm
MY_PROC AesCbc_Decode_HW_256, 3
ifdef use_vaes_256
MY_PROLOG NUM_CBC_REGS
cmp rN, ways * 2
jb AesCbc_Decode_HW_start_2
vmovdqa iv, xmmword ptr [keys]
add keys, 32
vbroadcasti128 key0_ymm, xmmword ptr [keys + 1 * ksize_r]
vbroadcasti128 key_last_ymm, xmmword ptr [keys]
sub ksize_x, 16
mov koffs_x, ksize_x
add ksize_x, ksize_x
AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)
push keys2
sub r4, AVX_STACK_SUB
; sub r4, 32
; sub r4, ksize_r
; lea keys2, [r4 + 32]
mov keys2, r4
and keys2, -32
broad:
vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]
vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm
sub koffs_r, 16
; jnc broad
jnz broad
sub rN, ways * 2
align 16
avx_cbcdec_nextBlock2:
mov koffs_x, ksize_x
; AVX__WOP_KEY AVX__CBC_START, 1 * koffs_r + 32
AVX__WOP AVX__CBC_START
@@:
AVX__WOP_KEY AVX__VAES_DEC, 1 * koffs_r
sub koffs_r, 32
jnz @B
; AVX__WOP_KEY AVX__VAES_DEC_LAST, 0
AVX__WOP_n AVX__VAES_DEC_LAST_key_last
AVX__vinserti128_TO_HIGH iv_ymm, xmmword ptr [rD]
AVX__WOP AVX__CBC_END
vmovdqa iv, xmmword ptr [rD + ways * 32 - 16]
AVX__WOP AVX__WRITE_TO_DATA
add rD, ways * 32
sub rN, ways * 2
jnc avx_cbcdec_nextBlock2
add rN, ways * 2
shr ksize_x, 1
; lea r4, [r4 + 1 * ksize_r + 32]
add r4, AVX_STACK_SUB
pop keys2
vzeroupper
jmp AesCbc_Decode_HW_start_3
else
jmp AesCbc_Decode_HW_start
endif
MY_ENDP
MY_SEG_ENDP
; ---------- AES-CBC Encode ----------
ENCODE macro op:req
op aesenc, -16
@@:
op aesenc, +0
op aesenc, +16
add r3, 32
jnz @B
op aesenclast, +0
endm
e0 equ xmm1
MY_PROC AesCbc_Encode_Intel, 3
MY_PROLOG xmm0
CENC_START_KEY equ 2
CENC_NUM_REG_KEYS equ (3 * 2)
; last_key equ @CatStr(xmm, %(CENC_START_KEY + CENC_NUM_REG_KEYS))
add r1, r6
neg r6
add r6, 32
MY_SEG_PROC AesCbc_Encode_HW, 3
MY_PROLOG (CENC_START_KEY + CENC_NUM_REG_KEYS + 0)
movdqa state, [keys]
add keys, 32
i = 0
rept CENC_NUM_REG_KEYS
movdqa @CatStr(xmm, %(CENC_START_KEY + i)), [keys + i * 16]
i = i + 1
endm
add keys, ksize_r
neg ksize_r
add ksize_r, (16 * CENC_NUM_REG_KEYS)
; movdqa last_key, [keys]
jmp check_e
align 16
nextBlock_e:
mov r3, r6
pxor xmm0, [rD]
pxor xmm0, [r1 + r3 - 32]
ENCODE LOAD_OP
movdqa [rD], xmm0
movdqa e0, [rD]
mov koffs_r, ksize_r
pxor e0, @CatStr(xmm, %(CENC_START_KEY))
pxor state, e0
i = 1
rept (CENC_NUM_REG_KEYS - 1)
aesenc state, @CatStr(xmm, %(CENC_START_KEY + i))
i = i + 1
endm
@@:
OP_KEY aesenc, 1 * koffs_r
OP_KEY aesenc, 1 * koffs_r + 16
add koffs_r, 32
jnz @B
OP_KEY aesenclast, 0
; aesenclast state, last_key
movdqa [rD], state
add rD, 16
check_e:
sub rN, 1
jnc nextBlock_e
movdqa [r1 + r6 - 64], xmm0
MY_EPILOG
; movdqa [keys - 32], state
movdqa [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state
MY_EPILOG
MY_SEG_ENDP
; ---------- AES-CTR ----------
XOR_UPD_1 macro reg, offs
pxor reg, [rD + offs]
ifdef x64
; ways = 11
endif
one equ @CatStr(xmm, %(ways_start_reg + ways + 1))
one_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))
key0 equ @CatStr(xmm, %(ways_start_reg + ways + 2))
key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))
NUM_CTR_REGS equ (ways_start_reg + ways + 3)
INIT_CTR macro reg, _ppp_
paddq iv, one
movdqa reg, iv
endm
XOR_UPD_2 macro reg, offs
movdqa [rD + offs], reg
endm
MY_PROC AesCtr_Code_Intel, 3
MY_PROLOG xmm6
MY_SEG_PROC AesCtr_Code_HW, 3
Ctr_start::
MY_PROLOG NUM_CTR_REGS
mov r5, r4
shr r5, 4
dec r5
shl r5, 4
Ctr_start_2::
movdqa iv, [keys]
add keys, 32
movdqa key0, [keys]
mov DWORD PTR [r5], 1
mov DWORD PTR [r5 + 4], 0
mov DWORD PTR [r5 + 8], 0
mov DWORD PTR [r5 + 12], 0
add r1, r6
neg r6
add r6, 32
add keys, ksize_r
neg ksize_r
add ksize_r, 16
Ctr_start_3::
mov koffs_x, 1
movd one, koffs_x
jmp check2_c
align 16
nextBlocks2_c:
movdqa xmm7, [r5]
WOP INIT_CTR, 0
mov koffs_r, ksize_r
; WOP_KEY pxor, 1 * koffs_r -16
WOP pxor, key0
@@:
WOP_KEY aesenc, 1 * koffs_r
add koffs_r, 16
jnz @B
WOP_KEY aesenclast, 0
i = 0
rept ways
paddq xmm6, xmm7
movdqa @CatStr(xmm,%i), xmm6
i = i + 1
endm
mov r3, r6
LOAD_OP_W pxor, -32
ENCODE LOAD_OP_W
OP_W XOR_UPD_1, i * 16
OP_W XOR_UPD_2, i * 16
add rD, ways16
WOP XOR_WITH_DATA
WOP WRITE_TO_DATA
add rD, ways * 16
check2_c:
sub rN, ways
jnc nextBlocks2_c
add rN, ways
sub keys, 16
add ksize_r, 16
jmp check_c
; align 16
nextBlock_c:
paddq xmm6, [r5]
mov r3, r6
movdqa xmm0, [r1 + r3 - 32]
pxor xmm0, xmm6
ENCODE LOAD_OP
XOR_UPD_1 xmm0, 0
XOR_UPD_2 xmm0, 0
paddq iv, one
; movdqa state, [keys + 1 * koffs_r - 16]
movdqa state, key0
mov koffs_r, ksize_r
pxor state, iv
@@:
OP_KEY aesenc, 1 * koffs_r
OP_KEY aesenc, 1 * koffs_r + 16
add koffs_r, 32
jnz @B
OP_KEY aesenc, 0
OP_KEY aesenclast, 16
pxor state, [rD]
movdqa [rD], state
add rD, 16
check_c:
sub rN, 1
jnc nextBlock_c
movdqa [r1 + r6 - 64], xmm6
MY_EPILOG
; movdqa [keys - 32], iv
movdqa [keys + 1 * ksize_r - 16 - 32], iv
MY_EPILOG
MY_PROC AesCtr_Code_HW_256, 3
ifdef use_vaes_256
MY_PROLOG NUM_CTR_REGS
cmp rN, ways * 2
jb Ctr_start_2
vbroadcasti128 iv_ymm, xmmword ptr [keys]
add keys, 32
vbroadcasti128 key0_ymm, xmmword ptr [keys]
mov koffs_x, 1
vmovd one, koffs_x
vpsubq iv_ymm, iv_ymm, one_ymm
vpaddq one, one, one
AVX__vinserti128_TO_HIGH one_ymm, one
add keys, ksize_r
sub ksize_x, 16
neg ksize_r
mov koffs_r, ksize_r
add ksize_r, ksize_r
AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32)
push keys2
lea keys2, [r4 - 32]
sub r4, AVX_STACK_SUB
and keys2, -32
vbroadcasti128 key_ymm, xmmword ptr [keys]
vmovdqa ymmword ptr [keys2], key_ymm
@@:
vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]
vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm
add koffs_r, 16
jnz @B
sub rN, ways * 2
align 16
avx_ctr_nextBlock2:
mov koffs_r, ksize_r
AVX__WOP AVX__CTR_START
; AVX__WOP_KEY AVX__CTR_START, 1 * koffs_r - 32
@@:
AVX__WOP_KEY AVX__VAES_ENC, 1 * koffs_r
add koffs_r, 32
jnz @B
AVX__WOP_KEY AVX__VAES_ENC_LAST, 0
AVX__WOP AVX__XOR_WITH_DATA
AVX__WOP AVX__WRITE_TO_DATA
add rD, ways * 32
sub rN, ways * 2
jnc avx_ctr_nextBlock2
add rN, ways * 2
vextracti128 iv, iv_ymm, 1
sar ksize_r, 1
add r4, AVX_STACK_SUB
pop keys2
vzeroupper
jmp Ctr_start_3
else
jmp Ctr_start
endif
MY_ENDP
MY_SEG_ENDP
end

View File

@@ -1,5 +1,5 @@
; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
; 2018-02-06: Igor Pavlov : Public domain
; 2021-02-23: Igor Pavlov : Public domain
;
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
; function for check at link time.
@@ -62,6 +62,7 @@ PMULT equ (1 SHL PSHIFT)
PMULT_HALF equ (1 SHL (PSHIFT - 1))
PMULT_2 equ (1 SHL (PSHIFT + 1))
kMatchSpecLen_Error_Data equ (1 SHL 9)
; x0 range
; x1 pbPos / (prob) TREE
@@ -416,7 +417,7 @@ REV_1_VAR macro prob:req
NORM_CALC prob
cmovae range, t0
lea t0_R, [sym_R + sym2_R]
lea t0_R, [sym_R + 1 * sym2_R]
cmovae sym_R, t0_R
mov t0, kBitModelOffset
cmovb cod, t1
@@ -583,7 +584,7 @@ IsMatchBranch_Pre macro reg
mov pbPos, LOC pbMask
and pbPos, processedPos
shl pbPos, (kLenNumLowBits + 1 + PSHIFT)
lea probs_state_R, [probs + state_R]
lea probs_state_R, [probs + 1 * state_R]
endm
@@ -605,13 +606,13 @@ endm
; RSP is (16x + 8) bytes aligned in WIN64-x64
; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
PARAM_lzma equ REG_PARAM_0
PARAM_limit equ REG_PARAM_1
PARAM_bufLimit equ REG_PARAM_2
PARAM_lzma equ REG_ABI_PARAM_0
PARAM_limit equ REG_ABI_PARAM_1
PARAM_bufLimit equ REG_ABI_PARAM_2
; MY_ALIGN_64
MY_PROC LzmaDec_DecodeReal_3, 3
MY_PUSH_PRESERVED_REGS
MY_PUSH_PRESERVED_ABI_REGS
lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
and r0, -128
@@ -777,7 +778,7 @@ len8_loop:
jb len8_loop
mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
jmp len_mid_2
jmp short len_mid_2 ; we use short here for MASM that doesn't optimize that code as another assembler programs
MY_ALIGN_32
len_mid_0:
@@ -890,11 +891,16 @@ decode_dist_end:
; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
mov t1, LOC rep0
mov x1, LOC rep1
mov x2, LOC rep2
mov t0, LOC checkDicSize
test t0, t0
cmove t0, processedPos
cmp sym, t0
jae end_of_payload
; jmp end_of_payload ; for debug
; rep3 = rep2;
; rep2 = rep1;
@@ -902,15 +908,12 @@ decode_dist_end:
; rep0 = distance + 1;
inc sym
mov t0, LOC rep0
mov t1, LOC rep1
mov x1, LOC rep2
mov LOC rep0, sym
; mov sym, LOC remainLen
mov sym, len_temp
mov LOC rep1, t0
mov LOC rep2, t1
mov LOC rep3, x1
mov LOC rep1, t1
mov LOC rep2, x1
mov LOC rep3, x2
; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
cmp state, (kNumStates + kNumLitStates) * PMULT
@@ -932,7 +935,7 @@ copy_match:
; }
mov cnt_R, LOC limit
sub cnt_R, dicPos
jz fin_ERROR
jz fin_dicPos_LIMIT
; curLen = ((rem < len) ? (unsigned)rem : len);
cmp cnt_R, sym_R
@@ -1091,11 +1094,23 @@ IsRep0Short_label:
sub t0_R, dic
sub probs, RepLenCoder * PMULT
inc processedPos
; state = state < kNumLitStates ? 9 : 11;
or state, 1 * PMULT
; the caller doesn't allow (dicPos >= limit) case for REP_SHORT
; so we don't need the following (dicPos == limit) check here:
; cmp dicPos, LOC limit
; jae fin_dicPos_LIMIT_REP_SHORT
inc processedPos
IsMatchBranch_Pre
; xor sym, sym
; sub t0_R, probBranch_R
; cmovb sym_R, LOC dicBufSize
; add t0_R, sym_R
sub t0_R, probBranch_R
jae @f
add t0_R, LOC dicBufSize
@@ -1210,15 +1225,45 @@ copy_match_cross:
fin_ERROR:
; fin_dicPos_LIMIT_REP_SHORT:
; mov sym, 1
fin_dicPos_LIMIT:
mov LOC remainLen, sym
jmp fin_OK
; For more strict mode we can stop decoding with error
; mov sym, 1
; jmp fin
fin_ERROR_MATCH_DIST:
; rep3 = rep2;
; rep2 = rep1;
; rep1 = rep0;
; rep0 = distance + 1;
add len_temp, kMatchSpecLen_Error_Data
mov LOC remainLen, len_temp
; fin_ERROR_2:
mov LOC rep0, sym
mov LOC rep1, t1
mov LOC rep2, x1
mov LOC rep3, x2
; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
cmp state, (kNumStates + kNumLitStates) * PMULT
mov state, kNumLitStates * PMULT
mov t0, (kNumLitStates + 3) * PMULT
cmovae state, t0
; jmp fin_OK
mov sym, 1
jmp fin
end_of_payload:
cmp sym, 0FFFFFFFFh ; -1
jne fin_ERROR
inc sym
jnz fin_ERROR_MATCH_DIST
mov LOC remainLen, kMatchSpecLenStart
sub state, kNumStates * PMULT
@@ -1250,7 +1295,7 @@ fin:
mov RSP, LOC Old_RSP
MY_POP_PRESERVED_REGS
MY_POP_PRESERVED_ABI_REGS
MY_ENDP
_TEXT$LZMADECOPT ENDS

263
Asm/x86/Sha1Opt.asm Normal file
View File

@@ -0,0 +1,263 @@
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
CONST SEGMENT
align 16
; pshufb control mask: output byte i is taken from input byte (15 - i), i.e.
; the whole 16-byte vector is byte-reversed. Aligned to 16 because it is
; loaded with movdqa (see LOAD_MASK).
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
CONST ENDS
; _TEXT$SHA1OPT SEGMENT 'CODE'
ifndef x64
.686
.xmm
endif
; rNum       : register that holds the 3rd parameter (block counter of the loop)
; LOCAL_SIZE : bytes of stack reserved by MY_PROLOG
;              x64, non-LINUX : 2 * 16 (save area for xmm8 and xmm9)
;              x64, LINUX     : not defined (no stack area is reserved)
;              x86            : 1 * 16 (abcd_save)
ifdef x64
rNum equ REG_ABI_PARAM_2
if (IS_LINUX eq 0)
LOCAL_SIZE equ (16 * 2)
endif
else
rNum equ r0
LOCAL_SIZE equ (16 * 1)
endif
; rState / rData : registers that hold the 1st and 2nd parameter
rState equ REG_ABI_PARAM_0
rData equ REG_ABI_PARAM_1
; SHA-1 instructions are emitted as raw bytes (register-register form).
; a1 and a2 are xmm register NUMBERS: a1 goes to the ModRM reg field and a2 to
; the ModRM rm field. No REX prefix is emitted, so only registers 0..7 can be
; encoded.

; sha1rnds4 xmm(a1), xmm(a2), imm
MY_sha1rnds4 macro a1, a2, imm
db 0fH, 03aH, 0ccH, (0c0H + a1 * 8 + a2), imm
endm
; Generic "0F 38 cmd /r" encoder shared by the instructions below.
MY_SHA_INSTR macro cmd, a1, a2
db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm
; opcode bytes (third byte after 0F 38)
cmd_sha1nexte equ 0c8H
cmd_sha1msg1 equ 0c9H
cmd_sha1msg2 equ 0caH
; sha1nexte xmm(a1), xmm(a2)
MY_sha1nexte macro a1, a2
MY_SHA_INSTR cmd_sha1nexte, a1, a2
endm
; sha1msg1 xmm(a1), xmm(a2)
MY_sha1msg1 macro a1, a2
MY_SHA_INSTR cmd_sha1msg1, a1, a2
endm
; sha1msg2 xmm(a1), xmm(a2)
MY_sha1msg2 macro a1, a2
MY_SHA_INSTR cmd_sha1msg2, a1, a2
endm
; Function prologue. r4 is the stack pointer.
;   x64, non-LINUX : saves xmm6 / xmm7 at [r4 + 8] and [r4 + 24] (above the
;                    return address), then lowers r4 by (LOCAL_SIZE + 8) and
;                    saves xmm8 / xmm9 in the new area. movdqa is used, so this
;                    relies on r4 being (16 * x + 8) at function entry.
;   x64, LINUX     : nothing is saved.
;   x86            : loads the parameters that are passed on the stack
;                    (all three for cdecl, only the third one for fastcall),
;                    then keeps the old r4 in r5 and creates a 16-byte aligned
;                    local area of LOCAL_SIZE bytes.
; Must be paired with MY_EPILOG.
MY_PROLOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa [r4 + 8], xmm6
movdqa [r4 + 8 + 16], xmm7
sub r4, LOCAL_SIZE + 8
movdqa [r4 ], xmm8
movdqa [r4 + 16], xmm9
endif
else ; x86
if (IS_CDECL gt 0)
mov rState, [r4 + REG_SIZE * 1]
mov rData, [r4 + REG_SIZE * 2]
mov rNum, [r4 + REG_SIZE * 3]
else ; fastcall
mov rNum, [r4 + REG_SIZE * 1]
endif
; the parameter loads above must stay before "push r5": their offsets are
; relative to the r4 value at function entry
push r5
mov r5, r4
and r4, -16
sub r4, LOCAL_SIZE
endif
endm
; Function epilogue: undoes MY_PROLOG in reverse order and then ends the
; procedure with MY_ENDP.
;   x64, non-LINUX : restores xmm8 / xmm9, releases the local area, restores
;                    xmm6 / xmm7.
;   x86            : restores r4 from r5 (dropping the aligned local area) and
;                    pops the saved r5.
MY_EPILOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa xmm8, [r4]
movdqa xmm9, [r4 + 16]
add r4, LOCAL_SIZE + 8
movdqa xmm6, [r4 + 8]
movdqa xmm7, [r4 + 8 + 16]
endif
else ; x86
mov r4, r5
pop r5
endif
MY_ENDP
endm
; xmm register assignment. The *_N symbols are register NUMBERS (needed by the
; byte-encoding macros); the names without _N are the matching xmm registers.
;   xmm0, xmm1 : e0 / e1 (used alternately by RND4)
;   xmm2       : abcd
;   xmm3       : e0_save
;   xmm4..xmm7 : four message registers, starting at number w_regs
e0_N equ 0
e1_N equ 1
abcd_N equ 2
e0_save_N equ 3
w_regs equ 4
e0 equ @CatStr(xmm, %e0_N)
e1 equ @CatStr(xmm, %e1_N)
abcd equ @CatStr(xmm, %abcd_N)
e0_save equ @CatStr(xmm, %e0_save_N)
; x64 : abcd_save and the byte-swap mask live in xmm8 / xmm9.
; x86 : abcd_save lives in the aligned stack slot [r4]; the mask shares a
;       register with e1, so it must be reloaded for every block.
ifdef x64
abcd_save equ xmm8
mask2 equ xmm9
else
abcd_save equ [r4]
mask2 equ e1
endif
; Loads the byte-reversal mask into mask2.
LOAD_MASK macro
movdqa mask2, XMMWORD PTR Reverse_Endian_Mask
endm
; Loads 16 input bytes at [rData + 16 * k] into message register number
; (w_regs + k) with an unaligned load and byte-reverses them with mask2.
; mask2 must have been loaded before (LOAD_MASK).
LOAD_W macro k:req
movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
pshufb @CatStr(xmm, %(w_regs + k)), mask2
endm
; pre2 / pre1 : how many RND4 groups ahead the message-schedule instructions
; (pxor + sha1msg2, and sha1msg1) are issued for a message register.
; pre2 can be 2 or 3 (recommended)
pre2 equ 3
pre1 equ (pre2 + 1)
; number of RND4 groups per block
NUM_ROUNDS4 equ 20
; One group of four SHA-1 rounds; k is the group index (0 .. NUM_ROUNDS4 - 1).
;  - the two e registers alternate with the parity of k: abcd is first copied to
;    the register with parity (k + 1), then sha1rnds4 updates abcd using the
;    register with parity k
;  - the sha1rnds4 immediate is k / 5, so it changes every 5 groups
;  - sha1nexte adds the next message register into the copied register; for the
;    last group it uses e0_save instead of a message register
;  - the three conditional parts below update the message registers that will
;    be consumed pre2 / pre1 groups later; the conditions skip the groups where
;    there is nothing to prepare
RND4 macro k
movdqa @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd
MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5
nextM = (w_regs + ((k + 1) mod 4))
if (k EQ NUM_ROUNDS4 - 1)
nextM = e0_save_N
endif
MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM
if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
endif
if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
endif
if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
endif
endm
; Reverses the order of the four dwords in abcd and in e0 (pshufd with 01bH).
; It is applied once after the state is loaded and once before it is stored.
; The trailing comments show the register contents after each instruction.
REVERSE_STATE macro
; abcd ; dcba
; e0 ; 000e
pshufd abcd, abcd, 01bH ; abcd
pshufd e0, e0, 01bH ; e000
endm
; Sha1_UpdateBlocks_HW : processes rNum consecutive 64-byte blocks with the
; SHA-1 hardware instructions.
;   rState : pointer to the state: 16 bytes at [rState] and one dword at
;            [rState + 16]; read at entry and written back at exit
;   rData  : pointer to the input; advanced by 64 for every block
;   rNum   : number of blocks; if it is 0 the state is left untouched
; The state is accessed with unaligned loads / stores (movdqu / movd).
MY_PROC Sha1_UpdateBlocks_HW, 3
MY_PROLOG
; nothing to do for an empty input (test sets ZF exactly like "cmp rNum, 0")
test rNum, rNum
je end_c
movdqu abcd, [rState] ; dcba
movd e0, dword ptr [rState + 16] ; 000e
REVERSE_STATE
; x64: the mask has its own register, so it is loaded once before the loop
ifdef x64
LOAD_MASK
endif
align 16
nextBlock:
; keep the state from before this block; it is added back after the rounds
movdqa abcd_save, abcd
movdqa e0_save, e0
; x86: mask2 shares a register with e1, which the rounds overwrite, so the
; mask is reloaded for every block
ifndef x64
LOAD_MASK
endif
LOAD_W 0
LOAD_W 1
LOAD_W 2
LOAD_W 3
paddd e0, @CatStr(xmm, %(w_regs))
; 20 groups of 4 rounds, fully unrolled at assembly time
k = 0
rept NUM_ROUNDS4
RND4 k
k = k + 1
endm
; abcd += saved abcd (e0_save is added inside the last RND4 group)
paddd abcd, abcd_save
add rData, 64
sub rNum, 1
jnz nextBlock
REVERSE_STATE
movdqu [rState], abcd
movd dword ptr [rState + 16], e0
end_c:
MY_EPILOG
; _TEXT$SHA1OPT ENDS
end

263
Asm/x86/Sha256Opt.asm Normal file
View File

@@ -0,0 +1,263 @@
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
; .data
; public K
; we can use external SHA256_K_ARRAY defined in Sha256.c
; but we must guarantee that SHA256_K_ARRAY is aligned for 16-bytes
COMMENT @
ifdef x64
K_CONST equ SHA256_K_ARRAY
else
K_CONST equ _SHA256_K_ARRAY
endif
EXTRN K_CONST:xmmword
@
; Read-only data used by this file. Both objects are 16-byte aligned because
; they are read with movdqa.
CONST SEGMENT
align 16
; pshufb control mask: reverses the byte order inside each 32-bit dword
; (3,2,1,0 for dword 0, 7,6,5,4 for dword 1, ...).
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
; COMMENT @
align 16
K_CONST \
DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H
DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H
DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H
DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H
DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH
DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH
DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H
DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H
DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H
DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H
DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H
DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H
DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H
DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H
DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H
DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H
; @
; K_CONST above: 64 dwords = 16 rows of 16 bytes; RND4 reads row k at
; [K_CONST + (k) * 16].
CONST ENDS
; _TEXT$SHA256OPT SEGMENT 'CODE'
ifndef x64
.686
.xmm
endif
; rNum       : register that holds the 3rd parameter (block counter of the loop)
; LOCAL_SIZE : bytes of stack reserved by MY_PROLOG
;              x64, non-LINUX : 2 * 16 (save area for xmm8 and xmm9)
;              x64, LINUX     : not defined (no stack area is reserved)
;              x86            : 1 * 16 (state0_save)
ifdef x64
rNum equ REG_ABI_PARAM_2
if (IS_LINUX eq 0)
LOCAL_SIZE equ (16 * 2)
endif
else
rNum equ r0
LOCAL_SIZE equ (16 * 1)
endif
; rState / rData : registers that hold the 1st and 2nd parameter
rState equ REG_ABI_PARAM_0
rData equ REG_ABI_PARAM_1
; SHA-256 instructions are emitted as raw bytes "0F 38 cmd /r"
; (register-register form). a1 and a2 are xmm register NUMBERS: a1 goes to the
; ModRM reg field and a2 to the ModRM rm field. No REX prefix is emitted, so
; only registers 0..7 can be encoded.
MY_SHA_INSTR macro cmd, a1, a2
db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm
; opcode bytes (third byte after 0F 38)
cmd_sha256rnds2 equ 0cbH
cmd_sha256msg1 equ 0ccH
cmd_sha256msg2 equ 0cdH
; sha256rnds2 xmm(a1), xmm(a2) ; the instruction also reads xmm0 implicitly
MY_sha256rnds2 macro a1, a2
MY_SHA_INSTR cmd_sha256rnds2, a1, a2
endm
; sha256msg1 xmm(a1), xmm(a2)
MY_sha256msg1 macro a1, a2
MY_SHA_INSTR cmd_sha256msg1, a1, a2
endm
; sha256msg2 xmm(a1), xmm(a2)
MY_sha256msg2 macro a1, a2
MY_SHA_INSTR cmd_sha256msg2, a1, a2
endm
; Function prologue. r4 is the stack pointer.
;   x64, non-LINUX : saves xmm6 / xmm7 at [r4 + 8] and [r4 + 24] (above the
;                    return address), then lowers r4 by (LOCAL_SIZE + 8) and
;                    saves xmm8 / xmm9 in the new area. movdqa is used, so this
;                    relies on r4 being (16 * x + 8) at function entry.
;   x64, LINUX     : nothing is saved.
;   x86            : loads the parameters that are passed on the stack
;                    (all three for cdecl, only the third one for fastcall),
;                    then keeps the old r4 in r5 and creates a 16-byte aligned
;                    local area of LOCAL_SIZE bytes.
; Must be paired with MY_EPILOG.
MY_PROLOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa [r4 + 8], xmm6
movdqa [r4 + 8 + 16], xmm7
sub r4, LOCAL_SIZE + 8
movdqa [r4 ], xmm8
movdqa [r4 + 16], xmm9
endif
else ; x86
if (IS_CDECL gt 0)
mov rState, [r4 + REG_SIZE * 1]
mov rData, [r4 + REG_SIZE * 2]
mov rNum, [r4 + REG_SIZE * 3]
else ; fastcall
mov rNum, [r4 + REG_SIZE * 1]
endif
; the parameter loads above must stay before "push r5": their offsets are
; relative to the r4 value at function entry
push r5
mov r5, r4
and r4, -16
sub r4, LOCAL_SIZE
endif
endm
; Function epilogue: undoes MY_PROLOG in reverse order and then ends the
; procedure with MY_ENDP.
;   x64, non-LINUX : restores xmm8 / xmm9, releases the local area, restores
;                    xmm6 / xmm7.
;   x86            : restores r4 from r5 (dropping the aligned local area) and
;                    pops the saved r5.
MY_EPILOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa xmm8, [r4]
movdqa xmm9, [r4 + 16]
add r4, LOCAL_SIZE + 8
movdqa xmm6, [r4 + 8]
movdqa xmm7, [r4 + 8 + 16]
endif
else ; x86
mov r4, r5
pop r5
endif
MY_ENDP
endm
; xmm register assignment. The *_N symbols are register NUMBERS (needed by the
; byte-encoding macros).
;   xmm0       : msg and tmp (two names for the same register)
;   xmm1       : state1_save
;   xmm2, xmm3 : state0 / state1
;   xmm4..xmm7 : four message registers, starting at number w_regs
msg equ xmm0
tmp equ xmm0
state0_N equ 2
state1_N equ 3
w_regs equ 4
state1_save equ xmm1
state0 equ @CatStr(xmm, %state0_N)
state1 equ @CatStr(xmm, %state1_N)
; x64 : state0_save and the byte-swap mask live in xmm8 / xmm9.
; x86 : state0_save lives in the aligned stack slot [r4]; the mask shares xmm0
;       with msg / tmp, so it must be reloaded for every block.
ifdef x64
state0_save equ xmm8
mask2 equ xmm9
else
state0_save equ [r4]
mask2 equ xmm0
endif
; Loads the byte-swap mask into mask2.
LOAD_MASK macro
movdqa mask2, XMMWORD PTR Reverse_Endian_Mask
endm
; Loads 16 input bytes at [rData + 16 * k] into message register number
; (w_regs + k) with an unaligned load and byte-swaps each dword with mask2.
; mask2 must have been loaded before (LOAD_MASK).
LOAD_W macro k:req
movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
pshufb @CatStr(xmm, %(w_regs + k)), mask2
endm
; pre1 / pre2 : how many RND4 groups ahead the message-schedule steps
; (sha256msg1, and palignr + paddd + sha256msg2) are issued for a message
; register. Constraints:
; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
pre1 equ 3
pre2 equ 2
; One group of four SHA-256 rounds; k is the group index (0..15, see the rept
; in Sha256_UpdateBlocks_HW).
;  - msg = K_CONST row k + message register (k mod 4)
;  - first sha256rnds2 : two rounds using the low half of msg (msg is xmm0,
;    the implicit operand of the instruction)
;  - pshufd 0eH moves the high half of msg to the low half for the second
;    sha256rnds2, which is issued with the two state registers swapped
;  - the conditional parts prepare the message registers that will be consumed
;    pre1 / pre2 groups later; the conditions skip the groups where there is
;    nothing to prepare
; tmp is the same register as msg; it is only written after the second
; sha256rnds2 has consumed msg.
RND4 macro k
movdqa msg, xmmword ptr [K_CONST + (k) * 16]
paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
MY_sha256rnds2 state0_N, state1_N
pshufd msg, msg, 0eH
if (k GE (4 - pre1)) AND (k LT (16 - pre1))
; w4[0] = msg1(w4[-4], w4[-3])
MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
endif
MY_sha256rnds2 state1_N, state0_N
if (k GE (4 - pre2)) AND (k LT (16 - pre2))
movdqa tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
paddd @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
; w4[0] = msg2(w4[0], w4[-1])
MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
endif
endm
; Rearranges the eight state dwords held in state0 / state1. It is applied once
; after the state is loaded and once before it is stored. Clobbers tmp (xmm0).
; The trailing comments show the register contents after each instruction.
REVERSE_STATE macro
; state0 ; dcba
; state1 ; hgfe
pshufd tmp, state0, 01bH ; abcd
pshufd state0, state1, 01bH ; efgh
movdqa state1, state0 ; efgh
punpcklqdq state0, tmp ; cdgh
punpckhqdq state1, tmp ; abef
endm
; Sha256_UpdateBlocks_HW : processes rNum consecutive 64-byte blocks with the
; SHA-256 hardware instructions.
;   rState : pointer to the 32-byte state ([rState] and [rState + 16]);
;            read at entry and written back at exit
;   rData  : pointer to the input; advanced by 64 for every block
;   rNum   : number of blocks; if it is 0 the state is left untouched
; The state is accessed with unaligned loads / stores (movdqu).
MY_PROC Sha256_UpdateBlocks_HW, 3
MY_PROLOG
; nothing to do for an empty input (test sets ZF exactly like "cmp rNum, 0")
test rNum, rNum
je end_c
movdqu state0, [rState] ; dcba
movdqu state1, [rState + 16] ; hgfe
REVERSE_STATE
; x64: the mask has its own register, so it is loaded once before the loop
ifdef x64
LOAD_MASK
endif
align 16
nextBlock:
; keep the state from before this block; it is added back after the rounds
movdqa state0_save, state0
movdqa state1_save, state1
; x86: mask2 shares xmm0 with msg / tmp, which the rounds overwrite, so the
; mask is reloaded for every block
ifndef x64
LOAD_MASK
endif
LOAD_W 0
LOAD_W 1
LOAD_W 2
LOAD_W 3
; 16 groups of 4 rounds, fully unrolled at assembly time
k = 0
rept 16
RND4 k
k = k + 1
endm
paddd state0, state0_save
paddd state1, state1_save
add rData, 64
sub rNum, 1
jnz nextBlock
REVERSE_STATE
movdqu [rState], state0
movdqu [rState + 16], state1
end_c:
MY_EPILOG
; _TEXT$SHA256OPT ENDS
end

View File

@@ -1,5 +1,5 @@
; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2011-06-28 : Igor Pavlov : Public domain
; 2021-02-06 : Igor Pavlov : Public domain
include 7zAsm.asm
@@ -7,16 +7,15 @@ MY_ASM_START
ifdef x64
rD equ r9
rN equ r10
rD equ r9
rN equ r10
rT equ r5
num_VAR equ r8
num_VAR equ r8
table_VAR equ r9
SRCDAT equ rN + rD
SRCDAT4 equ dword ptr [rD + rN * 1]
CRC_XOR macro dest:req, src:req, t:req
xor dest, QWORD PTR [r5 + src * 8 + 0800h * t]
xor dest, QWORD PTR [rT + src * 8 + 0800h * t]
endm
CRC1b macro
@@ -30,12 +29,15 @@ CRC1b macro
endm
MY_PROLOG macro crc_end:req
ifdef ABI_LINUX
MY_PUSH_2_REGS
else
MY_PUSH_4_REGS
mov r0, r1
mov rN, num_VAR
mov r5, table_VAR
mov rD, r2
endif
mov r0, REG_ABI_PARAM_0
mov rN, REG_ABI_PARAM_2
mov rT, REG_ABI_PARAM_3
mov rD, REG_ABI_PARAM_1
test rN, rN
jz crc_end
@@:
@@ -51,14 +53,14 @@ MY_PROLOG macro crc_end:req
sub rN, 4
and rN, NOT 3
sub rD, rN
mov x1, [SRCDAT]
mov x1, SRCDAT4
xor r0, r1
add rN, 4
endm
MY_EPILOG macro crc_end:req
sub rN, 4
mov x1, [SRCDAT]
mov x1, SRCDAT4
xor r0, r1
mov rD, rN
mov rN, num_VAR
@@ -69,14 +71,18 @@ MY_EPILOG macro crc_end:req
CRC1b
jmp crc_end
@@:
ifdef ABI_LINUX
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
MY_PROC XzCrc64UpdateT4, 4
MY_PROLOG crc_end_4
align 16
main_loop_4:
mov x1, [SRCDAT]
mov x1, SRCDAT4
movzx x2, x0_L
movzx x3, x0_H
shr r0, 16
@@ -96,21 +102,43 @@ MY_PROC XzCrc64UpdateT4, 4
MY_ENDP
else
; x86 (32-bit)
rD equ r1
rN equ r7
rD equ r1
rN equ r7
rT equ r5
crc_val equ (REG_SIZE * 5)
crc_table equ (8 + crc_val)
table_VAR equ [r4 + crc_table]
crc_OFFS equ (REG_SIZE * 5)
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
; cdecl or (GNU fastcall) stack:
; (UInt32 *) table
; size_t size
; void * data
; (UInt64) crc
; ret-ip <-(r4)
data_OFFS equ (8 + crc_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
table_OFFS equ (REG_SIZE + size_OFFS)
num_VAR equ [r4 + size_OFFS]
table_VAR equ [r4 + table_OFFS]
else
; Windows fastcall:
; r1 = data, r2 = size
; stack:
; (UInt32 *) table
; (UInt64) crc
; ret-ip <-(r4)
table_OFFS equ (8 + crc_OFFS)
table_VAR equ [r4 + table_OFFS]
num_VAR equ table_VAR
endif
SRCDAT equ rN + rD
SRCDAT4 equ dword ptr [rD + rN * 1]
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
op0 dest0, DWORD PTR [r5 + src * 8 + 0800h * t]
op1 dest1, DWORD PTR [r5 + src * 8 + 0800h * t + 4]
op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t]
op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
endm
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
@@ -132,11 +160,17 @@ endm
MY_PROLOG macro crc_end:req
MY_PUSH_4_REGS
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
mov rN, [r4 + size_OFFS]
mov rD, [r4 + data_OFFS]
else
mov rN, r2
endif
mov x0, [r4 + crc_val]
mov x2, [r4 + crc_val + 4]
mov r5, table_VAR
mov x0, [r4 + crc_OFFS]
mov x2, [r4 + crc_OFFS + 4]
mov rT, table_VAR
test rN, rN
jz crc_end
@@:
@@ -154,13 +188,13 @@ MY_PROLOG macro crc_end:req
sub rN, 4
and rN, NOT 3
sub rD, rN
xor r0, [SRCDAT]
xor r0, SRCDAT4
add rN, 4
endm
MY_EPILOG macro crc_end:req
sub rN, 4
xor r0, [SRCDAT]
xor r0, SRCDAT4
mov rD, rN
mov rN, num_VAR
@@ -179,7 +213,7 @@ MY_PROC XzCrc64UpdateT4, 5
movzx x6, x0_L
align 16
main_loop_4:
mov r3, [SRCDAT]
mov r3, SRCDAT4
xor r3, r2
CRC xor, mov, r3, r2, r6, 3
@@ -200,6 +234,6 @@ MY_PROC XzCrc64UpdateT4, 5
MY_EPILOG crc_end_4
MY_ENDP
endif
endif ; ! x64
end

4
C/7z.h
View File

@@ -1,5 +1,5 @@
/* 7z.h -- 7z interface
2017-04-03 : Igor Pavlov : Public domain */
2018-07-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
@@ -91,6 +91,8 @@ typedef struct
UInt64 *CoderUnpackSizes; // for all coders in all folders
Byte *CodersData;
UInt64 RangeLimit;
} CSzAr;
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);

View File

@@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions
2018-12-31 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
return SZ_OK;
}
void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p)
p->CoderUnpackSizes = NULL;
p->CodersData = NULL;
p->RangeLimit = 0;
}
static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc)
@@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
return SZ_ERROR_ARCHIVE;
if (propsSize >= 0x80)
return SZ_ERROR_UNSUPPORTED;
coder->PropsOffset = sd->Data - dataStart;
coder->PropsOffset = (size_t)(sd->Data - dataStart);
coder->PropsSize = (Byte)propsSize;
sd->Data += (size_t)propsSize;
sd->Size -= (size_t)propsSize;
@@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
{
UInt32 numCoders, ci, numInStreams = 0;
p->FoCodersOffsets[fo] = sd.Data - startBufPtr;
p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr);
RINOK(SzReadNumber32(&sd, &numCoders));
if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
@@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
{
size_t dataSize = sd.Data - startBufPtr;
const size_t dataSize = (size_t)(sd.Data - startBufPtr);
p->FoStartPackStreamIndex[fo] = packStreamIndex;
p->FoCodersOffsets[fo] = dataSize;
MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
@@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i))
numSubDigests += numStreams;
}
ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data);
continue;
}
if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
@@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdSizes.Data = sd->Data;
RINOK(SkipNumbers(sd, numUnpackSizesInData));
ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data);
RINOK(ReadID(sd, &type));
}
@@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdCRCs.Data = sd->Data;
RINOK(SkipBitUi32s(sd, numSubDigests));
ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data);
}
else
{
@@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p,
if (type == k7zIdPackInfo)
{
RINOK(ReadNumber(sd, dataOffset));
if (*dataOffset > p->RangeLimit)
return SZ_ERROR_ARCHIVE;
RINOK(ReadPackInfo(p, sd, alloc));
if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset)
return SZ_ERROR_ARCHIVE;
RINOK(ReadID(sd, &type));
}
if (type == k7zIdUnpackInfo)
@@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size
return SZ_ERROR_ARCHIVE;
for (p = data + pos;
#ifdef _WIN32
*(const UInt16 *)p != 0
*(const UInt16 *)(const void *)p != 0
#else
p[0] != 0 || p[1] != 0
#endif
; p += 2);
pos = p - data + 2;
pos = (size_t)(p - data) + 2;
*offsets++ = (pos >> 1);
}
while (--numFiles);
@@ -1133,6 +1139,8 @@ static SRes SzReadHeader2(
SRes res;
SzAr_Init(&tempAr);
tempAr.RangeLimit = p->db.RangeLimit;
res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
p->startPosAfterHeader, &tempAr, allocTemp);
*numTempBufs = tempAr.NumFolders;
@@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2(
nextHeaderSize = GetUi64(header + 20);
nextHeaderCRC = GetUi32(header + 28);
p->startPosAfterHeader = startArcPos + k7zStartHeaderSize;
p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize;
if (CrcCalc(header + 12, 20) != GetUi32(header + 8))
return SZ_ERROR_CRC;
p->db.RangeLimit = nextHeaderOffset;
nextHeaderSizeT = (size_t)nextHeaderSize;
if (nextHeaderSizeT != nextHeaderSize)
return SZ_ERROR_MEM;
@@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2(
{
Int64 pos = 0;
RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
if ((UInt64)pos < startArcPos + nextHeaderOffset ||
(UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
(UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset ||
(UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
(UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
return SZ_ERROR_INPUT_EOF;
}
RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset));
if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
return SZ_ERROR_MEM;
@@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2(
Buf_Init(&tempBuf);
SzAr_Init(&tempAr);
tempAr.RangeLimit = p->db.RangeLimit;
res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
SzAr_Free(&tempAr, allocTemp);

196
C/7zCrc.c
View File

@@ -1,5 +1,5 @@
/* 7zCrc.c -- CRC32 init
2017-06-06 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -26,8 +26,20 @@
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
extern
CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT4;
extern
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
extern
CRC_FUNC g_CrcUpdateT0_32;
CRC_FUNC g_CrcUpdateT0_32;
extern
CRC_FUNC g_CrcUpdateT0_64;
CRC_FUNC g_CrcUpdateT0_64;
extern
CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
@@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
return v;
}
/* ---------- hardware CRC ---------- */
#ifdef MY_CPU_LE
#if defined(MY_CPU_ARM_OR_ARM64)
// #pragma message("ARM*")
#if defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#define USE_ARM64_CRC
#endif
#endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4))
#if !defined(__ARM_FEATURE_CRC32)
#define __ARM_FEATURE_CRC32 1
#if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif
#endif
#if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC
#include <arm_acle.h>
#endif
#endif
#else
// no hardware CRC
// #define USE_CRC_EMU
#ifdef USE_CRC_EMU
#pragma message("ARM64 CRC emulation")
// Software stand-in for the __crc32b intrinsic (USE_CRC_EMU builds only):
// folds the low 8 bits of (data) into the CRC value (v) via g_CrcTable.
MY_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data)
{
  // CRC_UPDATE_BYTE_2 refers to a local named (table)
  const UInt32 *table = g_CrcTable;
  return CRC_UPDATE_BYTE_2(v, (Byte)data);
}
// Software stand-in for the __crc32w intrinsic (USE_CRC_EMU builds only):
// folds the 4 bytes of (data) into (v), least significant byte first.
MY_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data)
{
  // CRC_UPDATE_BYTE_2 refers to a local named (table)
  const UInt32 *table = g_CrcTable;
  unsigned n;
  for (n = 0; n < 4; n++, data >>= 8)
    v = CRC_UPDATE_BYTE_2(v, (Byte)data);
  return v;
}
// Software stand-in for the __crc32d intrinsic (USE_CRC_EMU builds only):
// folds the 8 bytes of (data) into (v), least significant byte first.
MY_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data)
{
  // CRC_UPDATE_BYTE_2 refers to a local named (table)
  const UInt32 *table = g_CrcTable;
  unsigned n;
  for (n = 0; n < 8; n++, data >>= 8)
    v = CRC_UPDATE_BYTE_2(v, (Byte)data);
  return v;
}
#endif // USE_CRC_EMU
#endif // defined(MY_CPU_ARM_OR_ARM64)
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#define T0_32_UNROLL_BYTES (4 * 4)
#define T0_64_UNROLL_BYTES (4 * 8)
#ifndef ATTRIB_CRC
#define ATTRIB_CRC
#endif
// #pragma message("USE ARM HW CRC")
// CRC32 update built on the __crc32b / __crc32w primitives.
//   v     : current CRC value
//   data  : input bytes
//   size  : number of input bytes
//   table : unused (kept so the signature matches CRC_FUNC)
// Returns the updated CRC value.
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  UNUSED_VAR(table);

  // head: single bytes until (cur) is aligned to T0_32_UNROLL_BYTES
  while (size != 0 && ((unsigned)(ptrdiff_t)cur & (T0_32_UNROLL_BYTES - 1)) != 0)
  {
    v = __crc32b(v, *cur++);
    size--;
  }

  // body: whole groups of T0_32_UNROLL_BYTES, four 32-bit steps per pass
  if (size >= T0_32_UNROLL_BYTES)
  {
    const size_t rem = size & (T0_32_UNROLL_BYTES - 1);
    const Byte * const end = cur + (size - rem);
    size = rem;
    do
    {
      v = __crc32w(v, *(const UInt32 *)(const void *)(cur));
      v = __crc32w(v, *(const UInt32 *)(const void *)(cur + 4));
      v = __crc32w(v, *(const UInt32 *)(const void *)(cur + 8));
      v = __crc32w(v, *(const UInt32 *)(const void *)(cur + 12));
      cur += 4 * 4;
    }
    while (cur != end);
  }

  // tail: remaining bytes one at a time
  while (size != 0)
  {
    v = __crc32b(v, *cur++);
    size--;
  }
  return v;
}
// CRC32 update built on the __crc32b / __crc32d primitives.
//   v     : current CRC value
//   data  : input bytes
//   size  : number of input bytes
//   table : unused (kept so the signature matches CRC_FUNC)
// Returns the updated CRC value.
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
  const Byte *cur = (const Byte *)data;
  UNUSED_VAR(table);

  // head: single bytes until (cur) is aligned to T0_64_UNROLL_BYTES
  while (size != 0 && ((unsigned)(ptrdiff_t)cur & (T0_64_UNROLL_BYTES - 1)) != 0)
  {
    v = __crc32b(v, *cur++);
    size--;
  }

  // body: whole groups of T0_64_UNROLL_BYTES, four 64-bit steps per pass
  if (size >= T0_64_UNROLL_BYTES)
  {
    const size_t rem = size & (T0_64_UNROLL_BYTES - 1);
    const Byte * const end = cur + (size - rem);
    size = rem;
    do
    {
      v = __crc32d(v, *(const UInt64 *)(const void *)(cur));
      v = __crc32d(v, *(const UInt64 *)(const void *)(cur + 8));
      v = __crc32d(v, *(const UInt64 *)(const void *)(cur + 16));
      v = __crc32d(v, *(const UInt64 *)(const void *)(cur + 24));
      cur += 4 * 8;
    }
    while (cur != end);
  }

  // tail: remaining bytes one at a time
  while (size != 0)
  {
    v = __crc32b(v, *cur++);
    size--;
  }
  return v;
}
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#endif // MY_CPU_LE
void MY_FAST_CALL CrcGenerateTable()
{
UInt32 i;
@@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable()
}
}
#endif
#endif
#ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC
if (CPU_IsSupported_CRC32())
{
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate =
#if defined(MY_CPU_ARM)
CrcUpdateT0_32;
#else
CrcUpdateT0_64;
#endif
}
#endif
#ifdef USE_CRC_EMU
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate = CrcUpdateT0_64;
#endif
#endif
}

View File

@@ -1,5 +1,5 @@
/* 7zCrcOpt.c -- CRC32 calculation
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -9,6 +9,7 @@
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x300)[((v ) & 0xFF)]
^ (table + 0x200)[((v >> 8) & 0xFF)]
@@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
return v;
}
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
d = *((const UInt32 *)p + 1);
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x000)[((v ) & 0xFF)]
^ (table + 0x100)[((v >> 8) & 0xFF)]
@@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
d = *((const UInt32 *)p + 1);
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]

View File

@@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder
2019-02-02 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -21,17 +21,20 @@
#endif
#define k_Copy 0
#define k_Delta 3
#ifndef _7Z_NO_METHOD_LZMA2
#define k_LZMA2 0x21
#endif
#define k_LZMA 0x30101
#define k_BCJ 0x3030103
#define k_BCJ2 0x303011B
#ifndef _7Z_NO_METHODS_FILTERS
#define k_Delta 3
#define k_BCJ 0x3030103
#define k_PPC 0x3030205
#define k_IA64 0x3030401
#define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805
#endif
#ifdef _7ZIP_PPMD_SUPPPORT
@@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp)
return *p->cur++;
if (p->res == SZ_OK)
{
size_t size = p->cur - p->begin;
size_t size = (size_t)(p->cur - p->begin);
p->processed += size;
p->res = ILookInStream_Skip(p->inStream, size);
size = (1 << 25);
@@ -101,29 +104,33 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
Ppmd7_Init(&ppmd, order);
}
{
CPpmd7z_RangeDec rc;
Ppmd7z_RangeDec_CreateVTable(&rc);
rc.Stream = &s.vt;
if (!Ppmd7z_RangeDec_Init(&rc))
ppmd.rc.dec.Stream = &s.vt;
if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec))
res = SZ_ERROR_DATA;
else if (s.extra)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else
else if (!s.extra)
{
SizeT i;
for (i = 0; i < outSize; i++)
Byte *buf = outBuffer;
const Byte *lim = buf + outSize;
for (; buf != lim; buf++)
{
int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
int sym = Ppmd7z_DecodeSymbol(&ppmd);
if (s.extra || sym < 0)
break;
outBuffer[i] = (Byte)sym;
*buf = (Byte)sym;
}
if (i != outSize)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc))
if (buf != lim)
res = SZ_ERROR_DATA;
else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec))
{
/* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */
res = SZ_ERROR_DATA;
}
}
if (s.extra)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else if (s.processed + (size_t)(s.cur - s.begin) != inSize)
res = SZ_ERROR_DATA;
}
Ppmd7_Free(&ppmd, allocMain);
return res;
}
@@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
return SZ_ERROR_UNSUPPORTED;
}
#ifndef _7Z_NO_METHODS_FILTERS
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
#endif
static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData,

View File

@@ -1,5 +1,5 @@
/* 7zFile.c -- File IO
2017-04-03 : Igor Pavlov : Public domain */
2021-04-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,9 +7,19 @@
#ifndef USE_WINDOWS_FILE
#ifndef UNDER_CE
#include <errno.h>
#endif
#include <errno.h>
#ifndef USE_FOPEN
#include <stdio.h>
#include <fcntl.h>
#ifdef _WIN32
#include <io.h>
typedef int ssize_t;
typedef int off_t;
#else
#include <unistd.h>
#endif
#endif
#else
@@ -23,30 +33,36 @@
And message can be "Network connection was lost"
*/
#define kChunkSizeMax (1 << 22)
#endif
#define kChunkSizeMax (1 << 22)
void File_Construct(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
p->handle = INVALID_HANDLE_VALUE;
#else
#elif defined(USE_FOPEN)
p->file = NULL;
#else
p->fd = -1;
#endif
}
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
static WRes File_Open(CSzFile *p, const char *name, int writeMode)
{
#ifdef USE_WINDOWS_FILE
p->handle = CreateFileA(name,
writeMode ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, NULL,
writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
#else
#elif defined(USE_FOPEN)
p->file = fopen(name, writeMode ? "wb+" : "rb");
return (p->file != 0) ? 0 :
#ifdef UNDER_CE
@@ -54,13 +70,34 @@ static WRes File_Open(CSzFile *p, const char *name, int writeMode)
#else
errno;
#endif
#else
int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY);
#ifdef O_BINARY
flags |= O_BINARY;
#endif
p->fd = open(name, flags, 0666);
return (p->fd != -1) ? 0 : errno;
#endif
}
WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); }
WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); }
// Opens (name) for writing and stores the resulting handle in (p).
// Returns 0 on success. In the file-descriptor build the error code is the
// errno value left by creat(); the other builds return what File_Open() returns.
WRes OutFile_Open(CSzFile *p, const char *name)
{
  #if !defined(USE_WINDOWS_FILE) && !defined(USE_FOPEN)
  // file-descriptor build: creat() creates the file or truncates an existing one
  p->fd = creat(name, 0666);
  if (p->fd == -1)
    return errno;
  return 0;
  #else
  return File_Open(p, name, 1);
  #endif
}
#endif
#ifdef USE_WINDOWS_FILE
static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
{
@@ -78,74 +115,124 @@ WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1
WRes File_Close(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
if (p->handle != INVALID_HANDLE_VALUE)
{
if (!CloseHandle(p->handle))
return GetLastError();
p->handle = INVALID_HANDLE_VALUE;
}
#else
#elif defined(USE_FOPEN)
if (p->file != NULL)
{
int res = fclose(p->file);
if (res != 0)
{
if (res == EOF)
return errno;
return res;
}
p->file = NULL;
}
#else
if (p->fd != -1)
{
if (close(p->fd) != 0)
return errno;
p->fd = -1;
}
#endif
return 0;
}
WRes File_Read(CSzFile *p, void *data, size_t *size)
{
size_t originalSize = *size;
*size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
*size = 0;
do
{
DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
data = (void *)((Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
return GetLastError();
// debug : we can break here for partial reading mode
if (processed == 0)
break;
}
while (originalSize > 0);
#elif defined(USE_FOPEN)
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const size_t processed = fread(data, 1, curSize, p->file);
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= processed;
*size += processed;
if (processed != curSize)
return ferror(p->file);
// debug : we can break here for partial reading mode
if (processed == 0)
break;
}
while (originalSize > 0);
return 0;
#else
*size = fread(data, 1, originalSize, p->file);
if (*size == originalSize)
return 0;
return ferror(p->file);
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const ssize_t processed = read(p->fd, data, curSize);
if (processed == -1)
return errno;
if (processed == 0)
break;
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= (size_t)processed;
*size += (size_t)processed;
// debug : we can break here for partial reading mode
// break;
}
while (originalSize > 0);
#endif
return 0;
}
WRes File_Write(CSzFile *p, const void *data, size_t *size)
{
size_t originalSize = *size;
*size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
*size = 0;
do
{
DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
data = (void *)((Byte *)data + processed);
const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
data = (const void *)((const Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
@@ -154,26 +241,52 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
break;
}
while (originalSize > 0);
return 0;
#elif defined(USE_FOPEN)
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const size_t processed = fwrite(data, 1, curSize, p->file);
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= processed;
*size += processed;
if (processed != curSize)
return ferror(p->file);
if (processed == 0)
break;
}
while (originalSize > 0);
#else
*size = fwrite(data, 1, originalSize, p->file);
if (*size == originalSize)
return 0;
return ferror(p->file);
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const ssize_t processed = write(p->fd, data, curSize);
if (processed == -1)
return errno;
if (processed == 0)
break;
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= (size_t)processed;
*size += (size_t)processed;
}
while (originalSize > 0);
#endif
return 0;
}
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
{
#ifdef USE_WINDOWS_FILE
LARGE_INTEGER value;
DWORD moveMethod;
value.LowPart = (DWORD)*pos;
value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
UInt32 low = (UInt32)*pos;
LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
switch (origin)
{
case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
@@ -181,34 +294,52 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
case SZ_SEEK_END: moveMethod = FILE_END; break;
default: return ERROR_INVALID_PARAMETER;
}
value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod);
if (value.LowPart == 0xFFFFFFFF)
low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod);
if (low == (UInt32)0xFFFFFFFF)
{
WRes res = GetLastError();
if (res != NO_ERROR)
return res;
}
*pos = ((Int64)value.HighPart << 32) | value.LowPart;
*pos = ((Int64)high << 32) | low;
return 0;
#else
int moveMethod;
int res;
int moveMethod; // = origin;
switch (origin)
{
case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
case SZ_SEEK_END: moveMethod = SEEK_END; break;
default: return 1;
default: return EINVAL;
}
res = fseek(p->file, (long)*pos, moveMethod);
*pos = ftell(p->file);
return res;
#endif
#if defined(USE_FOPEN)
{
int res = fseek(p->file, (long)*pos, moveMethod);
if (res == -1)
return errno;
*pos = ftell(p->file);
if (*pos == -1)
return errno;
return 0;
}
#else
{
off_t res = lseek(p->fd, (off_t)*pos, moveMethod);
if (res == -1)
return errno;
*pos = res;
return 0;
}
#endif // USE_FOPEN
#endif // USE_WINDOWS_FILE
}
WRes File_GetLength(CSzFile *p, UInt64 *length)
{
#ifdef USE_WINDOWS_FILE
@@ -224,7 +355,7 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
*length = (((UInt64)sizeHigh) << 32) + sizeLow;
return 0;
#else
#elif defined(USE_FOPEN)
long pos = ftell(p->file);
int res = fseek(p->file, 0, SEEK_END);
@@ -232,6 +363,24 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
fseek(p->file, pos, SEEK_SET);
return res;
#else
off_t pos;
*length = 0;
pos = lseek(p->fd, 0, SEEK_CUR);
if (pos != -1)
{
const off_t len2 = lseek(p->fd, 0, SEEK_END);
const off_t res2 = lseek(p->fd, pos, SEEK_SET);
if (len2 != -1)
{
*length = (UInt64)len2;
if (res2 != -1)
return 0;
}
}
return errno;
#endif
}
@@ -241,7 +390,9 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ;
WRes wres = File_Read(&p->file, buf, size);
p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
@@ -255,13 +406,17 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ;
WRes wres = File_Read(&p->file, buf, size);
p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
return File_Seek(&p->file, pos, origin);
WRes wres = File_Seek(&p->file, pos, origin);
p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileInStream_CreateVTable(CFileInStream *p)
@@ -276,7 +431,8 @@ void FileInStream_CreateVTable(CFileInStream *p)
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
{
CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
File_Write(&p->file, data, &size);
WRes wres = File_Write(&p->file, data, &size);
p->wres = wres;
return size;
}

View File

@@ -1,17 +1,20 @@
/* 7zFile.h -- File IO
2017-04-03 : Igor Pavlov : Public domain */
2021-02-15 : Igor Pavlov : Public domain */
#ifndef __7Z_FILE_H
#define __7Z_FILE_H
#ifdef _WIN32
#define USE_WINDOWS_FILE
// #include <windows.h>
#endif
#ifdef USE_WINDOWS_FILE
#include <windows.h>
#else
#include <stdio.h>
// note: USE_FOPEN mode is limited to 32-bit file size
// #define USE_FOPEN
// #include <stdio.h>
#endif
#include "7zTypes.h"
@@ -24,8 +27,10 @@ typedef struct
{
#ifdef USE_WINDOWS_FILE
HANDLE handle;
#else
#elif defined(USE_FOPEN)
FILE *file;
#else
int fd;
#endif
} CSzFile;
@@ -56,6 +61,7 @@ typedef struct
{
ISeqInStream vt;
CSzFile file;
WRes wres;
} CFileSeqInStream;
void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
@@ -65,6 +71,7 @@ typedef struct
{
ISeekInStream vt;
CSzFile file;
WRes wres;
} CFileInStream;
void FileInStream_CreateVTable(CFileInStream *p);
@@ -74,6 +81,7 @@ typedef struct
{
ISeqOutStream vt;
CSzFile file;
WRes wres;
} CFileOutStream;
void FileOutStream_CreateVTable(CFileOutStream *p);

View File

@@ -1,5 +1,5 @@
/* 7zStream.c -- 7z Stream functions
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
{
Int64 t = offset;
Int64 t = (Int64)offset;
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
}

View File

@@ -1,11 +1,13 @@
/* 7zTypes.h -- Basic types
2018-08-04 : Igor Pavlov : Public domain */
2021-04-25 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
#else
#include <errno.h>
#endif
#include <stddef.h>
@@ -43,18 +45,112 @@ EXTERN_C_BEGIN
typedef int SRes;
#ifdef _MSC_VER
#if _MSC_VER > 1200
#define MY_ALIGN(n) __declspec(align(n))
#else
#define MY_ALIGN(n)
#endif
#else
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
#else
#else // _WIN32
// #define ENV_HAVE_LSTAT
typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_WIN32
#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
#define MY__FACILITY__WRes MY__FACILITY_ERRNO
/* Builds an HRESULT from (x): keeps the low 16 bits of (x), places MY__FACILITY__WRes
   in the bits starting at bit 16, and sets the top bit (0x80000000). */
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \
| (HRESULT)0x80000000 ))
/* Converts a WRes to HRESULT: values <= 0 are passed through unchanged,
   positive values are wrapped with MY_HRESULT_FROM_errno_CONST_ERROR. */
#define MY_SRes_HRESULT_FROM_WRes(x) \
((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
/*
#define ERROR_FILE_NOT_FOUND 2L
#define ERROR_ACCESS_DENIED 5L
#define ERROR_NO_MORE_FILES 18L
#define ERROR_LOCK_VIOLATION 33L
#define ERROR_FILE_EXISTS 80L
#define ERROR_DISK_FULL 112L
#define ERROR_NEGATIVE_SEEK 131L
#define ERROR_ALREADY_EXISTS 183L
#define ERROR_DIRECTORY 267L
#define ERROR_TOO_MANY_POSTS 298L
#define ERROR_INVALID_REPARSE_DATA 4392L
#define ERROR_REPARSE_TAG_INVALID 4393L
#define ERROR_REPARSE_TAG_MISMATCH 4394L
*/
// we use errno equivalents for some WIN32 errors:
#define ERROR_INVALID_FUNCTION EINVAL
#define ERROR_ALREADY_EXISTS EEXIST
#define ERROR_FILE_EXISTS EEXIST
#define ERROR_PATH_NOT_FOUND ENOENT
#define ERROR_FILE_NOT_FOUND ENOENT
#define ERROR_DISK_FULL ENOSPC
// #define ERROR_INVALID_HANDLE EBADF
// we use FACILITY_WIN32 for errors that have no errno equivalent
// Too many posts were made to a semaphore.
#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/
// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
#define TEXT(quote) quote
#define FILE_ATTRIBUTE_READONLY 0x0001
#define FILE_ATTRIBUTE_HIDDEN 0x0002
#define FILE_ATTRIBUTE_SYSTEM 0x0004
#define FILE_ATTRIBUTE_DIRECTORY 0x0010
#define FILE_ATTRIBUTE_ARCHIVE 0x0020
#define FILE_ATTRIBUTE_DEVICE 0x0040
#define FILE_ATTRIBUTE_NORMAL 0x0080
#define FILE_ATTRIBUTE_TEMPORARY 0x0100
#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
#define FILE_ATTRIBUTE_COMPRESSED 0x0800
#define FILE_ATTRIBUTE_OFFLINE 0x1000
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
#endif
@@ -63,6 +159,10 @@ typedef int WRes;
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
#ifndef RINOK_WRes
/* Evaluates (x) once into a WRes; if the result is nonzero,
   returns that value from the enclosing function. */
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
#endif
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
@@ -75,6 +175,38 @@ typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifndef _WIN32
typedef int INT;
typedef Int32 INT32;
typedef unsigned int UINT;
typedef UInt32 UINT32;
typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
typedef UINT32 ULONG;
#undef DWORD
typedef UINT32 DWORD;
#define VOID void
#define HRESULT LONG
typedef void *LPVOID;
// typedef void VOID;
// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
// gcc / clang on Unix : (sizeof(long) == sizeof(void*)) in 32 or 64 bits
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
typedef long LONG_PTR;
typedef unsigned long DWORD_PTR;
typedef size_t SIZE_T;
#endif // _WIN32
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
@@ -128,25 +260,37 @@ typedef int BoolInt;
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
#else
#else // _MSC_VER
#define MY_NO_INLINE
#define MY_FORCE_INLINE
#define MY_CDECL
#define MY_FAST_CALL
/* inline keyword : for C++ / C99 */
/* GCC, clang: */
/*
#if defined (__GNUC__) && (__GNUC__ >= 4)
#define MY_FORCE_INLINE __attribute__((always_inline))
#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \
|| defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
#else
#define MY_NO_INLINE
#endif
*/
#define MY_FORCE_INLINE
#define MY_CDECL
#if defined(_M_IX86) \
|| defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl))
#define MY_FAST_CALL
#elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi))
#define MY_FAST_CALL
#else
#define MY_FAST_CALL
#endif
#endif // _MSC_VER
/* The following interfaces use first parameter as pointer to structure */
@@ -335,12 +479,11 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members"
*/
#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
@@ -353,6 +496,7 @@ struct ISzAlloc
*/
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifdef _WIN32

View File

@@ -1,7 +1,7 @@
#define MY_VER_MAJOR 19
#define MY_VER_MINOR 00
#define MY_VER_MAJOR 21
#define MY_VER_MINOR 02
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "19.00"
#define MY_VERSION_NUMBERS "21.02 alpha"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2019-02-21"
#define MY_DATE "2021-05-06"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR

301
C/7zip_gcc_c.mak Normal file
View File

@@ -0,0 +1,301 @@
MY_ARCH_2 = $(MY_ARCH)
MY_ASM = jwasm
MY_ASM = asmc
PROGPATH = $(O)/$(PROG)
# for object file
CFLAGS_BASE_LIST = -c
# for ASM file
# CFLAGS_BASE_LIST = -S
CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) -Wall -Werror -Wextra $(CFLAGS_WARN) \
-DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
LDFLAGS_STATIC = -DNDEBUG
# -static
ifdef SystemDrive
IS_MINGW = 1
endif
ifdef DEF_FILE
ifdef IS_MINGW
SHARED_EXT=.dll
LDFLAGS = -shared -DEF $(DEF_FILE) $(LDFLAGS_STATIC)
else
SHARED_EXT=.so
LDFLAGS = -shared -fPIC $(LDFLAGS_STATIC)
CC_SHARED=-fPIC
endif
else
LDFLAGS = $(LDFLAGS_STATIC)
# -s is not required for clang, do we need it for GCC ???
# -s
#-static -static-libgcc -static-libstdc++
ifdef IS_MINGW
SHARED_EXT=.exe
else
SHARED_EXT=
endif
endif
PROGPATH = $(O)/$(PROG)$(SHARED_EXT)
ifndef O
O=_o
endif
ifdef IS_MINGW
RM = del
MY_MKDIR=mkdir
LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32
CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE
# -Wno-delete-non-virtual-dtor
DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll
else
RM = rm -f
MY_MKDIR=mkdir -p
# CFLAGS_BASE := $(CFLAGS_BASE) -D_7ZIP_ST
# CXXFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
# LOCAL_LIBS=-lpthread
# LOCAL_LIBS_DLL=$(LOCAL_LIBS) -ldl
LIB2 = -lpthread -ldl
DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(OBJS)
endif
CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CC_SHARED) -o $@
# ABI-related options that are passed to $(MY_ASM) through AFLAGS:
#   IS_X64 defined : -elf64 and -DABI_LINUX
#   otherwise      : -elf, -DABI_LINUX and -DABI_CDECL
ifdef IS_X64
AFLAGS_ABI = -elf64 -DABI_LINUX
else
AFLAGS_ABI = -elf -DABI_LINUX -DABI_CDECL
# -DABI_CDECL
# -DABI_LINUX
# -DABI_CDECL
endif
# -Fo$(O)/ points the assembler at the $(O) directory for its object output
# (presumably the output-path option of jwasm / asmc; verify against the assembler's docs)
AFLAGS = $(AFLAGS_ABI) -Fo$(O)/
CXX_WARN_FLAGS =
#-Wno-invalid-offsetof
#-Wno-reorder
CXXFLAGS = $(LOCAL_FLAGS) $(CXXFLAGS_BASE2) $(CFLAGS_BASE) $(CXXFLAGS_EXTRA) $(CC_SHARED) -o $@ $(CXX_WARN_FLAGS)
all: $(O) $(PROGPATH)
$(O):
$(MY_MKDIR) $(O)
$(PROGPATH): $(OBJS)
$(CXX) -s -o $(PROGPATH) $(MY_ARCH_2) $(LDFLAGS) $(OBJS) $(MY_LIBS) $(LIB2)
ifndef NO_DEFAULT_RES
$O/resource.o: resource.rc
windres.exe $(RFLAGS) resource.rc $O/resource.o
endif
$O/7zAlloc.o: ../../../C/7zAlloc.c
$(CC) $(CFLAGS) $<
$O/7zArcIn.o: ../../../C/7zArcIn.c
$(CC) $(CFLAGS) $<
$O/7zBuf.o: ../../../C/7zBuf.c
$(CC) $(CFLAGS) $<
$O/7zBuf2.o: ../../../C/7zBuf2.c
$(CC) $(CFLAGS) $<
$O/7zCrc.o: ../../../C/7zCrc.c
$(CC) $(CFLAGS) $<
$O/7zDec.o: ../../../C/7zDec.c
$(CC) $(CFLAGS) $<
$O/7zFile.o: ../../../C/7zFile.c
$(CC) $(CFLAGS) $<
$O/7zStream.o: ../../../C/7zStream.c
$(CC) $(CFLAGS) $<
$O/Aes.o: ../../../C/Aes.c
$(CC) $(CFLAGS) $<
$O/Alloc.o: ../../../C/Alloc.c
$(CC) $(CFLAGS) $<
$O/Bcj2.o: ../../../C/Bcj2.c
$(CC) $(CFLAGS) $<
$O/Bcj2Enc.o: ../../../C/Bcj2Enc.c
$(CC) $(CFLAGS) $<
$O/Blake2s.o: ../../../C/Blake2s.c
$(CC) $(CFLAGS) $<
$O/Bra.o: ../../../C/Bra.c
$(CC) $(CFLAGS) $<
$O/Bra86.o: ../../../C/Bra86.c
$(CC) $(CFLAGS) $<
$O/BraIA64.o: ../../../C/BraIA64.c
$(CC) $(CFLAGS) $<
$O/BwtSort.o: ../../../C/BwtSort.c
$(CC) $(CFLAGS) $<
$O/CpuArch.o: ../../../C/CpuArch.c
$(CC) $(CFLAGS) $<
$O/Delta.o: ../../../C/Delta.c
$(CC) $(CFLAGS) $<
$O/DllSecur.o: ../../../C/DllSecur.c
$(CC) $(CFLAGS) $<
$O/HuffEnc.o: ../../../C/HuffEnc.c
$(CC) $(CFLAGS) $<
$O/LzFind.o: ../../../C/LzFind.c
$(CC) $(CFLAGS) $<
# ifdef MT_FILES
$O/LzFindMt.o: ../../../C/LzFindMt.c
$(CC) $(CFLAGS) $<
$O/Threads.o: ../../../C/Threads.c
$(CC) $(CFLAGS) $<
# endif
$O/LzmaEnc.o: ../../../C/LzmaEnc.c
$(CC) $(CFLAGS) $<
$O/Lzma86Dec.o: ../../../C/Lzma86Dec.c
$(CC) $(CFLAGS) $<
$O/Lzma86Enc.o: ../../../C/Lzma86Enc.c
$(CC) $(CFLAGS) $<
$O/Lzma2Dec.o: ../../../C/Lzma2Dec.c
$(CC) $(CFLAGS) $<
$O/Lzma2DecMt.o: ../../../C/Lzma2DecMt.c
$(CC) $(CFLAGS) $<
$O/Lzma2Enc.o: ../../../C/Lzma2Enc.c
$(CC) $(CFLAGS) $<
$O/LzmaLib.o: ../../../C/LzmaLib.c
$(CC) $(CFLAGS) $<
$O/MtCoder.o: ../../../C/MtCoder.c
$(CC) $(CFLAGS) $<
$O/MtDec.o: ../../../C/MtDec.c
$(CC) $(CFLAGS) $<
$O/Ppmd7.o: ../../../C/Ppmd7.c
$(CC) $(CFLAGS) $<
$O/Ppmd7aDec.o: ../../../C/Ppmd7aDec.c
$(CC) $(CFLAGS) $<
$O/Ppmd7Dec.o: ../../../C/Ppmd7Dec.c
$(CC) $(CFLAGS) $<
$O/Ppmd7Enc.o: ../../../C/Ppmd7Enc.c
$(CC) $(CFLAGS) $<
$O/Ppmd8.o: ../../../C/Ppmd8.c
$(CC) $(CFLAGS) $<
$O/Ppmd8Dec.o: ../../../C/Ppmd8Dec.c
$(CC) $(CFLAGS) $<
$O/Ppmd8Enc.o: ../../../C/Ppmd8Enc.c
$(CC) $(CFLAGS) $<
$O/Sha1.o: ../../../C/Sha1.c
$(CC) $(CFLAGS) $<
$O/Sha256.o: ../../../C/Sha256.c
$(CC) $(CFLAGS) $<
$O/Sort.o: ../../../C/Sort.c
$(CC) $(CFLAGS) $<
$O/Xz.o: ../../../C/Xz.c
$(CC) $(CFLAGS) $<
$O/XzCrc64.o: ../../../C/XzCrc64.c
$(CC) $(CFLAGS) $<
# USE_X86_ASM is set only when USE_ASM is defined together with IS_X64 or IS_X86.
# It selects between the ../../../Asm/x86/*.asm rules and the C fallback rules below.
ifdef USE_ASM
ifdef IS_X64
USE_X86_ASM=1
else
ifdef IS_X86
USE_X86_ASM=1
endif
endif
endif
ifdef USE_X86_ASM
$O/7zCrcOpt.o: ../../../Asm/x86/7zCrcOpt.asm
$(MY_ASM) $(AFLAGS) $<
$O/XzCrc64Opt.o: ../../../Asm/x86/XzCrc64Opt.asm
$(MY_ASM) $(AFLAGS) $<
$O/AesOpt.o: ../../../Asm/x86/AesOpt.asm
$(MY_ASM) $(AFLAGS) $<
$O/Sha1Opt.o: ../../../Asm/x86/Sha1Opt.asm
$(MY_ASM) $(AFLAGS) $<
$O/Sha256Opt.o: ../../../Asm/x86/Sha256Opt.asm
$(MY_ASM) $(AFLAGS) $<
else
$O/7zCrcOpt.o: ../../7zCrcOpt.c
$(CC) $(CFLAGS) $<
$O/XzCrc64Opt.o: ../../XzCrc64Opt.c
$(CC) $(CFLAGS) $<
$O/Sha1Opt.o: ../../Sha1Opt.c
$(CC) $(CFLAGS) $<
$O/Sha256Opt.o: ../../Sha256Opt.c
$(CC) $(CFLAGS) $<
$O/AesOpt.o: ../../AesOpt.c
$(CC) $(CFLAGS) $<
endif
ifdef USE_LZMA_DEC_ASM
ifdef IS_X64
$O/LzmaDecOpt.o: ../../../Asm/x86/LzmaDecOpt.asm
$(MY_ASM) $(AFLAGS) $<
endif
ifdef IS_ARM64
$O/LzmaDecOpt.o: ../../../Asm/arm64/LzmaDecOpt.S ../../../Asm/arm64/7zAsm.S
$(CC) $(CFLAGS) $<
endif
$O/LzmaDec.o: ../../LzmaDec.c
$(CC) $(CFLAGS) -D_LZMA_DEC_OPT $<
else
$O/LzmaDec.o: ../../LzmaDec.c
$(CC) $(CFLAGS) $<
endif
$O/XzDec.o: ../../../C/XzDec.c
$(CC) $(CFLAGS) $<
$O/XzEnc.o: ../../../C/XzEnc.c
$(CC) $(CFLAGS) $<
$O/XzIn.o: ../../../C/XzIn.c
$(CC) $(CFLAGS) $<
$O/7zMain.o: ../../../C/Util/7z/7zMain.c
$(CC) $(CFLAGS) $<
$O/LzmaUtil.o: ../../../C/Util/Lzma/LzmaUtil.c
$(CC) $(CFLAGS) $<
clean:
-$(DEL_OBJ_EXE)

135
C/Aes.c
View File

@@ -1,10 +1,17 @@
/* Aes.c -- AES encryption / decryption
2017-01-24 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Aes.h"
#include "CpuArch.h"
#include "Aes.h"
AES_CODE_FUNC g_AesCbc_Decode;
#ifndef _SFX
AES_CODE_FUNC g_AesCbc_Encode;
AES_CODE_FUNC g_AesCtr_Code;
UInt32 g_Aes_SupportedFunctions_Flags;
#endif
static UInt32 T[256 * 4];
static const Byte Sbox[256] = {
@@ -25,23 +32,10 @@ static const Byte Sbox[256] = {
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *ivAes, Byte *data, size_t numBlocks);
AES_CODE_FUNC g_AesCbc_Encode;
AES_CODE_FUNC g_AesCbc_Decode;
AES_CODE_FUNC g_AesCtr_Code;
static UInt32 D[256 * 4];
static Byte InvS[256];
static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
#define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24))
@@ -57,6 +51,36 @@ static const Byte Rcon[11] = { 0x00, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0
#define DD(x) (D + (x << 8))
// #define _SHOW_AES_STATUS
/* Compile-time decision: USE_HW_AES enables the code in this file that refers to
   the *_HW AES functions.
     x86 / x64              : always defined
     little-endian ARM/ARM64: defined only for the compiler versions checked below */
#ifdef MY_CPU_X86_OR_AMD64
#define USE_HW_AES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_AES
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_AES
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#endif
#endif
#endif
/* _PRF(x) is defined only under USE_HW_AES:
   it expands to (x) when _SHOW_AES_STATUS is defined (stdio.h is included for that case),
   and to nothing otherwise. */
#ifdef USE_HW_AES
#ifdef _SHOW_AES_STATUS
#include <stdio.h>
#define _PRF(x) x
#else
#define _PRF(x)
#endif
#endif
void AesGenTables(void)
{
unsigned i;
@@ -90,18 +114,48 @@ void AesGenTables(void)
}
}
g_AesCbc_Encode = AesCbc_Encode;
g_AesCbc_Decode = AesCbc_Decode;
g_AesCtr_Code = AesCtr_Code;
{
AES_CODE_FUNC d = AesCbc_Decode;
#ifndef _SFX
AES_CODE_FUNC e = AesCbc_Encode;
AES_CODE_FUNC c = AesCtr_Code;
UInt32 flags = 0;
#endif
#ifdef USE_HW_AES
if (CPU_IsSupported_AES())
{
// #pragma message ("AES HW")
_PRF(printf("\n===AES HW\n"));
d = AesCbc_Decode_HW;
#ifndef _SFX
e = AesCbc_Encode_HW;
c = AesCtr_Code_HW;
flags = k_Aes_SupportedFunctions_HW;
#endif
#ifdef MY_CPU_X86_OR_AMD64
if (CPU_Is_Aes_Supported())
if (CPU_IsSupported_VAES_AVX2())
{
g_AesCbc_Encode = AesCbc_Encode_Intel;
g_AesCbc_Decode = AesCbc_Decode_Intel;
g_AesCtr_Code = AesCtr_Code_Intel;
_PRF(printf("\n===vaes avx2\n"));
d = AesCbc_Decode_HW_256;
#ifndef _SFX
c = AesCtr_Code_HW_256;
flags |= k_Aes_SupportedFunctions_HW_256;
#endif
}
#endif
}
#endif
g_AesCbc_Decode = d;
#ifndef _SFX
g_AesCbc_Encode = e;
g_AesCtr_Code = c;
g_Aes_SupportedFunctions_Flags = flags;
#endif
}
}
@@ -142,8 +196,11 @@ void AesGenTables(void)
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
{
unsigned i, wSize;
wSize = keySize + 28;
unsigned i, m;
const UInt32 *wLim;
UInt32 t;
UInt32 rcon = 1;
keySize /= 4;
w[0] = ((UInt32)keySize / 2) + 3;
w += 4;
@@ -151,16 +208,26 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
for (i = 0; i < keySize; i++, key += 4)
w[i] = GetUi32(key);
for (; i < wSize; i++)
t = w[(size_t)keySize - 1];
wLim = w + (size_t)keySize * 3 + 28;
m = 0;
do
{
UInt32 t = w[(size_t)i - 1];
unsigned rem = i % keySize;
if (rem == 0)
t = Ui32(Sbox[gb1(t)] ^ Rcon[i / keySize], Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
else if (keySize > 6 && rem == 4)
t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
w[i] = w[i - keySize] ^ t;
if (m == 0)
{
t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
rcon <<= 1;
if (rcon & 0x100)
rcon = 0x1b;
m = keySize;
}
else if (m == 4 && keySize > 6)
t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
m--;
t ^= w[0];
w[keySize] = t;
}
while (++w != wLim);
}
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
@@ -184,6 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
src and dest are pointers to 4 UInt32 words.
src and dest can point to same block */
// MY_FORCE_INLINE
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
@@ -207,6 +275,7 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
FT4(0); FT4(1); FT4(2); FT4(3);
}
MY_FORCE_INLINE
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
@@ -294,7 +363,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
UInt32 t = temp[i];
#ifdef MY_CPU_LE_UNALIGN
*((UInt32 *)data) ^= t;
*((UInt32 *)(void *)data) ^= t;
#else
data[0] ^= (t & 0xFF);
data[1] ^= ((t >> 8) & 0xFF);

26
C/Aes.h
View File

@@ -1,5 +1,5 @@
/* Aes.h -- AES encryption / decryption
2013-01-18 : Igor Pavlov : Public domain */
2018-04-28 : Igor Pavlov : Public domain */
#ifndef __AES_H
#define __AES_H
@@ -26,12 +26,34 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize)
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
/* data - 16-byte aligned pointer to data */
/* numBlocks - the number of 16-byte blocks in data array */
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCbc_Decode;
#ifndef _SFX
extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCtr_Code;
#define k_Aes_SupportedFunctions_HW (1 << 2)
#define k_Aes_SupportedFunctions_HW_256 (1 << 3)
extern UInt32 g_Aes_SupportedFunctions_Flags;
#endif
/* Declares a function (funcName) with the same parameter list as AES_CODE_FUNC.
   The macro body already ends with ';', so the uses below have no trailing semicolon. */
#define DECLARE__AES_CODE_FUNC(funcName) \
void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
/* base variants */
DECLARE__AES_CODE_FUNC (AesCbc_Encode)
DECLARE__AES_CODE_FUNC (AesCbc_Decode)
DECLARE__AES_CODE_FUNC (AesCtr_Code)
/* _HW variants */
DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)
/* _HW_256 variants (declared for Decode and Ctr only) */
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)
EXTERN_C_END

View File

@@ -1,184 +1,776 @@
/* AesOpt.c -- Intel's AES
2017-06-08 : Igor Pavlov : Public domain */
/* AesOpt.c -- AES optimized code for x86 AES hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
#define USE_INTEL_AES
#if defined(__clang__)
#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
#define USE_INTEL_AES
#define ATTRIB_AES __attribute__((__target__("aes")))
#if (__clang_major__ >= 8)
#define USE_INTEL_VAES
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
#endif
#endif
#elif defined(__GNUC__)
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define USE_INTEL_AES
#ifndef __AES__
#define ATTRIB_AES __attribute__((__target__("aes")))
#endif
#if (__GNUC__ >= 8)
#define USE_INTEL_VAES
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1110)
#define USE_INTEL_AES
#if (__INTEL_COMPILER >= 1900)
#define USE_INTEL_VAES
#endif
#endif
#elif defined(_MSC_VER)
#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
#define USE_INTEL_AES
#if (_MSC_VER >= 1910)
#define USE_INTEL_VAES
#endif
#endif
#endif
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
#ifndef ATTRIB_VAES
#define ATTRIB_VAES
#endif
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
#ifndef USE_INTEL_VAES
#define AES_TYPE_keys __m128i
#define AES_TYPE_data __m128i
#endif
/* Function header shared by the __m128i-based AES routines in this section. */
#define AES_FUNC_START(name) \
void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks)
/* Emits a prototype for (name), then the header of its definition prefixed with ATTRIB_AES.
   The function body in braces must follow the macro use. */
#define AES_FUNC_START2(name) \
AES_FUNC_START (name); \
ATTRIB_AES \
AES_FUNC_START (name)
/* dest = op(dest, src); the expansion already includes the terminating ';' */
#define MM_OP(op, dest, src) dest = op(dest, src);
/* same as MM_OP, but the destination is always the local variable (m) */
#define MM_OP_m(op, src) MM_OP(op, m, src);
/* 128-bit and 256-bit in-place XOR */
#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src);
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src);
AES_FUNC_START2 (AesCbc_Encode_HW)
{
__m128i m = *p;
const __m128i k0 = p[2];
const __m128i k1 = p[3];
const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p + 3;
m = _mm_xor_si128(m, *data);
m = _mm_xor_si128(m, p[2]);
UInt32 r = numRounds2;
const __m128i *w = p + 4;
__m128i temp = *data;
MM_XOR (temp, k0);
MM_XOR (m, temp);
MM_OP_m (_mm_aesenc_si128, k1);
do
{
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenc_si128(m, w[1]);
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
while (--numRounds2 != 0);
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenclast_si128(m, w[1]);
while (--r);
MM_OP_m (_mm_aesenclast_si128, w[0]);
*data = m;
}
*p = m;
}
#define NUM_WAYS 3
#define AES_OP_W(op, n) { \
const __m128i t = w[n]; \
m0 = op(m0, t); \
m1 = op(m1, t); \
m2 = op(m2, t); \
}
#define WOP_1(op)
#define WOP_2(op) WOP_1 (op) op (m1, 1);
#define WOP_3(op) WOP_2 (op) op (m2, 2);
#define WOP_4(op) WOP_3 (op) op (m3, 3);
#ifdef MY_CPU_AMD64
#define WOP_5(op) WOP_4 (op) op (m4, 4);
#define WOP_6(op) WOP_5 (op) op (m5, 5);
#define WOP_7(op) WOP_6 (op) op (m6, 6);
#define WOP_8(op) WOP_7 (op) op (m7, 7);
#endif
/*
#define WOP_9(op) WOP_8 (op) op (m8, 8);
#define WOP_10(op) WOP_9 (op) op (m9, 9);
#define WOP_11(op) WOP_10(op) op (m10, 10);
#define WOP_12(op) WOP_11(op) op (m11, 11);
#define WOP_13(op) WOP_12(op) op (m12, 12);
#define WOP_14(op) WOP_13(op) op (m13, 13);
*/
#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
#ifdef MY_CPU_AMD64
#define NUM_WAYS 8
#define WOP_M1 WOP_8
#else
#define NUM_WAYS 4
#define WOP_M1 WOP_4
#endif
void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
#define WOP(op) op (m0, 0); WOP_M1(op)
#define DECLARE_VAR(reg, ii) __m128i reg
#define LOAD_data( reg, ii) reg = data[ii];
#define STORE_data( reg, ii) data[ii] = reg;
#if (NUM_WAYS > 1)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
#endif
#define AVX__DECLARE_VAR(reg, ii) __m256i reg
#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]));
#define MM_OP_key(op, reg) MM_OP(op, reg, key);
#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg)
#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg)
#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg)
#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg)
#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr;
#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key);
#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg);
#define WOP_KEY(op, n) { \
const __m128i key = w[n]; \
WOP(op); }
#define AVX__WOP_KEY(op, n) { \
const __m256i key = w[n]; \
WOP(op); }
/*
  WIDE_LOOP_START / WIDE_LOOP_END bracket a loop body that handles NUM_WAYS blocks
  per iteration. They expect (data), (dataEnd) and (numBlocks) to be in scope:
    - dataEnd is set to (data + numBlocks);
    - the body is entered only if (numBlocks >= NUM_WAYS) and is repeated
      while at least NUM_WAYS blocks remain;
    - dataEnd is restored after the loop, so SINGLE_LOOP can process
      the remaining blocks one at a time.
  The last line of each macro has no trailing backslash, so a definition never
  depends on a blank line following it.
*/
#define WIDE_LOOP_START \
dataEnd = data + numBlocks; \
if (numBlocks >= NUM_WAYS) \
{ dataEnd -= NUM_WAYS; do {

#define WIDE_LOOP_END \
data += NUM_WAYS; \
} while (data <= dataEnd); \
dataEnd += NUM_WAYS; }

/* Header of the loop that processes the remaining blocks one by one. */
#define SINGLE_LOOP \
for (; data < dataEnd; data++)
/* Capacity of the local __m256i keys[] array declared by WIDE_LOOP_START_AVX. */
#define NUM_AES_KEYS_MAX 15

/*
  256-bit counterpart of the wide loop: each iteration handles (NUM_WAYS * 2) 128-bit blocks.
  Expects (data), (dataEnd), (numBlocks), (numRounds) and (p) to be in scope.
  WIDE_LOOP_START_AVX(OP):
    - sets dataEnd = data + numBlocks;
    - if (numBlocks >= NUM_WAYS * 2): runs OP, then fills keys[0 .. numRounds - 1]
      with p[ii] broadcast to both 128-bit lanes, then opens the do-loop.
      NOTE(review): numRounds must not exceed NUM_AES_KEYS_MAX - confirm against callers.
  WIDE_LOOP_END_AVX(OP):
    - advances data, closes the loop, restores dataEnd, runs OP and calls _mm256_zeroupper().
  The last line of each macro has no trailing backslash, so a definition never
  depends on a blank line following it.
*/
#define WIDE_LOOP_START_AVX(OP) \
dataEnd = data + numBlocks; \
if (numBlocks >= NUM_WAYS * 2) \
{ __m256i keys[NUM_AES_KEYS_MAX]; \
UInt32 ii; \
OP \
for (ii = 0; ii < numRounds; ii++) \
keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \
dataEnd -= NUM_WAYS * 2; do {

#define WIDE_LOOP_END_AVX(OP) \
data += NUM_WAYS * 2; \
} while (data <= dataEnd); \
dataEnd += NUM_WAYS * 2; \
OP \
_mm256_zeroupper(); \
}

/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified,
MSVC still can insert vzeroupper instruction. */
AES_FUNC_START2 (AesCbc_Decode_HW)
{
__m128i iv = *p;
for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1;
const __m128i *dataEnd;
p += 2;
WIDE_LOOP_START
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1);
const __m128i *w = p + numRounds2 * 2;
__m128i m0, m1, m2;
{
const __m128i t = w[2];
m0 = _mm_xor_si128(t, data[0]);
m1 = _mm_xor_si128(t, data[1]);
m2 = _mm_xor_si128(t, data[2]);
}
numRounds2--;
const __m128i *w = wStart;
WOP (DECLARE_VAR)
WOP (LOAD_data);
WOP_KEY (AES_XOR, 1)
do
{
AES_DEC(1)
AES_DEC(0)
w -= 2;
WOP_KEY (AES_DEC, 0)
w--;
}
while (--numRounds2 != 0);
AES_DEC(1)
AES_DEC_LAST(0)
while (w != p);
WOP_KEY (AES_DEC_LAST, 0)
{
__m128i t;
t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
MM_XOR (m0, iv);
WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1];
WOP (STORE_data);
}
}
for (; numBlocks != 0; numBlocks--, data++)
WIDE_LOOP_END
SINGLE_LOOP
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1);
const __m128i *w = p + numRounds2 * 2;
__m128i m = _mm_xor_si128(w[2], *data);
numRounds2--;
const __m128i *w = wStart - 1;
__m128i m = _mm_xor_si128 (w[2], *data);
do
{
m = _mm_aesdec_si128(m, w[1]);
m = _mm_aesdec_si128(m, w[0]);
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
while (--numRounds2 != 0);
m = _mm_aesdec_si128(m, w[1]);
m = _mm_aesdeclast_si128(m, w[0]);
while (w != p);
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdeclast_si128, w[0]);
m = _mm_xor_si128(m, iv);
MM_XOR (m, iv);
iv = *data;
*data = m;
}
*p = iv;
p[-2] = iv;
}
void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
AES_FUNC_START2 (AesCtr_Code_HW)
{
__m128i ctr = *p;
__m128i one;
one.m128i_u64[0] = 1;
one.m128i_u64[1] = 0;
for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1;
const __m128i *dataEnd;
__m128i one = _mm_cvtsi32_si128(1);
p += 2;
WIDE_LOOP_START
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
const __m128i *w = p;
__m128i m0, m1, m2;
{
const __m128i t = w[2];
ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
}
w += 3;
UInt32 r = numRoundsMinus2;
WOP (DECLARE_VAR)
WOP (CTR_START);
WOP_KEY (AES_XOR, 0)
w += 1;
do
{
AES_ENC(0)
AES_ENC(1)
w += 2;
WOP_KEY (AES_ENC, 0)
w += 1;
}
while (--numRounds2 != 0);
AES_ENC(0)
AES_ENC_LAST(1)
data[0] = _mm_xor_si128(data[0], m0);
data[1] = _mm_xor_si128(data[1], m1);
data[2] = _mm_xor_si128(data[2], m2);
while (--r);
WOP_KEY (AES_ENC_LAST, 0)
WOP (CTR_END);
}
for (; numBlocks != 0; numBlocks--, data++)
WIDE_LOOP_END
SINGLE_LOOP
{
UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p;
__m128i m;
ctr = _mm_add_epi64(ctr, one);
m = _mm_xor_si128(ctr, p[2]);
w += 3;
MM_OP (_mm_add_epi64, ctr, one);
m = _mm_xor_si128 (ctr, p[0]);
w += 1;
do
{
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenc_si128(m, w[1]);
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
while (--numRounds2 != 0);
m = _mm_aesenc_si128(m, w[0]);
m = _mm_aesenclast_si128(m, w[1]);
*data = _mm_xor_si128(*data, m);
while (--numRounds2);
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenclast_si128, w[1]);
MM_XOR (*data, m);
}
*p = ctr;
p[-2] = ctr;
}
#else
void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
AesCbc_Encode(p, data, numBlocks);
}
void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
AesCbc_Decode(p, data, numBlocks);
}
void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
{
AesCtr_Code(p, data, numBlocks);
}
#ifdef USE_INTEL_VAES
#if defined(__clang__) && defined(_MSC_VER)
#define __SSE4_2__
#define __AES__
#define __AVX__
#define __AVX2__
#define __VAES__
#define __AVX512F__
#define __AVX512VL__
#endif
#include <immintrin.h>
#define VAES_FUNC_START2(name) \
AES_FUNC_START (name); \
ATTRIB_VAES \
AES_FUNC_START (name)
/*
  AesCbc_Decode_HW_256: CBC decryption of (numBlocks) 16-byte blocks in (data), in place.
  Layout read from (p): p[0] = chaining value (iv), first UInt32 of p[1] = round counter,
  p + 2 = round keys. The final chaining value is written back to p[0] on exit.
  Blocks are processed (NUM_WAYS * 2) at a time with 256-bit operations while enough
  blocks remain; the rest is processed one block at a time with 128-bit operations.
*/
VAES_FUNC_START2 (AesCbc_Decode_HW_256)
{
__m128i iv = *p;
const __m128i *dataEnd;
// number of key entries that WIDE_LOOP_START_AVX copies into keys[]
UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
// from here (p) points to the first round key; the iv slot is p[-2]
p += 2;
WIDE_LOOP_START_AVX(;)
{
// w[1] is the last entry of keys[]
const __m256i *w = keys + numRounds - 2;
WOP (AVX__DECLARE_VAR)
WOP (AVX__LOAD_data);
// initial XOR with the last key
AVX__WOP_KEY (AVX__AES_XOR, 1)
// one aesdec step per key, walking down to keys[1]
do
{
AVX__WOP_KEY (AVX__AES_DEC, 0)
w--;
}
while (w != keys);
// last step uses keys[0]
AVX__WOP_KEY (AVX__AES_DEC_LAST, 0)
// CBC chaining: the first vector is XORed with (iv, data[0]),
// the other vectors with the input blocks located one block earlier (data - 1)
AVX_XOR (m0, _mm256_setr_m128i(iv, data[0]));
WOP_M1 (AVX__XOR_data_M1)
// the last input block of this group is the next chaining value;
// it must be read before the stores below overwrite data[]
iv = data[NUM_WAYS * 2 - 1];
WOP (AVX__STORE_data);
}
WIDE_LOOP_END_AVX(;)
SINGLE_LOOP
{
// w[2] is the last round key
const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3;
__m128i m = _mm_xor_si128 (w[2], *data);
// two aesdec steps per iteration, walking down to (p)
do
{
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
while (w != p);
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdeclast_si128, w[0]);
MM_XOR (m, iv);
// keep the input block as the next chaining value before it is overwritten
iv = *data;
*data = m;
}
// store the chaining value back to the iv slot
p[-2] = iv;
}
/*
SSE2: _mm_cvtsi32_si128 : movd
AVX: _mm256_setr_m128i : vinsertf128
AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm
_mm256_extracti128_si256 : vextracti128
_mm256_broadcastsi128_si256 : vbroadcasti128
*/
/*
  Setup / teardown code passed as (OP) to WIDE_LOOP_START_AVX / WIDE_LOOP_END_AVX
  by the 256-bit CTR code. Expects (ctr), (one), (ctr2) and (two) to be in scope.
  AVX__CTR_LOOP_START:
    ctr2 = pair of counters (ctr - 1, ctr)
    two  = (one, one) + (one, one), the per-step increment added by AVX__CTR_START
  AVX__CTR_LOOP_ENC:
    copies 128-bit lane 1 of ctr2 back to (ctr).
  The last line of each macro has no trailing backslash, so a definition never
  depends on a blank line following it.
*/
#define AVX__CTR_LOOP_START \
ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \
two = _mm256_setr_m128i(one, one); \
two = _mm256_add_epi64(two, two);

// two = _mm256_setr_epi64x(2, 0, 2, 0);

#define AVX__CTR_LOOP_ENC \
ctr = _mm256_extracti128_si256 (ctr2, 1);

/*
  AesCtr_Code_HW_256: CTR-mode processing of (numBlocks) 16-byte blocks in (data), in place.
  Layout read from (p): p[0] = counter, first UInt32 of p[1] = round counter,
  p + 2 = round keys. The updated counter is written back to p[0] on exit.
  For each block the counter is incremented (64-bit add of 1), encrypted,
  and the result is XORed into the data block.
*/
VAES_FUNC_START2 (AesCtr_Code_HW_256)
{
__m128i ctr = *p;
// number of key entries that WIDE_LOOP_START_AVX copies into keys[]
UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
const __m128i *dataEnd;
__m128i one = _mm_cvtsi32_si128(1);
__m256i ctr2, two;
// from here (p) points to the first round key; the counter slot is p[-2]
p += 2;
WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START)
{
const __m256i *w = keys;
UInt32 r = numRounds - 2;
WOP (AVX__DECLARE_VAR)
// each vector: ctr2 += two, then vector = ctr2 ^ keys[0]
AVX__WOP_KEY (AVX__CTR_START, 0);
w += 1;
// (numRounds - 2) aesenc steps
do
{
AVX__WOP_KEY (AVX__AES_ENC, 0)
w += 1;
}
while (--r);
AVX__WOP_KEY (AVX__AES_ENC_LAST, 0)
// XOR the encrypted counters into the data
WOP (AVX__CTR_END);
}
// AVX__CTR_LOOP_ENC moves the current counter from ctr2 back to (ctr)
WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC)
SINGLE_LOOP
{
UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p;
__m128i m;
MM_OP (_mm_add_epi64, ctr, one);
m = _mm_xor_si128 (ctr, p[0]);
w += 1;
// two aesenc steps per iteration
do
{
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
while (--numRounds2);
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenclast_si128, w[1]);
MM_XOR (*data, m);
}
// store the counter back to its slot
p[-2] = ctr;
}
#endif // USE_INTEL_VAES
#else // USE_INTEL_AES
/* no USE_INTEL_AES */
#pragma message("AES HW_SW stub was used")
#define AES_TYPE_keys UInt32
#define AES_TYPE_data Byte
#define AES_FUNC_START(name) \
void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \
#define AES_COMPAT_STUB(name) \
AES_FUNC_START(name); \
AES_FUNC_START(name ## _HW) \
{ name(p, data, numBlocks); }
AES_COMPAT_STUB (AesCbc_Encode)
AES_COMPAT_STUB (AesCbc_Decode)
AES_COMPAT_STUB (AesCtr_Code)
#endif // USE_INTEL_AES
// Without VAES support the *_256 entry points are still provided:
// each one just forwards to the corresponding non-256 function.
#ifndef USE_INTEL_VAES
#pragma message("VAES HW_SW stub was used")
// Declares and defines (name)_256, which casts its UInt32 / Byte pointers to
// AES_TYPE_keys / AES_TYPE_data (via void *) and calls (name).
#define VAES_COMPAT_STUB(name) \
void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \
{ name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); }
VAES_COMPAT_STUB (AesCbc_Decode_HW)
VAES_COMPAT_STUB (AesCtr_Code_HW)
#endif // ! USE_INTEL_VAES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
// Define USE_HW_AES only for compilers at or above the version listed for each family:
// clang >= 8, GCC >= 6, MSVC >= 1910. Other compilers leave it undefined.
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_AES
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_AES
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#endif
#endif
#ifdef USE_HW_AES
// #pragma message("=== AES HW === ")
// ATTRIB_AES: per-function target attribute that enables the crypto extension
// for clang / GCC; it expands to nothing for other compilers.
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_AES __attribute__((__target__("+crypto")))
#else
#define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
// fallback: no attribute when the branch above did not define one
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
// MSVC for ARM64 uses its own NEON header name
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
// v128: one 128-bit block (AES state, round key or IV / counter block)
typedef uint8x16_t v128;
// Common signature of the ARM hardware AES functions
#define AES_FUNC_START(name) \
void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks)
// Prototype first, then the definition header carrying ATTRIB_AES
#define AES_FUNC_START2(name) \
AES_FUNC_START (name); \
ATTRIB_AES \
AES_FUNC_START (name)
// dest = op(dest, src). Note: the expansion already ends with ';'
#define MM_OP(op, dest, src) dest = op(dest, src);
// The *_m variants operate on a local variable that must be named (m)
#define MM_OP_m(op, src) MM_OP(op, m, src);
#define MM_OP1_m(op) m = op(m);
#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src);
#define MM_XOR_m( src) MM_XOR(m, src);
// AES_E_m: m = vaeseq_u8(m, k); AES_E_MC_m: the same, followed by m = vaesmcq_u8(m)
#define AES_E_m(k) MM_OP_m (vaeseq_u8, k);
#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8);
// AES-CBC encryption of (numBlocks) 128-bit blocks in place.
// Layout read by this code: p[0] = chaining value (IV), a UInt32 at (p + 1) = numRounds2,
// round keys start at p[2]. The last ciphertext block is written back to p[0].
AES_FUNC_START2 (AesCbc_Encode_HW)
{
v128 m = *p;
// the first 10 round keys are loaded once, outside the block loop
const v128 k0 = p[2];
const v128 k1 = p[3];
const v128 k2 = p[4];
const v128 k3 = p[5];
const v128 k4 = p[6];
const v128 k5 = p[7];
const v128 k6 = p[8];
const v128 k7 = p[9];
const v128 k8 = p[10];
const v128 k9 = p[11];
const UInt32 numRounds2 = *(const UInt32 *)(p + 1);
// the last two round keys sit at a position that depends on numRounds2
const v128 *w = p + ((size_t)numRounds2 * 2);
const v128 k_z1 = w[1];
const v128 k_z0 = w[2];
for (; numBlocks != 0; numBlocks--, data++)
{
// CBC chaining: XOR the plaintext block into the previous ciphertext (or IV)
MM_XOR_m (*data);
AES_E_MC_m (k0)
AES_E_MC_m (k1)
AES_E_MC_m (k2)
AES_E_MC_m (k3)
AES_E_MC_m (k4)
AES_E_MC_m (k5)
AES_E_MC_m (k6)
AES_E_MC_m (k7)
AES_E_MC_m (k8)
// two extra rounds when numRounds2 >= 6, and two more when numRounds2 > 6
if (numRounds2 >= 6)
{
AES_E_MC_m (k9)
AES_E_MC_m (p[12])
if (numRounds2 != 6)
{
AES_E_MC_m (p[13])
AES_E_MC_m (p[14])
}
}
// final round: AESE without MixColumns, then XOR with the last round key
AES_E_m (k_z1);
MM_XOR_m (k_z0);
*data = m;
}
// save the chaining value for the next call
*p = m;
}
// WOP_n(op) expands to op (m1, 1); ... op (m<n-1>, n-1);  (register m0 is not included)
#define WOP_1(op)
#define WOP_2(op) WOP_1 (op) op (m1, 1);
#define WOP_3(op) WOP_2 (op) op (m2, 2);
#define WOP_4(op) WOP_3 (op) op (m3, 3);
#define WOP_5(op) WOP_4 (op) op (m4, 4);
#define WOP_6(op) WOP_5 (op) op (m5, 5);
#define WOP_7(op) WOP_6 (op) op (m6, 6);
#define WOP_8(op) WOP_7 (op) op (m7, 7);
// Number of blocks processed per iteration of the wide loop
#define NUM_WAYS 8
// WOP_M1(op): apply (op) to m1 ... m7;  WOP(op): apply (op) to m0 ... m7
#define WOP_M1 WOP_8
#define WOP(op) op (m0, 0); WOP_M1(op)
// Per-register operations for WOP / WOP_M1: (reg) is the variable, (ii) is its block index
#define DECLARE_VAR(reg, ii) v128 reg
#define LOAD_data( reg, ii) reg = data[ii];
#define STORE_data( reg, ii) data[ii] = reg;
#if (NUM_WAYS > 1)
// XOR with the previous data block (index ii - 1)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
#endif
// The *_key operations use a variable named (key), which WOP_KEY declares
#define MM_OP_key(op, reg) MM_OP (op, reg, key);
#define AES_D_m(k) MM_OP_m (vaesdq_u8, k);
#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8);
#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg)
#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg)
#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg)
#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg)
#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg)
// CTR_START: increment the shared 64-bit-lane counter (ctr) by (one) and copy it to (reg)
#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr);
// CTR_END: XOR (reg) into data[ii] in place
#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
// WOP_KEY: load round key w[n] once, then apply (op) to all NUM_WAYS registers
#define WOP_KEY(op, n) { \
const v128 key = w[n]; \
WOP(op); }
/*
  WIDE_LOOP_START / WIDE_LOOP_END bracket a loop body that processes NUM_WAYS
  blocks per iteration. They use (data), (dataEnd) and (numBlocks) from the
  enclosing function:
    - dataEnd is set to (data + numBlocks);
    - the body runs only if (numBlocks >= NUM_WAYS), and repeats while at
      least NUM_WAYS blocks remain;
    - on exit dataEnd is again the end of the data, and (data) points to the
      first block that was not processed.
  WIDE_LOOP_START opens the "if" and "do" braces, WIDE_LOOP_END closes them.
  No macro ends with a line continuation, so a following #define can never
  be spliced into the previous macro.
*/
#define WIDE_LOOP_START \
    dataEnd = data + numBlocks; \
    if (numBlocks >= NUM_WAYS) \
    { dataEnd -= NUM_WAYS; do {
#define WIDE_LOOP_END \
    data += NUM_WAYS; \
    } while (data <= dataEnd); \
    dataEnd += NUM_WAYS; }
/* SINGLE_LOOP: header of the loop that processes the remaining blocks one by one */
#define SINGLE_LOOP \
    for (; data < dataEnd; data++)
// AES-CBC decryption of (numBlocks) 128-bit blocks in place:
// NUM_WAYS blocks per iteration in the wide loop, then one block at a time.
// Layout read by this code: p[0] = IV, a UInt32 at (p + 1) = half of the round count,
// round keys start at p[2]. The last ciphertext block is written back as the new IV.
AES_FUNC_START2 (AesCbc_Decode_HW)
{
v128 iv = *p;
// wStart is computed from the original p (before "p += 2"); keys are walked from wStart down to p
const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd;
p += 2;
WIDE_LOOP_START
{
const v128 *w = wStart;
WOP (DECLARE_VAR)
WOP (LOAD_data);
WOP_KEY (AES_D_IMC, 2)
do
{
WOP_KEY (AES_D_IMC, 1)
WOP_KEY (AES_D_IMC, 0)
w -= 2;
}
while (w != p);
// here w == p: last AESD round with w[1], then final XOR with w[0]
WOP_KEY (AES_D, 1)
WOP_KEY (AES_XOR, 0)
// CBC un-chaining: block 0 is XORed with iv, block i with ciphertext block (i - 1),
// which is still unmodified in (data) because the stores happen afterwards
MM_XOR (m0, iv);
WOP_M1 (XOR_data_M1)
// the last ciphertext block of this group becomes the next iv (read before it is overwritten)
iv = data[NUM_WAYS - 1];
WOP (STORE_data);
}
WIDE_LOOP_END
SINGLE_LOOP
{
const v128 *w = wStart;
v128 m = *data;
AES_D_IMC_m (w[2])
do
{
AES_D_IMC_m (w[1]);
AES_D_IMC_m (w[0]);
w -= 2;
}
while (w != p);
AES_D_m (w[1]);
MM_XOR_m (w[0]);
MM_XOR_m (iv);
// keep the ciphertext block as the next iv before storing the plaintext over it
iv = *data;
*data = m;
}
// p was advanced by 2, so p[-2] is the original IV slot
p[-2] = iv;
}
// AES-CTR: XORs (numBlocks) 128-bit blocks of (data) in place with the encrypted counter stream:
// NUM_WAYS blocks per iteration in the wide loop, then one block at a time.
// Layout read by this code: p[0] = counter block, a UInt32 at (p + 1) = half of the round count,
// round keys start at p[2]. The updated counter is written back to the p[0] slot.
AES_FUNC_START2 (AesCtr_Code_HW)
{
uint64x2_t ctr = vreinterpretq_u64_u8(*p);
// wEnd is computed from the original p (before "p += 2"); it ends the 2-rounds-per-step key loop
const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd;
// one = { 1, 0 }: only 64-bit lane 0 of the counter is incremented (lane-wise add, no carry into lane 1)
uint64x2_t one = vdupq_n_u64(0);
one = vsetq_lane_u64(1, one, 0);
p += 2;
WIDE_LOOP_START
{
const v128 *w = p;
WOP (DECLARE_VAR)
// each register gets the next counter value: ctr is incremented once per register
WOP (CTR_START);
do
{
WOP_KEY (AES_E_MC, 0)
WOP_KEY (AES_E_MC, 1)
w += 2;
}
while (w != wEnd);
// here w == wEnd: one more full round, the final AESE (no MixColumns), then XOR with the last key
WOP_KEY (AES_E_MC, 0)
WOP_KEY (AES_E, 1)
WOP_KEY (AES_XOR, 2)
WOP (CTR_END);
}
WIDE_LOOP_END
SINGLE_LOOP
{
const v128 *w = p;
v128 m;
CTR_START (m, 0);
do
{
AES_E_MC_m (w[0]);
AES_E_MC_m (w[1]);
w += 2;
}
while (w != wEnd);
AES_E_MC_m (w[0]);
AES_E_m (w[1]);
MM_XOR_m (w[2]);
// data[0] ^= m
CTR_END (m, 0);
}
// p was advanced by 2, so p[-2] is the original counter slot
p[-2] = vreinterpretq_u8_u64(ctr);
}
#endif // USE_HW_AES
#endif // MY_CPU_ARM_OR_ARM64

View File

@@ -1,12 +1,12 @@
/* Alloc.c -- Memory allocation functions
2018-04-27 : Igor Pavlov : Public domain */
2020-10-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32
#include <windows.h>
#include <Windows.h>
#endif
#include <stdlib.h>
@@ -122,7 +122,6 @@ static void PrintAddr(void *p)
#define Print(s)
#define PrintLn()
#define PrintHex(v, align)
#define PrintDec(v, align)
#define PrintAddr(p)
#endif
@@ -133,10 +132,11 @@ void *MyAlloc(size_t size)
{
if (size == 0)
return NULL;
PRINT_ALLOC("Alloc ", g_allocCount, size, NULL);
#ifdef _SZ_ALLOC_DEBUG
{
void *p = malloc(size);
PRINT_ALLOC("Alloc ", g_allocCount, size, p);
// PRINT_ALLOC("Alloc ", g_allocCount, size, p);
return p;
}
#else
@@ -172,14 +172,20 @@ void MidFree(void *address)
VirtualFree(address, 0, MEM_RELEASE);
}
#ifndef MEM_LARGE_PAGES
#undef _7ZIP_LARGE_PAGES
#ifdef _7ZIP_LARGE_PAGES
#ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
#else
#define MY__MEM_LARGE_PAGES 0x20000000
#endif
#ifdef _7ZIP_LARGE_PAGES
extern
SIZE_T g_LargePageSize;
SIZE_T g_LargePageSize = 0;
typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
#endif
typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
#endif // _7ZIP_LARGE_PAGES
void SetLargePageSize()
{
@@ -214,7 +220,7 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps;
if (size2 >= size)
{
void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (res)
return res;
}
@@ -280,13 +286,15 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
*/
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
#define USE_posix_memalign
#endif
#ifndef USE_posix_memalign
#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
#endif
/*
This posix_memalign() is for test purposes only.
We also need special Free() function instead of free(),

View File

@@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions
2018-02-19 : Igor Pavlov : Public domain */
2021-02-08 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
@@ -13,7 +13,7 @@ void MyFree(void *address);
#ifdef _WIN32
void SetLargePageSize();
void SetLargePageSize(void);
void *MidAlloc(size_t size);
void MidFree(void *address);

View File

@@ -1,5 +1,5 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2018-04-28 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim;
Byte *dest;
SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
if (num == 0)
{
@@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
dest = p->dest;
if (num > (SizeT)(p->destLim - dest))
{
num = p->destLim - dest;
num = (SizeT)(p->destLim - dest);
if (num == 0)
{
p->state = BCJ2_DEC_STATE_ORIG;
@@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
break;
}
num = src - p->bufs[BCJ2_STREAM_MAIN];
num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
if (src == srcLim)
{
@@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
p->ip += 4;
val -= p->ip;
dest = p->dest;
rem = p->destLim - dest;
rem = (SizeT)(p->destLim - dest);
if (rem < 4)
{

View File

@@ -1,5 +1,5 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2019-02-02 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -104,7 +104,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
const Byte *src = p->src;
const Byte *srcLim;
Byte *dest;
SizeT num = p->srcLim - src;
SizeT num = (SizeT)(p->srcLim - src);
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{
@@ -118,7 +118,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
dest = p->bufs[BCJ2_STREAM_MAIN];
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
{
num = p->lims[BCJ2_STREAM_MAIN] - dest;
num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
@@ -152,7 +152,7 @@ static void Bcj2Enc_Encode_2(CBcj2Enc *p)
break;
}
num = src - p->src;
num = (SizeT)(src - p->src);
if (src == srcLim)
{

View File

@@ -1,5 +1,5 @@
/* Blake2s.c -- BLAKE2s and BLAKE2sp Hash
2015-06-30 : Igor Pavlov : Public domain
2021-02-09 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#include <string.h>
@@ -34,7 +34,7 @@ static const Byte k_Blake2s_Sigma[BLAKE2S_NUM_ROUNDS][16] =
};
void Blake2s_Init0(CBlake2s *p)
static void Blake2s_Init0(CBlake2s *p)
{
unsigned i;
for (i = 0; i < 8; i++)

14
C/Bra.c
View File

@@ -1,5 +1,5 @@
/* Bra.c -- Converters for RISC code
2017-04-04 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
return p - data;
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
return p - data;
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
@@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
/*
v = GetBe32(p);
p += 4;

View File

@@ -1,5 +1,5 @@
/* Bra86.c -- Converter for x86 code (BCJ)
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding
break;
{
SizeT d = (SizeT)(p - data - pos);
SizeT d = (SizeT)(p - data) - pos;
pos = (SizeT)(p - data);
if (p >= limit)
{

View File

@@ -1,5 +1,5 @@
/* BwtSort.c -- BWT block sorting
2018-07-04 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -60,7 +60,7 @@ SortGroup - is recursive Range-Sort function with HeapSort optimization for smal
returns: 1 - if there are groups, 0 - no more groups
*/
UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, UInt32 left, UInt32 range
#endif
@@ -116,7 +116,7 @@ UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 group
}
HeapSort(temp, groupSize);
mask = ((1 << NumRefBits) - 1);
mask = (((UInt32)1 << NumRefBits) - 1);
thereAreGroups = 0;
group = groupOffset;

View File

@@ -1,9 +1,13 @@
/* Compiler.h
2017-04-03 : Igor Pavlov : Public domain */
2021-01-05 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#ifdef _MSC_VER
#ifdef UNDER_CE
@@ -25,6 +29,12 @@
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#endif
#define UNUSED_VAR(x) (void)x;

View File

@@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
2018-02-18: Igor Pavlov : Public domain */
2021-04-28 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag)
#define CHECK_CPUID_IS_SUPPORTED
#endif
#ifndef USE_ASM
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define MY__cpuidex __cpuidex
#else
/*
__cpuid (function == 4) requires the subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) doesn't clear ECX.
We can still use __cpuid for low (function) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some function values, e.g. (function == 4).
So for old MSVC we use a hack to pass (subFunction) to the cpuid instruction in the ECX register:
the ECX value is the first parameter of a FAST_CALL / NO_INLINE function,
so the caller of MY__cpuidex_HACK() sets ECX to subFunction, and, because
old MSVC's __cpuid() doesn't change ECX, the cpuid instruction gets the (subFunction) value.
DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/
// Replacement for __cpuidex on old MSVC: calls __cpuid(CPUInfo, function).
// (subFunction) is deliberately the first parameter and is not used in the body.
static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
// marks the parameter as used, so no "unused parameter" warning is produced
UNUSED_VAR(subFunction);
__cpuid(CPUInfo, function);
}
#define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
#pragma message("======== MY__cpuidex_HACK WAS USED ========")
#endif
#else
#define MY__cpuidex(info, func, func2) __cpuid(info, func)
#pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
#endif
#endif
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
@@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
#endif
"=c" (*c) ,
"=d" (*d)
: "0" (function)) ;
: "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
__cpuid(CPUInfo, function);
*a = CPUInfo[0];
*b = CPUInfo[1];
*c = CPUInfo[2];
*d = CPUInfo[3];
MY__cpuidex(CPUInfo, (int)function, 0);
*a = (UInt32)CPUInfo[0];
*b = (UInt32)CPUInfo[1];
*c = (UInt32)CPUInfo[2];
*d = (UInt32)CPUInfo[3];
#endif
}
@@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder()
}
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <windows.h>
#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported()
{
OSVERSIONINFO vi;
@@ -188,13 +231,77 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
#define CHECK_SYS_SSE_SUPPORT
#endif
BoolInt CPU_Is_Aes_Supported()
static UInt32 X86_CPUID_ECX_Get_Flags()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p))
return 0;
return p.c;
}
BoolInt CPU_IsSupported_AES()
{
return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3()
{
return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41()
{
return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
}
/*
  Returns 1 if bit 29 of the second register (EBX) returned by CPUID function 7
  is set (the SHA extensions flag), 0 otherwise.
  Returns False if cpuid can't be read or if function 7 is not available.
  The stale early "return (p.c >> 25) & 1;" was removed: it reported bit 25 of
  (p.c) instead, and it made the function 7 check below unreachable.
*/
BoolInt CPU_IsSupported_SHA()
{
  Cx86cpuid p;
  CHECK_SYS_SSE_SUPPORT
  if (!x86cpuid_CheckAndRead(&p))
    return False;

  /* function 7 must be within the supported range before it is queried */
  if (p.maxFunc < 7)
    return False;
  {
    UInt32 d[4] = { 0 };
    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
    return (d[1] >> 29) & 1;
  }
}
// #include <stdio.h>
#ifdef _WIN32
#include <Windows.h>
#endif
/*
  Returns 1 only if both of these CPUID function 7 flags are set:
    EBX bit 5 (avx2) and ECX bit 9 (vaes, VEX-256/EVEX).
  (avx512vl, EBX bit 31, is intentionally not required.)
  Returns False if cpuid can't be read, if function 7 is not available,
  or (Windows only) if the PF_XSAVE_ENABLED processor feature is not present.
*/
BoolInt CPU_IsSupported_VAES_AVX2()
{
  Cx86cpuid p;
  CHECK_SYS_SSE_SUPPORT

  #ifdef _WIN32
  #define MY__PF_XSAVE_ENABLED 17
  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
    return False;
  #endif

  if (!x86cpuid_CheckAndRead(&p) || p.maxFunc < 7)
    return False;
  {
    UInt32 regA = 0, regB = 0, regC = 0, regD = 0;
    MyCPUID(7, &regA, &regB, &regC, &regD);
    {
      const UInt32 avx2 = regB >> 5;
      const UInt32 vaes = regC >> 9;
      return 1 & avx2 & vaes;
    }
  }
}
BoolInt CPU_IsSupported_PageGB()
@@ -215,4 +322,117 @@ BoolInt CPU_IsSupported_PageGB()
}
}
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _WIN32
#include <Windows.h>
/*
  Windows / ARM: returns 1 if IsProcessorFeaturePresent() reports
  PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, 0 otherwise.
  The parameter list is (void) to match the prototype in CpuArch.h.
*/
BoolInt CPU_IsSupported_CRC32(void)
  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
/*
  Windows / ARM: returns 1 if IsProcessorFeaturePresent() reports
  PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE, 0 otherwise.
  The parameter list is (void) to match the prototype in CpuArch.h.
*/
BoolInt CPU_IsSupported_CRYPTO(void)
  { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else
#if defined(__APPLE__)
/*
#include <stdio.h>
#include <string.h>
static void Print_sysctlbyname(const char *name)
{
size_t bufSize = 256;
char buf[256];
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
{
int i;
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
for (i = 0; i < 20; i++)
printf(" %2x", (unsigned)(Byte)buf[i]);
}
}
*/
/*
  Apple: returns 1 only if the "hw.optional.armv8_crc32" sysctl value
  was read successfully and is equal to 1; returns 0 otherwise.
*/
BoolInt CPU_IsSupported_CRC32(void)
{
  UInt32 flag = 0;
  const int err = My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &flag);
  return (err == 0 && flag == 1) ? 1 : 0;
}
// Apple: SHA1 / SHA2 / AES support is a compile-time constant here, with no runtime query:
// 1 when built for ARM64, 0 otherwise.
#ifdef MY_CPU_ARM64
#define APPLE_CRYPTO_SUPPORT_VAL 1
#else
#define APPLE_CRYPTO_SUPPORT_VAL 0
#endif
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#else // __APPLE__
// Non-Windows, non-Apple ARM: feature detection through getauxval() capability bits.
#include <sys/auxv.h>
// USE_HWCAP is always defined here, so the "return 0" fallback below is currently not compiled
#define USE_HWCAP
#ifdef USE_HWCAP
#include <asm/hwcap.h>
// MY_HWCAP_CHECK_FUNC(name) defines CPU_IsSupported_<name>(), which returns 1 or 0:
//   ARM64: tests HWCAP_<name>  in getauxval(AT_HWCAP)
//   ARM:   tests HWCAP2_<name> in getauxval(AT_HWCAP2)
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name)) ? 1 : 0; }
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
#endif // USE_HWCAP
MY_HWCAP_CHECK_FUNC (CRC32)
MY_HWCAP_CHECK_FUNC (SHA1)
MY_HWCAP_CHECK_FUNC (SHA2)
MY_HWCAP_CHECK_FUNC (AES)
#endif // __APPLE__
#endif // _WIN32
#endif // MY_CPU_ARM_OR_ARM64
#ifdef __APPLE__
#include <sys/sysctl.h>
// Read-only wrapper for sysctlbyname(): no new value is passed (NULL, 0).
// (bufSize) is passed through as the in/out size argument; the sysctlbyname() result is returned as is.
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
return sysctlbyname(name, buf, bufSize, NULL, 0);
}
/*
  Reads the sysctl value (name) into (*val).
  Returns:
    the non-zero result of My_sysctlbyname_Get(), if the call failed;
    EFAULT, if the call succeeded but the returned size is not sizeof(UInt32);
    0, otherwise.
*/
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
  size_t size = sizeof(*val);
  const int res = My_sysctlbyname_Get(name, val, &size);
  if (res != 0)
    return res;
  return (size == sizeof(*val)) ? 0 : EFAULT;
}

View File

@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
2018-02-18 : Igor Pavlov : Public domain */
2021-04-25 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT can be used to select fast code branch
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if defined(_M_X64) \
@@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "x64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
@@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
#define MY_CPU_32BIT
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
@@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_NAME "armt"
#else
#define MY_CPU_NAME "arm"
#define MY_CPU_32BIT
#endif
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
@@ -84,17 +97,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#if defined(__ppc64__) \
|| defined(__powerpc64__)
|| defined(__powerpc64__) \
|| defined(__ppc__) \
|| defined(__powerpc__) \
|| defined(__PPC__) \
|| defined(_POWER)
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(_LP64) \
|| defined(__64BIT__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "ppc64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#elif defined(__ppc__) \
|| defined(__powerpc__)
#else
#define MY_CPU_NAME "ppc"
#define MY_CPU_32BIT
#define MY_CPU_SIZEOF_POINTER 4
/* #define MY_CPU_32BIT */
#endif
#endif
@@ -111,6 +136,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_X86_OR_AMD64
#endif
#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
#define MY_CPU_ARM_OR_ARM64
#endif
#ifdef _WIN32
@@ -170,6 +199,41 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#error Stop_Compiling_Bad_32_64_BIT
#endif
#ifdef __SIZEOF_POINTER__
#ifdef MY_CPU_SIZEOF_POINTER
#if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#else
#define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
#endif
#endif
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
#if defined (_LP64)
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
#define MY_CPU_pragma_pop __pragma(pack(pop))
#else
#define MY_CPU_pragma_pack_push_1
#define MY_CPU_pragma_pop
#endif
#else
#ifdef __xlC__
// for XLC compiler:
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
#define MY_CPU_pragma_pop _Pragma("pack()")
#else
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
#define MY_CPU_pragma_pop _Pragma("pack(pop)")
#endif
#endif
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
@@ -202,9 +266,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#else
@@ -242,7 +306,7 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY__has_builtin(x) 0
#endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
@@ -253,8 +317,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
@@ -262,9 +326,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
@@ -325,10 +389,35 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder();
BoolInt CPU_Is_Aes_Supported();
BoolInt CPU_IsSupported_PageGB();
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void);
BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
BoolInt CPU_IsSupported_CRC32(void);
#if defined(_WIN32)
BoolInt CPU_IsSupported_CRYPTO(void);
#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
#else
BoolInt CPU_IsSupported_SHA1(void);
BoolInt CPU_IsSupported_SHA2(void);
BoolInt CPU_IsSupported_AES(void);
#endif
#endif
#if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END

169
C/Delta.c
View File

@@ -1,5 +1,5 @@
/* Delta.c -- Delta converter
2009-05-26 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,53 +12,158 @@ void Delta_Init(Byte *state)
state[i] = 0;
}
static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
{
unsigned i;
for (i = 0; i < size; i++)
dest[i] = src[i];
}
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
Byte buf[DELTA_STATE_SIZE];
unsigned j = 0;
MyMemCpy(buf, state, delta);
Byte temp[DELTA_STATE_SIZE];
if (size == 0)
return;
{
SizeT i;
for (i = 0; i < size;)
unsigned i = 0;
do
temp[i] = state[i];
while (++i != delta);
}
if (size <= delta)
{
for (j = 0; j < delta && i < size; i++, j++)
unsigned i = 0, k;
do
{
Byte b = data[i];
data[i] = (Byte)(b - buf[j]);
buf[j] = b;
Byte b = *data;
*data++ = (Byte)(b - temp[i]);
temp[i] = b;
}
while (++i != size);
k = 0;
do
{
if (i == delta)
i = 0;
state[k] = temp[i++];
}
while (++k != delta);
return;
}
{
Byte *p = data + size - delta;
{
unsigned i = 0;
do
state[i] = *p++;
while (++i != delta);
}
{
const Byte *lim = data + delta;
ptrdiff_t dif = -(ptrdiff_t)delta;
if (((ptrdiff_t)size + dif) & 1)
{
--p; *p = (Byte)(*p - p[dif]);
}
while (p != lim)
{
--p; *p = (Byte)(*p - p[dif]);
--p; *p = (Byte)(*p - p[dif]);
}
dif = -dif;
do
{
--p; *p = (Byte)(*p - temp[--dif]);
}
while (dif != 0);
}
}
}
if (j == delta)
j = 0;
MyMemCpy(state, buf + j, delta - j);
MyMemCpy(state + delta - j, buf, j);
}
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
Byte buf[DELTA_STATE_SIZE];
unsigned j = 0;
MyMemCpy(buf, state, delta);
unsigned i;
const Byte *lim;
if (size == 0)
return;
i = 0;
lim = data + size;
if (size <= delta)
{
SizeT i;
for (i = 0; i < size;)
do
*data = (Byte)(*data + state[i++]);
while (++data != lim);
for (; delta != i; state++, delta--)
*state = state[i];
data -= i;
}
else
{
for (j = 0; j < delta && i < size; i++, j++)
/*
#define B(n) b ## n
#define I(n) Byte B(n) = state[n];
#define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
#define F(n) if (data != lim) { U(n) }
if (delta == 1)
{
buf[j] = data[i] = (Byte)(buf[j] + data[i]);
I(0)
if ((lim - data) & 1) { U(0) }
while (data != lim) { U(0) U(0) }
data -= 1;
}
else if (delta == 2)
{
I(0) I(1)
lim -= 1; while (data < lim) { U(0) U(1) }
lim += 1; F(0)
data -= 2;
}
else if (delta == 3)
{
I(0) I(1) I(2)
lim -= 2; while (data < lim) { U(0) U(1) U(2) }
lim += 2; F(0) F(1)
data -= 3;
}
else if (delta == 4)
{
I(0) I(1) I(2) I(3)
lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
lim += 3; F(0) F(1) F(2)
data -= 4;
}
else
*/
{
do
{
*data = (Byte)(*data + state[i++]);
data++;
}
while (i != delta);
{
ptrdiff_t dif = -(ptrdiff_t)delta;
do
*data = (Byte)(*data + data[dif]);
while (++data != lim);
data += dif;
}
}
}
if (j == delta)
j = 0;
MyMemCpy(state, buf + j, delta - j);
MyMemCpy(state + delta - j, buf, j);
do
*state++ = *data;
while (++data != lim);
}

View File

@@ -5,7 +5,7 @@
#ifdef _WIN32
#include <windows.h>
#include <Windows.h>
#include "DllSecur.h"

View File

@@ -10,8 +10,8 @@ EXTERN_C_BEGIN
#ifdef _WIN32
void My_SetDefaultDllDirectories();
void LoadSecurityDlls();
void My_SetDefaultDllDirectories(void);
void LoadSecurityDlls(void);
#endif

View File

@@ -1,5 +1,5 @@
/* HuffEnc.c -- functions for Huffman encoding
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -8,7 +8,7 @@
#define kMaxLen 16
#define NUM_BITS 10
#define MASK ((1 << NUM_BITS) - 1)
#define MASK (((unsigned)1 << NUM_BITS) - 1)
#define NUM_COUNTERS 64

View File

@@ -1,10 +1,11 @@
/* LzFind.c -- Match finder for LZ algorithms
2018-07-08 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "CpuArch.h"
#include "LzFind.h"
#include "LzHash.h"
@@ -14,7 +15,45 @@
#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
#define kMaxHistorySize ((UInt32)7 << 29)
#define kStartMaxLen 3
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
#define kFix5HashSize kFix4HashSize
/*
HASH2_CALC:
if (hv) match, then cur[0] and cur[1] also match
*/
#define HASH2_CALC hv = GetUi16(cur);
// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255]
/*
HASH3_CALC:
if (cur[0]) and (h2) match, then cur[1] also match
if (cur[0]) and (hv) match, then cur[1] and cur[2] also match
*/
#define HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
#define HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; }
/*
HASH5_CALC:
  Same h2 / h3 computation as HASH4_CALC, then
  temp ^= crc[cur[3]] << kLzHash_CrcShift_1
  hv    = (temp ^ (crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask
  A separate 4-byte hash (h4) is NOT produced: that statement is kept only
  as a comment inside the macro.
  Expects (p), (cur), (h2), (h3) and (hv) to be in scope at the point of use.
*/
#define HASH5_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \
/* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \
hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; }
/*
HASH_ZIP_CALC:
  hv = ((cur[2] | (cur[0] << 8)) ^ crc[cur[1]]) & 0xFFFF
  The mask is the fixed constant 0xFFFF (p->hashMask is not used here).
  Expects (p), (cur) and (hv) to be in scope at the point of use.
*/
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
{
@@ -44,9 +83,9 @@ static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr
return (p->bufferBase != NULL);
}
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
{
@@ -77,7 +116,7 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
for (;;)
{
Byte *dest = p->buffer + (p->streamPos - p->pos);
size_t size = (p->bufferBase + p->blockSize - dest);
size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
if (size == 0)
return;
@@ -204,10 +243,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{
// UInt32 hs4;
p->fixedHashSize = 0;
if (p->numHashBytes == 2)
hs = (1 << 16) - 1;
else
if (p->numHashBytes != 2)
{
hs = historySize;
if (hs > p->expectedDataSize)
@@ -218,9 +257,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs |= (hs >> 2);
hs |= (hs >> 4);
hs |= (hs >> 8);
// we propagated 16 bits in (hs). Low 16 bits must be set later
hs >>= 1;
hs |= 0xFFFF; /* don't change it! It's required for Deflate */
if (hs > (1 << 24))
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
@@ -228,12 +267,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
// hs = ((UInt32)1 << 25) - 1; // for test
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1;
}
p->hashMask = hs;
hs++;
/*
hs4 = (1 << 20);
if (hs4 > hs)
hs4 = hs;
// hs4 = (1 << 16); // for test
p->hash4Mask = hs4 - 1;
*/
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
hs += p->fixedHashSize;
}
@@ -249,6 +306,10 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
numSons <<= 1;
newSize = hs + numSons;
// aligned size is not required here, but it can be better for some loops
#define NUM_REFS_ALIGN_MASK 0xF
newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
if (p->hash && p->numRefs == newSize)
return 1;
@@ -349,15 +410,23 @@ static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{
size_t i;
for (i = 0; i < numItems; i++)
if (numItems == 0)
return;
{
UInt32 value = items[i];
if (value <= subValue)
value = kEmptyHashValue;
else
value -= subValue;
items[i] = value;
const CLzRef *lim = items + numItems - 1;
for (; items < lim; items += 2)
{
UInt32 v, m;
v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue;
v = items[1]; items[0] = m; m = v - subValue; if (v < subValue) m = kEmptyHashValue;
items[1] = m;
}
if (items == lim)
{
UInt32 v, m;
v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue;
items[0] = m;
}
}
}
@@ -429,8 +498,8 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos
{
ptrdiff_t diff;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
diff = (ptrdiff_t)0 - delta;
if (cur[maxLen] == cur[maxLen + diff])
diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
{
const Byte *c = cur;
while (*c == c[diff])
@@ -588,15 +657,21 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define GET_MATCHES_FOOTER(offset, maxLen) \
offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
offset = (unsigned)(func((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
distances + offset, (UInt32)(_maxLen_)) - distances); MOVE_POS_RET;
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
#define GET_MATCHES_FOOTER_HC(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec)
#define SKIP_FOOTER \
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
#define UPDATE_maxLen { \
ptrdiff_t diff = (ptrdiff_t)0 - d2; \
ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \
const Byte *c = cur + maxLen; \
const Byte *lim = cur + lenLimit; \
for (; c != lim; c++) if (*(c + diff) != *c) break; \
@@ -610,7 +685,7 @@ static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
offset = 0;
GET_MATCHES_FOOTER(offset, 1)
GET_MATCHES_FOOTER_BT(1)
}
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
@@ -621,7 +696,7 @@ UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
offset = 0;
GET_MATCHES_FOOTER(offset, 2)
GET_MATCHES_FOOTER_BT(2)
}
static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
@@ -659,9 +734,10 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
}
GET_MATCHES_FOOTER(offset, maxLen)
GET_MATCHES_FOOTER_BT(maxLen)
}
static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 h2, h3, d2, d3, pos;
@@ -676,53 +752,61 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
maxLen = 0;
maxLen = 3;
offset = 0;
for (;;)
{
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
maxLen = 2;
distances[0] = 2;
distances[1] = d2 - 1;
offset = 2;
}
if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
if (*(cur - d2 + 2) == cur[2])
{
// distances[0] = 3;
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
maxLen = 3;
distances[(size_t)offset + 1] = d3 - 1;
offset += 2;
d2 = d3;
distances[2 + 1] = d3 - 1;
offset = 4;
}
if (offset != 0)
else
break;
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
offset = 2;
}
else
break;
UPDATE_maxLen
distances[(size_t)offset - 2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
MOVE_POS_RET;
MOVE_POS_RET
}
break;
}
if (maxLen < 3)
maxLen = 3;
GET_MATCHES_FOOTER(offset, maxLen)
GET_MATCHES_FOOTER_BT(maxLen)
}
/*
static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
UInt32 h2, h3, d2, d3, maxLen, offset, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -733,53 +817,49 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
d4 = pos - (hash + kFix4HashSize)[h4];
// d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[h4] = pos;
// (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
maxLen = 0;
maxLen = 4;
offset = 0;
for (;;)
{
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
distances[0] = maxLen = 2;
distances[0] = 2;
distances[1] = d2 - 1;
offset = 2;
if (*(cur - d2 + 2) == cur[2])
distances[0] = maxLen = 3;
{
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
distances[2] = maxLen = 3;
distances[3] = d3 - 1;
offset = 4;
d2 = d3;
}
else
break;
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
distances[0] = maxLen = 3;
distances[1] = d3 - 1;
offset = 2;
d2 = d3;
}
else
break;
if (d2 != d4 && d4 < p->cyclicBufferSize
&& *(cur - d4) == *cur
&& *(cur - d4 + 3) == *(cur + 3))
{
maxLen = 4;
distances[(size_t)offset + 1] = d4 - 1;
offset += 2;
d2 = d4;
}
if (offset != 0)
{
distances[(size_t)offset - 2] = 3;
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
distances[(size_t)offset - 2] = maxLen;
if (maxLen == lenLimit)
@@ -787,14 +867,12 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
MOVE_POS_RET;
}
break;
}
if (maxLen < 4)
maxLen = 4;
GET_MATCHES_FOOTER(offset, maxLen)
GET_MATCHES_FOOTER_BT(maxLen)
}
*/
static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
@@ -816,27 +894,38 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[hv] = pos;
maxLen = 0;
maxLen = 3;
offset = 0;
for (;;)
{
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
maxLen = 2;
distances[0] = 2;
distances[1] = d2 - 1;
offset = 2;
}
if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
if (*(cur - d2 + 2) == cur[2])
{
// distances[0] = 3;
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
maxLen = 3;
distances[(size_t)offset + 1] = d3 - 1;
offset += 2;
d2 = d3;
distances[2 + 1] = d3 - 1;
offset = 4;
}
if (offset != 0)
else
break;
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
d2 = d3;
distances[1] = d3 - 1;
offset = 2;
}
else
break;
UPDATE_maxLen
distances[(size_t)offset - 2] = (UInt32)maxLen;
if (maxLen == lenLimit)
@@ -844,20 +933,16 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET;
}
break;
}
if (maxLen < 3)
maxLen = 3;
offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances + offset, maxLen) - (distances));
MOVE_POS_RET
GET_MATCHES_FOOTER_HC(maxLen);
}
/*
static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
UInt32 h2, h3, d2, d3, maxLen, offset, pos;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -868,53 +953,49 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
d2 = pos - hash [h2];
d3 = pos - (hash + kFix3HashSize)[h3];
d4 = pos - (hash + kFix4HashSize)[h4];
// d4 = pos - (hash + kFix4HashSize)[h4];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = pos;
(hash + kFix3HashSize)[h3] = pos;
(hash + kFix4HashSize)[h4] = pos;
// (hash + kFix4HashSize)[h4] = pos;
(hash + kFix5HashSize)[hv] = pos;
maxLen = 0;
maxLen = 4;
offset = 0;
for (;;)
{
if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
{
distances[0] = maxLen = 2;
distances[0] = 2;
distances[1] = d2 - 1;
offset = 2;
if (*(cur - d2 + 2) == cur[2])
distances[0] = maxLen = 3;
{
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
distances[2] = maxLen = 3;
distances[3] = d3 - 1;
offset = 4;
d2 = d3;
}
else
break;
}
else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
{
distances[0] = maxLen = 3;
distances[1] = d3 - 1;
offset = 2;
d2 = d3;
}
else
break;
if (d2 != d4 && d4 < p->cyclicBufferSize
&& *(cur - d4) == *cur
&& *(cur - d4 + 3) == *(cur + 3))
{
maxLen = 4;
distances[(size_t)offset + 1] = d4 - 1;
offset += 2;
d2 = d4;
}
if (offset != 0)
{
distances[(size_t)offset - 2] = 3;
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
distances[(size_t)offset - 2] = maxLen;
if (maxLen == lenLimit)
@@ -922,16 +1003,12 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET;
}
break;
}
if (maxLen < 4)
maxLen = 4;
offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances + offset, maxLen) - (distances));
MOVE_POS_RET
GET_MATCHES_FOOTER_HC(maxLen);
}
*/
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{
@@ -940,11 +1017,11 @@ UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
HASH_ZIP_CALC;
curMatch = p->hash[hv];
p->hash[hv] = p->pos;
offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
distances, 2) - (distances));
MOVE_POS_RET
offset = 0;
GET_MATCHES_FOOTER_HC(2)
}
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
do
@@ -1006,12 +1083,11 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
while (--num != 0);
}
/*
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
do
{
UInt32 h2, h3, h4;
UInt32 h2, h3;
UInt32 *hash;
SKIP_HEADER(5)
HASH5_CALC;
@@ -1019,13 +1095,12 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[h4] =
// (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
SKIP_FOOTER
}
while (--num != 0);
}
*/
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
@@ -1046,27 +1121,26 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
while (--num != 0);
}
/*
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
do
{
UInt32 h2, h3, h4;
UInt32 h2, h3;
UInt32 *hash;
SKIP_HEADER(5)
HASH5_CALC;
hash = p->hash;
curMatch = hash + kFix5HashSize)[hv];
curMatch = (hash + kFix5HashSize)[hv];
hash [h2] =
(hash + kFix3HashSize)[h3] =
(hash + kFix4HashSize)[h4] =
// (hash + kFix4HashSize)[h4] =
(hash + kFix5HashSize)[hv] = p->pos;
p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS
}
while (--num != 0);
}
*/
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{
@@ -1089,18 +1163,16 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
if (!p->btMode)
{
/* if (p->numHashBytes <= 4) */
if (p->numHashBytes <= 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
}
/*
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
}
*/
}
else if (p->numHashBytes == 2)
{
@@ -1112,16 +1184,14 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
}
else /* if (p->numHashBytes == 4) */
else if (p->numHashBytes == 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
}
/*
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
}
*/
}

View File

@@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
2017-06-10 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
@@ -61,7 +61,7 @@ typedef struct _CMatchFinder
&& (!(p)->directInput || (p)->directInputRem == 0))
int MatchFinder_NeedMove(CMatchFinder *p);
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);

View File

@@ -1,12 +1,64 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2018-12-29 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "LzHash.h"
#include "CpuArch.h"
#include "LzHash.h"
#include "LzFindMt.h"
// Uncomment the next line to enable iteration counting (debug statistics).
// #define LOG_ITERS
#ifdef LOG_ITERS
#include <stdio.h>
// Counters used only through LOG_ITER(...) expressions elsewhere in this file.
static UInt64 g_NumIters_Tree;
static UInt64 g_NumIters_Loop;
// With LOG_ITERS defined, LOG_ITER(x) expands to its argument unchanged.
#define LOG_ITER(x) x
#else
// Without LOG_ITERS, LOG_ITER(x) expands to nothing, so the argument is dropped.
#define LOG_ITER(x)
#endif
#define kMtHashBlockSize (1 << 17)
#define kMtHashNumBlocks (1 << 1)
#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
#define kMtBtBlockSize (1 << 16)
#define kMtBtNumBlocks (1 << 4)
#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
/*
HASH functions:
We use raw 8/16 bits from a[1] and a[2],
xored with crc(a[0]) and crc(a[3]).
We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
our crc() function provides one-to-one correspondence for low 8-bit values:
(crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
/*
MT_HASH2_CALC:
  h2 = (crc[cur[0]] ^ cur[1]) & (kHash2Size - 1)
  Expects (p), (cur) and (h2) to be in scope at the point of use.
*/
#define MT_HASH2_CALC \
h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
/*
MT_HASH3_CALC:
  temp = crc[cur[0]] ^ cur[1]
  h2   = temp & (kHash2Size - 1)
  h3   = (temp ^ (cur[2] << 8)) & (kHash3Size - 1)
  Expects (p), (cur), (h2) and (h3) to be in scope at the point of use.
*/
#define MT_HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
/*
#define MT_HASH3_CALC__NO_2 { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
#define __MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
// (kHash4Size - 1);
*/
static void MtSync_Construct(CMtSync *p)
{
p->wasCreated = False;
@@ -18,8 +70,11 @@ static void MtSync_Construct(CMtSync *p)
Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore);
Semaphore_Construct(&p->filledSemaphore);
p->affinity = 0;
}
MY_NO_INLINE
static void MtSync_GetNextBlock(CMtSync *p)
{
if (p->needStart)
@@ -81,8 +136,7 @@ static void MtSync_Destruct(CMtSync *p)
p->exit = True;
if (p->needStart)
Event_Set(&p->canStart);
Thread_Wait(&p->thread);
Thread_Close(&p->thread);
Thread_Wait_Close(&p->thread);
}
if (p->csWasInitialized)
{
@@ -103,6 +157,7 @@ static void MtSync_Destruct(CMtSync *p)
static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks)
{
WRes wres;
if (p->wasCreated)
return SZ_OK;
@@ -118,7 +173,11 @@ static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj,
p->needStart = True;
RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj));
if (p->affinity != 0)
wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
else
wres = Thread_Create(&p->thread, startAddress, obj);
RINOK_THREAD(wres);
p->wasCreated = True;
return SZ_OK;
}
@@ -131,23 +190,161 @@ static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj,
return res;
}
void MtSync_Init(CMtSync *p) { p->needStart = True; }
// static void MtSync_Init(CMtSync *p) { p->needStart = True; }
#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 25) + (1 << 20))
/*
GetUi24hi_from32(p):
  Portable branch: combines p[1], p[2], p[3] into a 24-bit value
  (p[1] in bits 0..7, p[2] in bits 8..15, p[3] in bits 16..23).
  MY_CPU_LE_UNALIGN branch: performs one GetUi32(p) read and drops the low
  8 bits; presumably equivalent on little-endian targets - confirm against
  the GetUi32 definition. Note that this branch goes through GetUi32(p)
  rather than touching only p[1..3].
*/
#ifdef MY_CPU_LE_UNALIGN
#define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
#define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif
/*
GetHeads_DECL(name):
  Expands to the signature of a static function named GetHeads<name>.
  All GetHeads variants share this parameter list:
    p, pos, hash, hashMask, heads, numHeads, crc
  The function body must follow the macro invocation.
*/
#define GetHeads_DECL(name) \
static void GetHeads ## name(const Byte *p, UInt32 pos, \
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
/*
GetHeads_LOOP(v):
  Main loop shared by the GetHeads functions; (v) is the hash expression,
  evaluated once per iteration with the current value of (p).
  Repeats numHeads times:
    value = (v); p++;
    *heads++ = pos - hash[value];   // delta to the previous entry in that slot
    hash[value] = pos++;            // slot now holds the current position
  Uses and modifies the parameters declared by GetHeads_DECL
  (p, pos, hash, heads, numHeads).
*/
#define GetHeads_LOOP(v) \
for (; numHeads != 0; numHeads--) { \
const UInt32 value = (v); \
p++; \
*heads++ = pos - hash[value]; \
hash[value] = pos++; }
#define DEF_GetHeads2(name, v, action) \
static void GetHeads ## name(const Byte *p, UInt32 pos, \
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \
{ action; for (; numHeads != 0; numHeads--) { \
const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } }
GetHeads_DECL(name) { action \
GetHeads_LOOP(v) }
#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask)
DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask)
/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */
DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.
/*
GetHeads_DECL(3b)
{
UNUSED_VAR(hashMask);
UNUSED_VAR(crc);
{
const Byte *pLim = p + numHeads;
if (numHeads == 0)
return;
pLim--;
while (p < pLim)
{
UInt32 v1 = GetUi32(p);
UInt32 v0 = v1 & 0xFFFFFF;
UInt32 h0, h1;
p += 2;
v1 >>= 8;
h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
heads += 2;
}
if (p == pLim)
{
UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
*heads = pos - hash[v0];
hash[v0] = pos;
}
}
}
*/
/*
GetHeads_DECL(4)
{
unsigned sh = 0;
UNUSED_VAR(crc)
while ((hashMask & 0x80000000) == 0)
{
hashMask <<= 1;
sh++;
}
GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/
#define USE_GetHeads_LOCAL_CRC
#ifdef USE_GetHeads_LOCAL_CRC
/*
GetHeads4: hash heads for 4-byte hashing.
  Two 256-entry tables are prepared on the stack, so that the per-position
  work in the main loop is table lookups and xors only:
    tab0[b] = crc[b] & hashMask
    tab3[b] = (crc[b] << kLzHash_CrcShift_1) & hashMask
  hash value = tab0[p[0]] ^ tab3[p[3]] ^ GetUi16(p + 1)
  The 16-bit term is xored in unmasked.
  NOTE(review): presumably hashMask always has its low 16 bits set - confirm
  against the caller.
*/
GetHeads_DECL(4)
{
  UInt32 tab0[256];
  UInt32 tab3[256];
  unsigned k = 0;
  do
  {
    const UInt32 c = crc[k];
    tab0[k] = c & hashMask;
    tab3[k] = (c << kLzHash_CrcShift_1) & hashMask;
    // alternative that was tried: tab3[k] = rotlFixed(c, 8) & hashMask;
  }
  while (++k != 256);
  GetHeads_LOOP(tab0[p[0]] ^ tab3[p[3]] ^ (UInt32)GetUi16(p + 1))
}
/*
GetHeads4b: hash heads for 4-byte hashing, "big hash" variant.
  Only p[0] goes through the crc table (pre-masked with hashMask);
  bytes p[1..3] are xored in as a raw 24-bit value:
    hash value = tab0[p[0]] ^ GetUi24hi_from32(p)
  The 24-bit term is xored in unmasked.
  NOTE(review): presumably used only when hashMask covers at least 24 bits -
  confirm against the caller.
*/
GetHeads_DECL(4b)
{
  UInt32 tab0[256];
  unsigned k = 0;
  do
    tab0[k] = crc[k] & hashMask;
  while (++k != 256);
  GetHeads_LOOP(tab0[p[0]] ^ GetUi24hi_from32(p))
}
/*
GetHeads5: hash heads for 5-byte hashing.
  Three 256-entry tables are prepared on the stack:
    tab0[b] = crc[b] & hashMask
    tab3[b] = (crc[b] << kLzHash_CrcShift_1) & hashMask
    tab4[b] = (crc[b] << kLzHash_CrcShift_2) & hashMask
  hash value = tab0[p[0]] ^ tab3[p[3]] ^ tab4[p[4]] ^ GetUi16(p + 1)
  The 16-bit term is xored in unmasked.
  NOTE(review): presumably hashMask always has its low 16 bits set - confirm
  against the caller.
*/
GetHeads_DECL(5)
{
  UInt32 tab0[256];
  UInt32 tab3[256];
  UInt32 tab4[256];
  unsigned k = 0;
  do
  {
    const UInt32 c = crc[k];
    tab0[k] = c & hashMask;
    tab3[k] = (c << kLzHash_CrcShift_1) & hashMask;
    tab4[k] = (c << kLzHash_CrcShift_2) & hashMask;
  }
  while (++k != 256);
  GetHeads_LOOP(tab0[p[0]] ^ tab3[p[3]] ^ tab4[p[4]] ^ (UInt32)GetUi16(p + 1))
}
/*
GetHeads5b: hash heads for 5-byte hashing, "big hash" variant.
  p[0] and p[4] go through pre-masked crc tables; bytes p[1..3] are xored
  in as a raw 24-bit value:
    tab0[b] = crc[b] & hashMask
    tab4[b] = (crc[b] << kLzHash_CrcShift_1) & hashMask
  hash value = tab0[p[0]] ^ tab4[p[4]] ^ GetUi24hi_from32(p)
  The 24-bit term is xored in unmasked.
  NOTE(review): presumably used only when hashMask covers at least 24 bits -
  confirm against the caller.
*/
GetHeads_DECL(5b)
{
  UInt32 tab0[256];
  UInt32 tab4[256];
  unsigned k = 0;
  do
  {
    const UInt32 c = crc[k];
    tab0[k] = c & hashMask;
    tab4[k] = (c << kLzHash_CrcShift_1) & hashMask;
  }
  while (++k != 256);
  GetHeads_LOOP(tab0[p[0]] ^ tab4[p[4]] ^ GetUi24hi_from32(p))
}
#else
DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
#endif
static void HashThreadFunc(CMatchFinderMt *mt)
{
@@ -244,11 +441,11 @@ static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p)
MY_NO_INLINE
static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
UInt32 *d, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes)
{
do
{
UInt32 *_distances = ++distances;
UInt32 *_distances = ++d;
UInt32 delta = *hash++;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
@@ -258,14 +455,15 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
unsigned maxLen = (unsigned)_maxLen;
/*
if (size > 1)
#define PREF_STEP 1
if (size > PREF_STEP)
{
UInt32 delta = *hash;
UInt32 delta = hash[PREF_STEP - 1];
if (delta < _cyclicBufferSize)
{
UInt32 cyc1 = _cyclicBufferPos + 1;
size_t cyc1 = _cyclicBufferPos + PREF_STEP;
CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1);
Byte b = *(cur + 1 - delta);
Byte b = *(cur + PREF_STEP - delta);
_distances[0] = pair[0];
_distances[1] = b;
}
@@ -276,8 +474,9 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
*ptr0 = *ptr1 = kEmptyHashValue;
}
else
for(;;)
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
@@ -292,8 +491,8 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
if (maxLen < len)
{
maxLen = len;
*distances++ = (UInt32)len;
*distances++ = delta - 1;
*d++ = (UInt32)len;
*d++ = delta - 1;
if (len == lenLimit)
{
UInt32 pair1 = pair[1];
@@ -333,39 +532,39 @@ static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLz
_cyclicBufferPos++;
cur++;
{
UInt32 num = (UInt32)(distances - _distances);
UInt32 num = (UInt32)(d - _distances);
_distances[-1] = num;
}
}
while (distances < limit && --size != 0);
while (d < limit && --size != 0);
*posRes = pos;
return distances;
return d;
}
#endif
static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
UInt32 numProcessed = 0;
UInt32 curPos = 2;
UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2
distances[1] = p->hashNumAvail;
d[1] = p->hashNumAvail;
while (curPos < limit)
{
if (p->hashBufPos == p->hashBufPosLimit)
{
MatchFinderMt_GetNextBlock_Hash(p);
distances[1] = numProcessed + p->hashNumAvail;
d[1] = numProcessed + p->hashNumAvail;
if (p->hashNumAvail >= p->numHashBytes)
continue;
distances[0] = curPos + p->hashNumAvail;
distances += curPos;
d[0] = curPos + p->hashNumAvail;
d += curPos;
for (; p->hashNumAvail != 0; p->hashNumAvail--)
*distances++ = 0;
*d++ = 0;
return;
}
{
@@ -387,7 +586,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
#ifndef MFMT_GM_INLINE
while (curPos < limit && size-- != 0)
{
UInt32 *startDistances = distances + curPos;
UInt32 *startDistances = d + curPos;
UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
startDistances + 1, p->numHashBytes - 1) - startDistances);
@@ -401,9 +600,9 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
{
UInt32 posRes;
curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
distances + limit,
size, &posRes) - distances);
d + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
d + limit,
size, &posRes) - d);
p->hashBufPos += posRes - pos;
cyclicBufferPos += posRes - pos;
p->buffer += posRes - pos;
@@ -420,7 +619,7 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances)
}
}
distances[0] = curPos;
d[0] = curPos;
}
static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
@@ -448,7 +647,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
}
}
void BtThreadFunc(CMatchFinderMt *mt)
static void BtThreadFunc(CMatchFinderMt *mt)
{
CMtSync *p = &mt->btSync;
for (;;)
@@ -491,6 +690,14 @@ void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
MtSync_Destruct(&p->hashSync);
MtSync_Destruct(&p->btSync);
LOG_ITER(
printf("\nTree %9d * %7d iter = %9d sum \n",
(UInt32)(g_NumIters_Tree / 1000),
(UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
(UInt32)(g_NumIters_Loop / 1000)
));
MatchFinderMt_FreeMem(p, alloc);
}
@@ -553,6 +760,7 @@ static void MatchFinderMt_Init(CMatchFinderMt *p)
p->hash = mf->hash;
p->fixedHashSize = mf->fixedHashSize;
// p->hash4Mask = mf->hash4Mask;
p->crc = mf->crc;
p->son = mf->son;
@@ -572,22 +780,24 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
/* p->MatchFinder->ReleaseStream(); */
}
static void MatchFinderMt_Normalize(CMatchFinderMt *p)
{
MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
p->lzPos = p->historySize + 1;
}
MY_NO_INLINE
static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
UInt32 blockIndex;
UInt32 blockIndex, k;
MtSync_GetNextBlock(&p->btSync);
blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask);
p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize;
p->btBufPosLimit += p->btBuf[p->btBufPos++];
p->btNumAvailBytes = p->btBuf[p->btBufPos++];
k = blockIndex * kMtBtBlockSize;
p->btBufPosLimit = k + p->btBuf[k];
p->btNumAvailBytes = p->btBuf[k + 1];
p->btBufPos = k + 2;
if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize)
MatchFinderMt_Normalize(p);
{
MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize);
p->lzPos = p->historySize + 1;
}
}
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
@@ -603,170 +813,289 @@ static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
return p->btNumAvailBytes;
}
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, curMatch2;
UInt32 h2, c2;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 lzPos = p->lzPos;
UInt32 m = p->lzPos;
MT_HASH2_CALC
curMatch2 = hash[h2];
hash[h2] = lzPos;
c2 = hash[h2];
hash[h2] = m;
if (curMatch2 >= matchMinPos)
if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
if (c2 >= matchMinPos)
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
*distances++ = 2;
*distances++ = lzPos - curMatch2 - 1;
*d++ = 2;
*d++ = m - c2 - 1;
}
return distances;
return d;
}
static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, h3, curMatch2, curMatch3;
UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 lzPos = p->lzPos;
UInt32 m = p->lzPos;
MT_HASH3_CALC
curMatch2 = hash[ h2];
curMatch3 = (hash + kFix3HashSize)[h3];
c2 = hash[h2];
c3 = (hash + kFix3HashSize)[h3];
hash[ h2] = lzPos;
(hash + kFix3HashSize)[h3] = lzPos;
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
distances[1] = lzPos - curMatch2 - 1;
if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
d[1] = m - c2 - 1;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
distances[0] = 3;
return distances + 2;
d[0] = 3;
return d + 2;
}
distances[0] = 2;
distances += 2;
d[0] = 2;
d += 2;
}
if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
*distances++ = 3;
*distances++ = lzPos - curMatch3 - 1;
*d++ = 3;
*d++ = m - c3 - 1;
}
return distances;
return d;
}
/*
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances)
{
UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 lzPos = p->lzPos;
MT_HASH4_CALC
curMatch2 = hash[ h2];
curMatch3 = (hash + kFix3HashSize)[h3];
curMatch4 = (hash + kFix4HashSize)[h4];
hash[ h2] = lzPos;
(hash + kFix3HashSize)[h3] = lzPos;
(hash + kFix4HashSize)[h4] = lzPos;
if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0])
{
distances[1] = lzPos - curMatch2 - 1;
if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2])
{
distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3;
return distances + 2;
}
distances[0] = 2;
distances += 2;
}
if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0])
{
distances[1] = lzPos - curMatch3 - 1;
if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3])
{
distances[0] = 4;
return distances + 2;
}
distances[0] = 3;
distances += 2;
}
if (curMatch4 >= matchMinPos)
if (
cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] &&
cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3]
)
{
*distances++ = 4;
*distances++ = lzPos - curMatch4 - 1;
}
return distances;
}
*/
#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances)
/*
static
UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
const UInt32 *btBuf = p->btBuf + p->btBufPos;
UInt32 len = *btBuf++;
UInt32 pos = p->btBufPos;
const UInt32 *bt = p->btBuf + pos;
UInt32 len = *bt++;
UInt32 matchMinPos;
const UInt32 *d_base = d;
UInt32 avail = p->btNumAvailBytes - 1;
p->btBufPos = pos + 1 + len;
{
UInt32 temp1 = p->historySize;
p->btNumAvailBytes = avail;
#define BT_HASH_BYTES_MAX 5
if (len != 0)
temp1 = bt[1];
else if (avail < (BT_HASH_BYTES_MAX - 2))
{
INCREASE_LZ_POS
return 0;
}
matchMinPos = p->lzPos - temp1;
}
for (;;)
{
UInt32 h2, h3, c2, c3;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 m = p->lzPos;
MT_HASH3_CALC
c2 = hash[h2];
c3 = (hash + kFix3HashSize)[h3];
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
d[1] = m - c2 - 1;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
d[0] = 3;
d += 2;
break;
}
// else
{
d[0] = 2;
d += 2;
}
}
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
*d++ = 3;
*d++ = m - c3 - 1;
}
break;
}
if (len != 0)
{
do
{
UInt32 v0 = bt[0];
UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
while ((len -= 2) != 0);
}
INCREASE_LZ_POS
return (UInt32)(d - d_base);
}
*/
/*
MixMatches4():
  Looks up the current position in the 2-byte and 3-byte hash tables and writes
  the hash matches it finds as pairs (d[0] = length, d[1] = m - candidate - 1).
  p           : match finder; (p->hash), (p->pointerToCurPos) and (p->lzPos) are read,
                and both hash slots for the current position are overwritten with (p->lzPos).
  matchMinPos : hash candidates below this position are ignored.
  d           : output buffer for the pairs.
  Returns the pointer just past the last pair written (d itself, if nothing was written).
  A pair with length 4 ends the search immediately.
*/
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
UInt32 *hash = p->hash;
const Byte *cur = p->pointerToCurPos;
UInt32 m = p->lzPos;
// h2 and h3 are not assigned anywhere else in this function, so the macro must set them
MT_HASH3_CALC
// MT_HASH4_CALC
// read the previous candidates, then store the current position in the same slots
c2 = hash[h2];
c3 = (hash + kFix3HashSize)[h3];
// c4 = (hash + kFix4HashSize)[h4];
hash[h2] = m;
(hash + kFix3HashSize)[h3] = m;
// (hash + kFix4HashSize)[h4] = m;
// _USE_H2 is defined right here, so the 2-byte hash branch below is always compiled
#define _USE_H2
#ifdef _USE_H2
// (c2 - m) is the (negative) offset from (cur) to the candidate; only byte [0] is compared here
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{
// the distance is stored first; the length d[0] is filled in by the checks below
d[1] = m - c2 - 1;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
{
// d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
// return d + 2;
if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
{
d[0] = 4;
return d + 2;
}
d[0] = 3;
d += 2;
// this section uses (c4), whose declaration and load are commented out above;
// _USE_H4 is not defined in this function
#ifdef _USE_H4
if (c4 >= matchMinPos)
if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
)
{
*d++ = 4;
*d++ = m - c4 - 1;
}
#endif
// a length-3 pair was written from the 2-byte hash candidate: the 3-byte hash candidate (c3) is not checked
return d;
}
// byte [2] differs: write a length-2 pair and continue with the 3-byte hash candidate
d[0] = 2;
d += 2;
}
#endif
// only bytes [0] and [3] are compared for (c3);
// NOTE(review): bytes [1] and [2] are presumably implied by the h3 hash - confirm
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{
d[1] = m - c3 - 1;
if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
{
d[0] = 4;
return d + 2;
}
d[0] = 3;
d += 2;
}
// same (c4) dependency as in the section above: _USE_H4 is not defined in this function
#ifdef _USE_H4
if (c4 >= matchMinPos)
if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
)
{
*d++ = 4;
*d++ = m - c4 - 1;
}
#endif
return d;
}
static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
const UInt32 *bt = p->btBuf + p->btBufPos;
UInt32 len = *bt++;
p->btBufPos += 1 + len;
p->btNumAvailBytes--;
{
UInt32 i;
for (i = 0; i < len; i += 2)
{
UInt32 v0 = btBuf[0];
UInt32 v1 = btBuf[1];
btBuf += 2;
distances[0] = v0;
distances[1] = v1;
distances += 2;
UInt32 v0 = bt[0];
UInt32 v1 = bt[1];
bt += 2;
d[0] = v0;
d[1] = v1;
d += 2;
}
}
INCREASE_LZ_POS
return len;
}
static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances)
{
const UInt32 *btBuf = p->btBuf + p->btBufPos;
UInt32 len = *btBuf++;
p->btBufPos += 1 + len;
static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
UInt32 pos = p->btBufPos;
const UInt32 *bt = p->btBuf + pos;
UInt32 len = *bt++;
UInt32 avail = p->btNumAvailBytes - 1;
p->btNumAvailBytes = avail;
p->btBufPos = pos + 1 + len;
if (len == 0)
{
/* change for bt5 ! */
if (p->btNumAvailBytes-- >= 4)
len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances));
#define BT_HASH_BYTES_MAX 5
if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, d) - d);
}
else
{
/* Condition: there are matches in btBuf with length < p->numHashBytes */
UInt32 *distances2;
p->btNumAvailBytes--;
distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances);
/*
first match pair from BinTree: (match_len, match_dist),
(match_len >= numHashBytes).
MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
*/
UInt32 *d2;
d2 = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
do
{
UInt32 v0 = btBuf[0];
UInt32 v1 = btBuf[1];
btBuf += 2;
distances2[0] = v0;
distances2[1] = v1;
distances2 += 2;
UInt32 v0 = bt[0];
UInt32 v1 = bt[1];
bt += 2;
d2[0] = v0;
d2[1] = v1;
d2 += 2;
}
while ((len -= 2) != 0);
len = (UInt32)(distances2 - (distances));
len = (UInt32)(d2 - d);
}
INCREASE_LZ_POS
return len;
@@ -802,19 +1131,18 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
SKIP_FOOTER_MT
}
/*
static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
SKIP_HEADER_MT(4)
UInt32 h2, h3, h4;
MT_HASH4_CALC
(hash + kFix4HashSize)[h4] =
UInt32 h2, h3 /*, h4 */;
MT_HASH3_CALC
// MT_HASH4_CALC
// (hash + kFix4HashSize)[h4] =
(hash + kFix3HashSize)[h3] =
hash[ h2] =
p->lzPos;
SKIP_FOOTER_MT
}
*/
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
{
@@ -832,22 +1160,23 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable)
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
break;
case 3:
p->GetHeadsFunc = GetHeads3;
p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
break;
default:
/* case 4: */
case 4:
p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4;
// it's fast inline version of GetMatches()
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
break;
/*
default:
p->GetHeadsFunc = GetHeads5;
p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip;
break;
*/
}
}

View File

@@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2018-07-04 : Igor Pavlov : Public domain */
2019-11-05 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@@ -9,14 +9,6 @@
EXTERN_C_BEGIN
#define kMtHashBlockSize (1 << 13)
#define kMtHashNumBlocks (1 << 3)
#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
#define kMtBtBlockSize (1 << 14)
#define kMtBtNumBlocks (1 << 6)
#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
typedef struct _CMtSync
{
BoolInt wasCreated;
@@ -34,6 +26,7 @@ typedef struct _CMtSync
BoolInt csWasEntered;
CCriticalSection cs;
UInt32 numProcessedBlocks;
UInt64 affinity;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@@ -56,6 +49,7 @@ typedef struct _CMatchFinderMt
UInt32 *hash;
UInt32 fixedHashSize;
// UInt32 hash4Mask;
UInt32 historySize;
const UInt32 *crc;

View File

@@ -1,57 +1,34 @@
/* LzHash.h -- HASH functions for LZ algorithms
2015-04-12 : Igor Pavlov : Public domain */
2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
/*
(kHash2Size >= (1 << 8)) : Required
(kHash3Size >= (1 << 16)) : Required
*/
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
#define kHash4Size (1 << 20)
// #define kHash4Size (1 << 20)
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
/*
We use up to 3 crc values for hash:
crc0
crc1 << Shift_1
crc2 << Shift_2
(Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
Small values for Shift are not good for collision rate.
Big value for Shift_2 increases the minimum size
of hash table, that will be slow for small files.
*/
#define HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
#define HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
#define HASH5_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
temp ^= (p->crc[cur[3]] << 5); \
h4 = temp & (kHash4Size - 1); \
hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
#define MT_HASH2_CALC \
h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
#define MT_HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
#define MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
#define kLzHash_CrcShift_1 5
#define kLzHash_CrcShift_2 10
#endif

View File

@@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2019-02-02 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
@@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p)
LzmaDec_Init(&p->decoder);
}
static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
// ELzma2State
static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
switch (p->state)
{

View File

@@ -1,25 +1,25 @@
/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
2019-02-02 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #define SHOW_DEBUG_INFO
// #define _7ZIP_ST
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
#ifndef _7ZIP_ST
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
#define PRF_STR(s) PRF(printf("\n" s "\n"))
#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2))
// #define _7ZIP_ST
#endif
#include "Alloc.h"
@@ -28,10 +28,10 @@
#ifndef _7ZIP_ST
#include "MtDec.h"
#endif
#define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28)
#endif
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
{
@@ -255,7 +255,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
const unsigned kNumAlignBits = 12;
const unsigned kNumCacheLineBits = 7; /* <= kNumAlignBits */
t->alloc.numAlignBits = kNumAlignBits;
t->alloc.offset = ((UInt32)coderIndex * ((1 << 11) + (1 << 8) + (1 << 6))) & ((1 << kNumAlignBits) - (1 << kNumCacheLineBits));
t->alloc.offset = ((UInt32)coderIndex * (((unsigned)1 << 11) + (1 << 8) + (1 << 6))) & (((unsigned)1 << kNumAlignBits) - ((unsigned)1 << kNumCacheLineBits));
t->alloc.baseAlloc = me->alignOffsetAlloc.baseAlloc;
}
}
@@ -527,7 +527,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
const Byte *src, size_t srcSize, BoolInt isCross,
BoolInt *needContinue, BoolInt *canRecode)
{
CLzma2DecMt *me = (CLzma2DecMt *)pp;
@@ -536,12 +536,14 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
const Byte *data = t->outBuf;
BoolInt needContinue2 = True;
UNUSED_VAR(src)
UNUSED_VAR(srcSize)
UNUSED_VAR(isCross)
PRF_STR_INT_2("Write", coderIndex, srcSize);
*needContinue = False;
*canRecode = True;
UNUSED_VAR(src)
UNUSED_VAR(srcSize)
if (
// t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
@@ -696,7 +698,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
inPos = 0;
inLim = p->inBufSize;
inData = p->inBuf;
p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
p->readRes = ISeqInStream_Read(p->inStream, (void *)(p->inBuf), &inLim);
// p->readProcessed += inLim;
// inLim -= 5; p->readWasFinished = True; // for test
if (inLim == 0 || p->readRes != SZ_OK)
@@ -838,6 +840,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
p->inProcessed = 0;
p->readWasFinished = False;
p->readRes = SZ_OK;
*isMT = False;
@@ -856,7 +859,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
if (p->props.numThreads > 1)
{
IMtDecCallback vt;
IMtDecCallback2 vt;
Lzma2DecMt_FreeSt(p);
@@ -955,7 +958,12 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
*inProcessed = p->inProcessed;
// res = SZ_OK; // for test
if (res == SZ_OK && p->readRes != SZ_OK)
if (res == SZ_ERROR_INPUT_EOF)
{
if (p->readRes != SZ_OK)
res = p->readRes;
}
else if (res == SZ_OK && p->readRes != SZ_OK)
res = p->readRes;
/*

View File

@@ -1,5 +1,5 @@
/* Lzma2Enc.c -- LZMA2 Encoder
2018-07-04 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -330,7 +330,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
t2r = (unsigned)numBlocks;
t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
@@ -632,15 +632,15 @@ static SRes Lzma2Enc_EncodeMt1(
{
if (outBuf)
{
size_t destPos = *outBufSize;
const size_t destPos = *outBufSize;
if (destPos >= outLim)
return SZ_ERROR_OUTPUT_EOF;
outBuf[destPos] = 0;
outBuf[destPos] = LZMA2_CONTROL_EOF; // 0
*outBufSize = destPos + 1;
}
else
{
Byte b = 0;
const Byte b = LZMA2_CONTROL_EOF; // 0;
if (ISeqOutStream_Write(outStream, &b, 1) != 1)
return SZ_ERROR_WRITE;
}
@@ -780,13 +780,13 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
p->outBufSize = destBlockSize;
}
p->mtCoder.numThreadsMax = p->props.numBlockThreads_Max;
p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
{
SRes res = MtCoder_Code(&p->mtCoder);
if (!outStream)
*outBufSize = p->outBuf - outBuf;
*outBufSize = (size_t)(p->outBuf - outBuf);
return res;
}
}

View File

@@ -11,8 +11,6 @@
#include "Bra.h"
#include "LzmaEnc.h"
#define SZE_OUT_OVERFLOW SZE_DATA_ERROR
int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode)
{

View File

@@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder
2018-07-04 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -13,10 +13,12 @@
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
#define RC_INIT_SIZE 5
#ifndef _LZMA_DEC_OPT
#define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
@@ -62,9 +64,10 @@
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#endif // _LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
@@ -114,6 +117,9 @@
#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
#define kMatchSpecLen_Error_Data (1 << 9)
#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
/* External ASM code needs same CLzmaProb array layout. So don't change it. */
/* (probs_1664) is faster and better for code size at some platforms */
@@ -166,10 +172,12 @@
/*
p->remainLen : shows status of LZMA decoder:
< kMatchSpecLenStart : normal remain
= kMatchSpecLenStart : finished
< kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
= kMatchSpecLenStart : the LZMA stream was finished with end mark
= kMatchSpecLenStart + 1 : need init range coder
= kMatchSpecLenStart + 2 : need init range coder and state
= kMatchSpecLen_Error_Fail : Internal Code Failure
= kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
*/
/* ---------- LZMA_DECODE_REAL ---------- */
@@ -188,23 +196,31 @@ In:
{
LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
is not END_OF_PAYLOAD_MARKER, then function returns error code.
is not END_OF_PAYLOAD_MARKER, then the function doesn't write any byte to dictionary,
the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
}
Processing:
first LZMA symbol will be decoded in any case
All checks for limits are at the end of main loop,
It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
The first LZMA symbol will be decoded in any case.
All main checks for limits are at the end of main loop,
It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
Out:
RangeCoder is normalized
Result:
SZ_OK - OK
SZ_ERROR_DATA - Error
p->remainLen:
< kMatchSpecLenStart : normal remain
= kMatchSpecLenStart : finished
< kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
= kMatchSpecLenStart : the LZMA stream was finished with end mark
SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
p->remainLen : undefined
p->reps[*] : undefined
*/
@@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
else
{
UPDATE_1(prob);
/*
// that case was checked before with kBadRepCode
if (checkDicSize == 0 && processedPos == 0)
return SZ_ERROR_DATA;
*/
prob = probs + IsRepG0 + state;
IF_BIT_0(prob)
{
@@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob)
{
UPDATE_0(prob);
// that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
// The caller doesn't allow (dicPos == limit) case here
// so we don't need the following check:
// if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
processedPos++;
@@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
len += kMatchSpecLen_Error_Data + kMatchMinLen;
// len = kMatchSpecLen_Error_Data;
// len += kMatchMinLen;
break;
}
}
@@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
if ((rem = limit - dicPos) == 0)
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
/*
We stop decoding and return SZ_OK, and we can resume decoding later.
Any error conditions can be tested later in caller code.
For more strict mode we can stop decoding with error
// len += kMatchSpecLen_Error_Data;
*/
break;
}
curLen = ((rem < len) ? (unsigned)rem : len);
@@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->buf = buf;
p->range = range;
p->code = code;
p->remainLen = (UInt32)len;
p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
p->dicPos = dicPos;
p->processedPos = processedPos;
p->reps[0] = rep0;
@@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->reps[2] = rep2;
p->reps[3] = rep3;
p->state = (UInt32)state;
if (len >= kMatchSpecLen_Error_Data)
return SZ_ERROR_DATA;
return SZ_OK;
}
#endif
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
{
Byte *dic = p->dic;
SizeT dicPos = p->dicPos;
SizeT dicBufSize = p->dicBufSize;
unsigned len = (unsigned)p->remainLen;
SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
if (len == 0 /* || len >= kMatchSpecLenStart */)
return;
{
SizeT dicPos = p->dicPos;
Byte *dic;
SizeT dicBufSize;
SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
{
SizeT rem = limit - dicPos;
if (rem < len)
{
len = (unsigned)(rem);
if (len == 0)
return;
}
}
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
while (len != 0)
dic = p->dic;
rep0 = p->reps[0];
dicBufSize = p->dicBufSize;
do
{
len--;
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
}
while (--len);
p->dicPos = dicPos;
}
}
/*
At the start of a new stream we have one of the following symbols:
- Literal - is allowed
- Non-Rep-Match - is allowed only if it's the end marker symbol
- Rep-Match - is not allowed
We use an early check of (RangeCoder:Code) against kBadRepCode to simplify the main decoding code
*/
#define kRange0 0xFFFFFFFF
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
@@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
#error Stop_Compiling_Bad_LZMA_Check
#endif
/*
LzmaDec_DecodeReal2():
It calls LZMA_DECODE_REAL() and it adjusts limit according to (p->checkDicSize).
We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
and we support the following states of (p->checkDicSize):
if (total_processed < p->prop.dicSize) then
{
(total_processed == p->processedPos)
(p->checkDicSize == 0)
}
else
(p->checkDicSize == p->prop.dicSize)
*/
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
do
{
SizeT limit2 = limit;
if (p->checkDicSize == 0)
{
UInt32 rem = p->prop.dicSize - p->processedPos;
if (limit - p->dicPos > rem)
limit2 = p->dicPos + rem;
if (p->processedPos == 0)
if (p->code >= kBadRepCode)
return SZ_ERROR_DATA;
limit = p->dicPos + rem;
}
RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
{
int res = LZMA_DECODE_REAL(p, limit, bufLimit);
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
p->checkDicSize = p->prop.dicSize;
LzmaDec_WriteRem(p, limit);
return res;
}
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
return 0;
}
typedef enum
{
DUMMY_ERROR, /* unexpected end of input stream */
DUMMY_INPUT_EOF, /* need more input data */
DUMMY_LIT,
DUMMY_MATCH,
DUMMY_REP
} ELzmaDummy;
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
{
UInt32 range = p->range;
UInt32 code = p->code;
const Byte *bufLimit = buf + inSize;
const Byte *bufLimit = *bufOut;
const CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
ELzmaDummy res;
for (;;)
{
const CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
prob = probs + IsMatch + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
/* if (bufLimit - buf >= 7) return DUMMY_LIT; */
prob = probs + Literal;
if (p->checkDicSize != 0 || p->processedPos != 0)
prob += ((UInt32)LZMA_LIT_SIZE *
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
if (state < kNumLitStates)
{
@@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
NORMALIZE_CHECK;
return DUMMY_REP;
break;
}
else
{
@@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
{
unsigned numDirectBits = ((posSlot >> 1) - 1);
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
if (posSlot < kEndPosModelIndex)
{
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
@@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
}
}
break;
}
NORMALIZE_CHECK;
*bufOut = buf;
return res;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
{
p->remainLen = kMatchSpecLenStart + 1;
@@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p)
}
/*
LZMA supports optional end_marker.
So the decoder can look ahead for one additional LZMA-Symbol to check end_marker.
That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
When the decoder reaches dicLimit, it checks the (finishMode) parameter:
if (finishMode == LZMA_FINISH_ANY), the decoder doesn't look ahead
if (finishMode != LZMA_FINISH_ANY), the decoder looks ahead, if end_marker is possible for the current position
When the decoder looks ahead, and the lookahead symbol is not end_marker, we have two options:
1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
2) Relaxed mode (alternative mode) : we could return SZ_OK, and the caller
must check the (status) value. The caller can report an error,
if the end of stream is expected, and the (status) is neither
LZMA_STATUS_FINISHED_WITH_MARK nor LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
*/
/*
RETURN__NOT_FINISHED__FOR_FINISH:
  sets (*status) to LZMA_STATUS_NOT_FINISHED and returns from the enclosing function.
  It needs a pointer named (status) in scope.
  It expands to two statements, so it must be used inside a braced block
  (an unbraced "if (x) RETURN__NOT_FINISHED__FOR_FINISH" would return unconditionally).
*/
#define RETURN__NOT_FINISHED__FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
(*srcLen) = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
if (p->remainLen > kMatchSpecLenStart)
{
if (p->remainLen > kMatchSpecLenStart + 2)
return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
@@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
| ((UInt32)p->tempBuf[2] << 16)
| ((UInt32)p->tempBuf[3] << 8)
| ((UInt32)p->tempBuf[4]);
if (p->checkDicSize == 0
&& p->processedPos == 0
&& p->code >= kBadRepCode)
return SZ_ERROR_DATA;
p->range = 0xFFFFFFFF;
p->tempBufSize = 0;
@@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
p->remainLen = 0;
}
for (;;)
{
if (p->remainLen == kMatchSpecLenStart)
{
if (p->code != 0)
return SZ_ERROR_DATA;
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
}
LzmaDec_WriteRem(p, dicLimit);
while (p->remainLen != kMatchSpecLenStart)
{
// (p->remainLen == 0 || p->dicPos == dicLimit)
int checkEndMarkNow = 0;
if (p->dicPos >= dicLimit)
@@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->remainLen != 0)
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
RETURN__NOT_FINISHED__FOR_FINISH;
}
checkEndMarkNow = 1;
}
// (p->remainLen == 0)
if (p->tempBufSize == 0)
{
SizeT processed;
const Byte *bufLimit;
int dummyProcessed = -1;
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
int dummyRes = LzmaDec_TryDummy(p, src, inSize);
if (dummyRes == DUMMY_ERROR)
const Byte *bufOut = src + inSize;
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
if (dummyRes == DUMMY_INPUT_EOF)
{
memcpy(p->tempBuf, src, inSize);
p->tempBufSize = (unsigned)inSize;
size_t i;
if (inSize >= LZMA_REQUIRED_INPUT_MAX)
break;
(*srcLen) += inSize;
p->tempBufSize = (unsigned)inSize;
for (i = 0; i < inSize; i++)
p->tempBuf[i] = src[i];
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
dummyProcessed = (int)(bufOut - src);
if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
break;
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
unsigned i;
(*srcLen) += (unsigned)dummyProcessed;
p->tempBufSize = (unsigned)dummyProcessed;
for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH;
}
bufLimit = src;
// we will decode only one iteration
}
else
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
p->buf = src;
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
return SZ_ERROR_DATA;
processed = (SizeT)(p->buf - src);
(*srcLen) += processed;
{
int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
SizeT processed = (SizeT)(p->buf - src);
if (dummyProcessed < 0)
{
if (processed > inSize)
break;
}
else if ((unsigned)dummyProcessed != processed)
break;
src += processed;
inSize -= processed;
}
else
(*srcLen) += processed;
if (res != SZ_OK)
{
unsigned rem = p->tempBufSize, lookAhead = 0;
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
p->tempBuf[rem++] = src[lookAhead++];
p->tempBufSize = rem;
p->remainLen = kMatchSpecLen_Error_Data;
return SZ_ERROR_DATA;
}
}
continue;
}
{
// we have some data in (p->tempBuf)
// in strict mode: tempBufSize is not enough for one Symbol decoding.
// in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
unsigned rem = p->tempBufSize;
unsigned ahead = 0;
int dummyProcessed = -1;
while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
p->tempBuf[rem++] = src[ahead++];
// ahead - the size of new data copied from (src) to (p->tempBuf)
// rem - the size of temp buffer including new data from (src)
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
if (dummyRes == DUMMY_ERROR)
const Byte *bufOut = p->tempBuf + rem;
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
if (dummyRes == DUMMY_INPUT_EOF)
{
(*srcLen) += (SizeT)lookAhead;
if (rem >= LZMA_REQUIRED_INPUT_MAX)
break;
p->tempBufSize = rem;
(*srcLen) += (SizeT)ahead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
dummyProcessed = (int)(bufOut - p->tempBuf);
if ((unsigned)dummyProcessed < p->tempBufSize)
break;
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH;
}
}
p->buf = p->tempBuf;
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
return SZ_ERROR_DATA;
{
unsigned kkk = (unsigned)(p->buf - p->tempBuf);
if (rem < kkk)
return SZ_ERROR_FAIL; /* some internal error */
rem -= kkk;
if (lookAhead < rem)
return SZ_ERROR_FAIL; /* some internal error */
lookAhead -= rem;
// we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
SizeT processed = (SizeT)(p->buf - p->tempBuf);
rem = p->tempBufSize;
if (dummyProcessed < 0)
{
if (processed > LZMA_REQUIRED_INPUT_MAX)
break;
if (processed < rem)
break;
}
(*srcLen) += (SizeT)lookAhead;
src += lookAhead;
inSize -= (SizeT)lookAhead;
else if ((unsigned)dummyProcessed != processed)
break;
processed -= rem;
src += processed;
inSize -= processed;
(*srcLen) += processed;
p->tempBufSize = 0;
if (res != SZ_OK)
{
p->remainLen = kMatchSpecLen_Error_Data;
return SZ_ERROR_DATA;
}
}
}
}
}
if (p->code != 0)
return SZ_ERROR_DATA;
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
/* Some unexpected error: internal error of code, memory corruption or hardware failure */
p->remainLen = kMatchSpecLen_Error_Fail;
return SZ_ERROR_FAIL;
}
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT outSize = *destLen;

View File

@@ -1,5 +1,5 @@
/* LzmaDec.h -- LZMA Decoder
2018-04-21 : Igor Pavlov : Public domain */
2020-03-19 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
@@ -181,6 +181,7 @@ Returns:
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
@@ -223,6 +224,7 @@ Returns:
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,

View File

@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
2019-01-10: Igor Pavlov : Public domain */
2021-04-01: Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -19,6 +19,19 @@
#include "LzFindMt.h"
#endif
/* the following LzmaEnc_* declarations are the internal LZMA interface for the LZMA2 encoder */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
void LzmaEnc_Finish(CLzmaEncHandle pp);
void LzmaEnc_SaveState(CLzmaEncHandle pp);
void LzmaEnc_RestoreState(CLzmaEncHandle pp);
#ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0;
#endif
@@ -36,7 +49,7 @@ static unsigned g_STAT_OFFSET = 0;
#define kNumMoveReducingBits 4
#define kNumBitPriceShiftBits 4
#define kBitPrice (1 << kNumBitPriceShiftBits)
// #define kBitPrice (1 << kNumBitPriceShiftBits)
#define REP_LEN_COUNT 64
@@ -47,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->writeEndMark = 0;
p->affinity = 0;
}
void LzmaEncProps_Normalize(CLzmaEncProps *p)
@@ -55,7 +69,13 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (level < 0) level = 5;
p->level = level;
if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
if (p->dictSize == 0)
p->dictSize =
( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
( level <= 6 ? ((UInt32)1 << (level + 19)) :
( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
)));
if (p->dictSize > p->reduceSize)
{
unsigned i;
@@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
if (p->numHashBytes < 0) p->numHashBytes = 4;
if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numThreads < 0)
p->numThreads =
@@ -93,7 +113,7 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
#if (_MSC_VER >= 1400)
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
/* BSR code is fast for some new CPUs */
/* #define LZMA_LOG_BSR */
#endif
@@ -193,7 +213,7 @@ typedef struct
#define kNumLenToPosStates 4
#define kNumPosSlotBits 6
#define kDicLogSizeMin 0
// #define kDicLogSizeMin 0
#define kDicLogSizeMax 32
#define kDistTableSizeMax (kDicLogSizeMax * 2)
@@ -462,16 +482,16 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
p->dictSize = props.dictSize;
{
unsigned fb = props.fb;
unsigned fb = (unsigned)props.fb;
if (fb < 5)
fb = 5;
if (fb > LZMA_MATCH_LEN_MAX)
fb = LZMA_MATCH_LEN_MAX;
p->numFastBytes = fb;
}
p->lc = props.lc;
p->lp = props.lp;
p->pb = props.pb;
p->lc = (unsigned)props.lc;
p->lp = (unsigned)props.lp;
p->pb = (unsigned)props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
@@ -479,17 +499,17 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
unsigned numHashBytes = 4;
if (props.btMode)
{
if (props.numHashBytes < 2)
numHashBytes = 2;
else if (props.numHashBytes < 4)
numHashBytes = props.numHashBytes;
if (props.numHashBytes < 2) numHashBytes = 2;
else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
}
if (props.numHashBytes >= 5) numHashBytes = 5;
p->matchFinderBase.numHashBytes = numHashBytes;
}
p->matchFinderBase.cutValue = props.mc;
p->writeEndMark = props.writeEndMark;
p->writeEndMark = (BoolInt)props.writeEndMark;
#ifndef _7ZIP_ST
/*
@@ -500,6 +520,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
}
*/
p->multiThread = (props.numThreads > 1);
p->matchFinderMt.btSync.affinity =
p->matchFinderMt.hashSync.affinity = props.affinity;
#endif
return SZ_OK;
@@ -536,8 +558,8 @@ static void RangeEnc_Construct(CRangeEnc *p)
p->bufBase = NULL;
}
#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RC_BUF_SIZE (1 << 16)
@@ -578,7 +600,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
size_t num;
if (p->res != SZ_OK)
return;
num = p->buf - p->bufBase;
num = (size_t)(p->buf - p->bufBase);
if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
p->res = SZ_ERROR_WRITE;
p->processed += num;
@@ -656,7 +678,7 @@ static void RangeEnc_FlushData(CRangeEnc *p)
range += newBound & mask; \
mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
mask += ((1 << kNumMoveBits) - 1); \
ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
}
@@ -749,7 +771,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
bitCount++;
}
}
ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
// printf("\n%3d: %5d", i, ProbPrices[i]);
}
}
@@ -1011,7 +1033,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
@@ -2198,7 +2220,7 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
void LzmaEnc_Construct(CLzmaEnc *p)
static void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
MatchFinder_Construct(&p->matchFinderBase);
@@ -2233,7 +2255,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
return p;
}
void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->litProbs);
ISzAlloc_Free(alloc, p->saveState.litProbs);
@@ -2241,7 +2263,7 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
p->saveState.litProbs = NULL;
}
void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
#ifndef _7ZIP_ST
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
@@ -2259,6 +2281,7 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
}
MY_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
{
UInt32 nowPos32, startPos32;
@@ -2521,12 +2544,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
// { int y; for (y = 0; y < 100; y++) {
FillDistancesPrices(p);
// }}
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
}
if (p->repLenEncCounter <= 0)
{
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
}
@@ -2611,7 +2634,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
return SZ_OK;
}
void LzmaEnc_Init(CLzmaEnc *p)
static void LzmaEnc_Init(CLzmaEnc *p)
{
unsigned i;
p->state = 0;
@@ -2675,12 +2698,12 @@ void LzmaEnc_Init(CLzmaEnc *p)
p->additionalOffset = 0;
p->pbMask = (1 << p->pb) - 1;
p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
}
void LzmaEnc_InitPrices(CLzmaEnc *p)
static void LzmaEnc_InitPrices(CLzmaEnc *p)
{
if (!p->fastMode)
{
@@ -2694,8 +2717,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p)
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
@@ -2788,12 +2811,13 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
}
/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
{
const CLzmaEnc *p = (CLzmaEnc *)pp;
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
}
*/
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
{
@@ -2841,6 +2865,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
}
MY_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
{
SRes res = SZ_OK;
@@ -2899,14 +2924,14 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
if (dictSize >= ((UInt32)1 << 22))
{
UInt32 kDictMask = ((UInt32)1 << 20) - 1;
const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
dictSize = (dictSize + kDictMask) & ~kDictMask;
}
else for (i = 11; i <= 30; i++)
{
if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; }
if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; }
}
for (i = 0; i < 4; i++)
@@ -2917,7 +2942,7 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
{
return ((CLzmaEnc *)pp)->writeEndMark;
return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
}
@@ -2974,3 +2999,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
LzmaEnc_Destroy(p, alloc, allocBig);
return res;
}
/*
#ifndef _7ZIP_ST
void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
{
const CLzmaEnc *p = (CLzmaEnc *)pp;
lz_threads[0] = p->matchFinderMt.hashSync.thread;
lz_threads[1] = p->matchFinderMt.btSync.thread;
}
#endif
*/

View File

@@ -1,5 +1,5 @@
/* LzmaEnc.h -- LZMA Encoder
2017-07-27 : Igor Pavlov : Public domain */
2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
@@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
UInt64 affinity;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);

View File

@@ -1,5 +1,5 @@
/* LzmaLib.h -- LZMA library interface
2013-01-18 : Igor Pavlov : Public domain */
2021-04-03 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H
#define __LZMA_LIB_H
@@ -40,14 +40,16 @@ outPropsSize -
level - compression level: 0 <= level <= 9;
level dictSize algo fb
0: 16 KB 0 32
1: 64 KB 0 32
2: 256 KB 0 32
3: 1 MB 0 32
4: 4 MB 0 32
0: 64 KB 0 32
1: 256 KB 0 32
2: 1 MB 0 32
3: 4 MB 0 32
4: 16 MB 0 32
5: 16 MB 1 32
6: 32 MB 1 32
7+: 64 MB 1 64
7: 32 MB 1 64
8: 64 MB 1 64
9: 64 MB 1 64
The default value for "level" is 5.
@@ -83,6 +85,11 @@ fb - Word size (the number of fast bytes).
numThreads - The number of threads. 1 or 2. The default value is 2.
Fast mode (algo = 0) can use only 1 thread.
In:
dest - output data buffer
destLen - output data buffer size
src - input data
srcLen - input data size
Out:
destLen - processed output size
Returns:
@@ -108,8 +115,8 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
LzmaUncompress
--------------
In:
dest - output data
destLen - output data size
dest - output data buffer
destLen - output data buffer size
src - input data
srcLen - input data size
Out:

View File

@@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
2018-07-04 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -7,7 +7,7 @@
#ifndef _7ZIP_ST
SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
{
CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
UInt64 inSize2 = 0;
@@ -70,8 +70,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
{
t->stop = 1;
Event_Set(&t->startEvent);
Thread_Wait(&t->thread);
Thread_Close(&t->thread);
Thread_Wait_Close(&t->thread);
}
Event_Close(&t->startEvent);
@@ -342,7 +341,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
for (;;)
{
if (Event_Wait(&t->startEvent) != 0)
return SZ_ERROR_THREAD;
return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
if (t->stop)
return 0;
{
@@ -358,7 +357,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
if (numFinished == mtc->numStartedThreads)
if (Event_Set(&mtc->finishedEvent) != 0)
return SZ_ERROR_THREAD;
return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
}
#endif
}

View File

@@ -1,16 +1,21 @@
/* MtDec.c -- Multi-thread Decoder
2019-02-02 : Igor Pavlov : Public domain */
2021-02-27 : Igor Pavlov : Public domain */
#include "Precomp.h"
// #define SHOW_DEBUG_INFO
// #include <stdio.h>
#include <string.h>
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
#include "MtDec.h"
#ifndef _7ZIP_ST
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
@@ -19,10 +24,6 @@
#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
#include "MtDec.h"
#ifndef _7ZIP_ST
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress)
{
p->progress = progress;
@@ -77,7 +78,7 @@ void MtProgress_SetError(CMtProgress *p, SRes res)
}
#define RINOK_THREAD(x) RINOK(x)
#define RINOK_THREAD(x) RINOK_WRes(x)
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
@@ -156,8 +157,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t)
{
Event_Set(&t->canWrite); /* we can disable it. There are no threads waiting canWrite in normal cases */
Event_Set(&t->canRead);
Thread_Wait(&t->thread);
Thread_Close(&t->thread);
Thread_Wait_Close(&t->thread);
}
Event_Close(&t->canRead);
@@ -289,12 +289,13 @@ static WRes ThreadFunc2(CMtDecThread *t)
Byte *afterEndData = NULL;
size_t afterEndData_Size = 0;
BoolInt afterEndData_IsCross = False;
BoolInt canCreateNewThread = False;
// CMtDecCallbackInfo parse;
CMtDecThread *nextThread;
PRF_STR_INT("Event_Wait(&t->canRead)", t->index);
PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index);
RINOK_THREAD(Event_Wait(&t->canRead));
if (p->exitThread)
@@ -418,10 +419,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
parse.srcFinished = finish;
parse.canCreateNewThread = True;
// PRF(printf("\nParse size = %d\n", (unsigned)size))
PRF(printf("\nParse size = %d\n", (unsigned)size));
p->mtCallback->Parse(p->mtCallbackObject, t->index, &parse);
PRF(printf(" Parse processed = %d, state = %d \n", (unsigned)parse.srcSize, (unsigned)parse.state));
needWrite = True;
canCreateNewThread = parse.canCreateNewThread;
@@ -478,16 +481,12 @@ static WRes ThreadFunc2(CMtDecThread *t)
if (parse.state == MTDEC_PARSE_END)
{
p->crossStart = 0;
p->crossEnd = 0;
if (crossSize != 0)
memcpy(data + parse.srcSize, parseData + parse.srcSize, size - parse.srcSize); // we need all data
afterEndData_Size = size - parse.srcSize;
afterEndData = parseData + parse.srcSize;
afterEndData_Size = size - parse.srcSize;
if (crossSize != 0)
afterEndData_IsCross = True;
// we reduce data size to required bytes (parsed only)
inDataSize -= (size - parse.srcSize);
inDataSize -= afterEndData_Size;
if (!prev)
inDataSize_Start = parse.srcSize;
break;
@@ -752,13 +751,15 @@ static WRes ThreadFunc2(CMtDecThread *t)
{
// p->inProcessed += inCodePos;
PRF(printf("\n--Write afterSize = %d\n", (unsigned)afterEndData_Size));
res = p->mtCallback->Write(p->mtCallbackObject, t->index,
res == SZ_OK && needWriteToStream && !wasInterrupted, // needWrite
afterEndData, afterEndData_Size,
afterEndData, afterEndData_Size, afterEndData_IsCross,
&needContinue,
&canRecode);
// res= E_INVALIDARG; // for test
// res = SZ_ERROR_FAIL; // for test
PRF(printf("\nAfter Write needContinue = %d\n", (unsigned)needContinue));
PRF(printf("\nprocessed = %d\n", (unsigned)p->inProcessed));
@@ -847,7 +848,7 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
res = ThreadFunc2(t);
p = t->mtDec;
if (res == 0)
return p->exitThreadWRes;
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
{
// it's unexpected situation for some threading function error
if (p->exitThreadWRes == 0)
@@ -858,15 +859,14 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc1(void *pp)
Event_Set(&p->threads[0].canWrite);
MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
}
return res;
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
}
static MY_NO_INLINE THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE ThreadFunc(void *pp)
{
CMtDecThread *t = (CMtDecThread *)pp;
// fprintf(stderr, "\n%d = %p - before", t->index, &t);
#ifdef USE_ALLOCA
CMtDecThread *t = (CMtDecThread *)pp;
// fprintf(stderr, "\n%d = %p - before", t->index, &t);
t->allocaPtr = alloca(t->index * 128);
#endif
return ThreadFunc1(pp);
@@ -1092,13 +1092,14 @@ SRes MtDec_Code(CMtDec *p)
{
WRes wres;
WRes sres;
SRes sres;
CMtDecThread *nextThread = &p->threads[p->numStartedThreads++];
// wres = MtDecThread_CreateAndStart(nextThread);
wres = MtDecThread_CreateEvents(nextThread);
if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
if (wres == 0) { wres = Event_Set(&nextThread->canRead);
if (wres == 0) { wres = ThreadFunc(nextThread);
if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread);
wres = (WRes)(UINT_PTR)res;
if (wres != 0)
{
p->needContinue = False;
@@ -1131,7 +1132,7 @@ SRes MtDec_Code(CMtDec *p)
// if (sres != SZ_OK)
return sres;
// return E_FAIL;
// return SZ_ERROR_FAIL;
}
}

View File

@@ -1,5 +1,5 @@
/* MtDec.h -- Multi-thread Decoder
2018-07-04 : Igor Pavlov : Public domain */
2020-03-05 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H
#define __MT_DEC_H
@@ -108,11 +108,12 @@ typedef struct
*/
SRes (*Write)(void *p, unsigned coderIndex,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode);
} IMtDecCallback;
} IMtDecCallback2;
@@ -132,7 +133,7 @@ typedef struct _CMtDec
ICompressProgress *progress;
ISzAllocPtr alloc;
IMtDecCallback *mtCallback;
IMtDecCallback2 *mtCallback;
void *mtCallbackObject;

138
C/Ppmd.h
View File

@@ -1,5 +1,5 @@
/* Ppmd.h -- PPMD codec common code
2017-04-03 : Igor Pavlov : Public domain
2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H
@@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
EXTERN_C_BEGIN
#ifdef MY_CPU_32BIT
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
/*
PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields.
if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
PPMD code works slightly faster in (PPMD_32BIT) mode.
*/
#define PPMD_32BIT
#endif
@@ -28,7 +37,7 @@ EXTERN_C_BEGIN
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
#pragma pack(push, 1)
MY_CPU_pragma_pack_push_1
/* Most compilers work OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
/* SEE-contexts for PPM-contexts with masked symbols */
@@ -40,41 +49,114 @@ typedef struct
} CPpmd_See;
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
{ (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
{ (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
typedef struct
{
Byte Symbol;
Byte Freq;
UInt16 SuccessorLow;
UInt16 SuccessorHigh;
UInt16 Successor_0;
UInt16 Successor_1;
} CPpmd_State;
#pragma pack(pop)
typedef struct CPpmd_State2_
{
Byte Symbol;
Byte Freq;
} CPpmd_State2;
typedef
#ifdef PPMD_32BIT
CPpmd_State *
#else
UInt32
#endif
CPpmd_State_Ref;
typedef struct CPpmd_State4_
{
UInt16 Successor_0;
UInt16 Successor_1;
} CPpmd_State4;
typedef
#ifdef PPMD_32BIT
void *
#else
UInt32
#endif
CPpmd_Void_Ref;
MY_CPU_pragma_pop
/*
PPMD code can write full CPpmd_State structure data to CPpmd*_Context
at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
If we use pointers to different types that point to the same shared
memory space, we can have an aliasing problem (strict aliasing).
The XLC compiler in -O2 mode can change the order of memory write instructions
in relation to read instructions, if we use pointers to different types.
To solve that aliasing problem we use a combined CPpmd*_Context structure
with unions that contain the fields from both structures:
the original CPpmd*_Context and CPpmd_State.
So we can access the fields from both structures via one pointer,
and the compiler doesn't change the order of write instructions
in relation to read instructions.
If we don't use memory write instructions to shared memory in
some local code, and we use only reading instructions (read only),
then probably it's safe to use pointers to different types for reading.
*/
/*
Helpers for the 32-bit reference fields stored inside PPMD structures:
  Ppmd_Ref_Type(type)            - the type of a stored reference to (type)
  Ppmd_GetRef(p, ptr)            - converts a pointer to a stored reference
  Ppmd_GetPtr(p, ref)            - converts a stored reference back to a pointer
  Ppmd_GetPtr_Type(p, ref, type) - same as Ppmd_GetPtr(), with the result as (type *)
If PPMD_32BIT is defined, the stored reference is the pointer itself:
  all conversions are identity and (p) is not used.
If PPMD_32BIT is NOT defined, the stored reference is the UInt32 byte offset
  from (p)->Base, so (p) must have a valid (Base) member.
*/
#ifdef PPMD_32BIT
#define Ppmd_Ref_Type(type) type *
#define Ppmd_GetRef(p, ptr) (ptr)
#define Ppmd_GetPtr(p, ptr) (ptr)
#define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
#else
#define Ppmd_Ref_Type(type) UInt32
#define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
#define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
#endif // PPMD_32BIT
typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref;
typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref;
/*
#ifdef MY_CPU_LE_UNALIGN
// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
#else
*/
/*
We can write the 16-bit halves of the 32-bit (Successor) field in any selected order.
But the native order is the more consistent way.
So we use the native order, if LE/BE order can be detected here at compile time.
*/
/*
Ppmd_GET_SUCCESSOR(p)    - combines (p)->Successor_0 and (p)->Successor_1 into one 32-bit value
                           and casts it to CPpmd_Void_Ref.
Ppmd_SET_SUCCESSOR(p, v) - splits the 32-bit value (v) into the two 16-bit fields.
*/
#ifdef MY_CPU_BE
// MY_CPU_BE: Successor_0 holds the high 16 bits, Successor_1 holds the low 16 bits
#define Ppmd_GET_SUCCESSOR(p) \
( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
#define Ppmd_SET_SUCCESSOR(p, v) { \
(p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
(p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
#else
// otherwise: Successor_0 holds the low 16 bits, Successor_1 holds the high 16 bits
#define Ppmd_GET_SUCCESSOR(p) \
( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
#define Ppmd_SET_SUCCESSOR(p, v) { \
(p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
(p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
#endif
// #endif
typedef
#ifdef PPMD_32BIT
Byte *
#else
UInt32
#endif
CPpmd_Byte_Ref;
#define PPMD_SetAllBitsIn256Bytes(p) \
{ size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \

844
C/Ppmd7.c
View File

File diff suppressed because it is too large Load Diff

169
C/Ppmd7.h
View File

@@ -1,10 +1,8 @@
/* Ppmd7.h -- PPMdH compression codec
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* This code supports virtual RangeDecoder and includes the implementation
of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H.
If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */
#ifndef __PPMD7_H
#define __PPMD7_H
@@ -21,23 +19,56 @@ EXTERN_C_BEGIN
struct CPpmd7_Context_;
typedef
#ifdef PPMD_32BIT
struct CPpmd7_Context_ *
#else
UInt32
#endif
CPpmd7_Context_Ref;
typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref;
// MY_CPU_pragma_pack_push_1
typedef struct CPpmd7_Context_
{
UInt16 NumStats;
union
{
UInt16 SummFreq;
CPpmd_State2 State2;
} Union2;
union
{
CPpmd_State_Ref Stats;
CPpmd_State4 State4;
} Union4;
CPpmd7_Context_Ref Suffix;
} CPpmd7_Context;
#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
// MY_CPU_pragma_pop
#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
/* Range decoder state for Ppmd7. It is stored as the (dec) member of the (rc) union in CPpmd7. */
typedef struct
{
UInt32 Range;
UInt32 Code;
UInt32 Low; // NOTE(review): Ppmd7z_RangeDec_Init() does not write this field; presumably it is used only by the Ppmd7a_* (original PPMdH) decoder - confirm
IByteIn *Stream; // input byte source; must be set before Ppmd7*_RangeDec_Init() is called
} CPpmd7_RangeDec;
/* Range encoder state for Ppmd7z. It is stored as the (enc) member of the (rc) union in CPpmd7. */
typedef struct
{
UInt32 Range;
Byte Cache;
// Byte _dummy_[3];
UInt64 Low;
UInt64 CacheSize;
IByteOut *Stream; // output byte sink; NOTE(review): presumably must be set by the caller before Ppmd7z_Init_RangeEnc() - confirm
} CPpmd7z_RangeEnc;
typedef struct
{
@@ -48,17 +79,30 @@ typedef struct
UInt32 Size;
UInt32 GlueCount;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
Byte Indx2Units[PPMD_NUM_INDEXES];
union
{
CPpmd7_RangeDec dec;
CPpmd7z_RangeEnc enc;
} rc;
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];
Byte NS2BSIndx[256], NS2Indx[256];
Byte ExpEscape[16];
CPpmd_See DummySee, See[25][16];
UInt16 BinSumm[128][64];
// int LastSymbol;
} CPpmd7;
void Ppmd7_Construct(CPpmd7 *p);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
@@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
/* ---------- Internal Functions ---------- */
extern const Byte PPMD7_kExpEscape[16];
#ifdef PPMD_32BIT
#define Ppmd7_GetPtr(p, ptr) (ptr)
#define Ppmd7_GetContext(p, ptr) (ptr)
#define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
#else
#define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
#define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
#endif
#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context)
#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd7_Update1(CPpmd7 *p);
void Ppmd7_Update1_0(CPpmd7 *p);
void Ppmd7_Update2(CPpmd7 *p);
void Ppmd7_UpdateBin(CPpmd7 *p);
#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3))
#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4))
// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3))
// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4))
#define Ppmd7_GetBinSumm(p) \
&p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
(p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
((p->RunLength >> 26) & 0x20)]
&p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \
[ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \
+ PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \
+ (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ]
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
/*
We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure:
1) Ppmd7a_*: original PPMdH
2) Ppmd7z_*: modified PPMdH with 7z Range Coder
Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH)
*/
/* ---------- Decode ---------- */
typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
#define PPMD7_SYM_END (-1)
#define PPMD7_SYM_ERROR (-2)
struct IPpmd7_RangeDec
{
UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);
void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);
UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);
};
/*
You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init()
typedef struct
{
IPpmd7_RangeDec vt;
UInt32 Range;
UInt32 Code;
IByteIn *Stream;
} CPpmd7z_RangeDec;
Ppmd7*_DecodeSymbol()
out:
>= 0 : decoded byte
-1 : PPMD7_SYM_END : End of payload marker
-2 : PPMD7_SYM_ERROR : Data error
*/
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
/* Ppmd7a_* : original PPMdH */
BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p);
#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7a_DecodeSymbol(CPpmd7 *p);
/* Ppmd7z_* : modified PPMdH with 7z Range Coder */
BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p);
#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
int Ppmd7z_DecodeSymbol(CPpmd7 *p);
// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim);
/* ---------- Encode ---------- */
typedef struct
{
UInt64 Low;
UInt32 Range;
Byte Cache;
UInt64 CacheSize;
IByteOut *Stream;
} CPpmd7z_RangeEnc;
void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
void Ppmd7z_Init_RangeEnc(CPpmd7 *p);
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p);
// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol);
void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim);
EXTERN_C_END

View File

@@ -1,6 +1,8 @@
/* Ppmd7Dec.c -- PPMdH Decoder
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
if (IByteIn_Read(p->Stream) != 0)
if (READ_BYTE(p) != 0)
return False;
for (i = 0; i < 4; i++)
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \
{ (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8;
static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
#define RC_NORM_1(p) RC_NORM_BASE(p) }
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
#define R (&p->rc.dec)
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
GET_Ppmd7z_RangeDec
return p->Code / (p->Range /= total);
R->Code -= start * R->Range;
R->Range *= size;
RC_NORM_LOCAL(R)
}
static void Range_Normalize(CPpmd7z_RangeDec *p)
{
if (p->Range < kTopValue)
{
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
p->Range <<= 8;
if (p->Range < kTopValue)
{
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
p->Range <<= 8;
}
}
}
static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
{
GET_Ppmd7z_RangeDec
p->Code -= start * p->Range;
p->Range *= size;
Range_Normalize(p);
}
static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
{
GET_Ppmd7z_RangeDec
UInt32 newBound = (p->Range >> 14) * size0;
UInt32 symbol;
if (p->Code < newBound)
{
symbol = 0;
p->Range = newBound;
}
else
{
symbol = 1;
p->Code -= newBound;
p->Range -= newBound;
}
Range_Normalize(p);
return symbol;
}
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
{
p->vt.GetThreshold = Range_GetThreshold;
p->vt.Decode = Range_Decode;
p->vt.DecodeBit = Range_DecodeBit;
}
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define MASK(sym) ((signed char *)charMask)[sym]
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
#define MASK(sym) ((unsigned char *)charMask)[sym]
// MY_FORCE_INLINE
// static
int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
UInt32 summFreq = p->MinContext->Union2.SummFreq;
count = RC_GetThreshold(summFreq);
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
Byte symbol;
rc->Decode(rc, 0, s->Freq);
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd7_Update1_0(p);
return symbol;
return sym;
}
p->PrevSuccess = 0;
i = p->MinContext->NumStats - 1;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((hiCnt += (++s)->Freq) > count)
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte symbol;
rc->Decode(rc, hiCnt - s->Freq, s->Freq);
Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd7_Update1(p);
return symbol;
return sym;
}
}
while (--i);
if (count >= p->MinContext->SummFreq)
return -2;
p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);
if (hiCnt >= summFreq)
return PPMD7_SYM_ERROR;
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats - 1;
do { MASK((--s)->Symbol) = 0; } while (--i);
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
if (rc->DecodeBit(rc, *prob) == 0)
UInt32 pr = *prob;
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte symbol;
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
Ppmd7_UpdateBin(p);
return symbol;
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM_1(R)
/* we can use single byte normalization here because of
(min(BinSumm[][]) = 95) > (1 << (14 - 8)) */
// sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
return sym;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(size0);
R->Code -= size0;
R->Range -= size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_State *ps[256], *s;
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
unsigned i, num, numMasked = p->MinContext->NumStats;
CPpmd7_Context *mc;
unsigned numMasked;
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
return -1;
p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
if (!mc->Suffix)
return PPMD7_SYM_END;
mc = Ppmd7_GetContext(p, mc->Suffix);
}
while (p->MinContext->NumStats == numMasked);
hiCnt = 0;
s = Ppmd7_GetStats(p, p->MinContext);
i = 0;
num = p->MinContext->NumStats - numMasked;
while (mc->NumStats == numMasked);
s = Ppmd7_GetStats(p, mc);
{
unsigned num = mc->NumStats;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
int k = (int)(MASK(s->Symbol));
hiCnt += (s->Freq & k);
ps[i] = s++;
i -= k;
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
while (i != num);
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
count = rc->GetThreshold(rc, freqSum);
count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
Byte symbol;
CPpmd_State **pps = ps;
for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
s = *pps;
rc->Decode(rc, hiCnt - s->Freq, s->Freq);
Byte sym;
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
};
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd7_Update2(p);
return symbol;
return sym;
}
if (count >= freqSum)
return -2;
rc->Decode(rc, hiCnt, freqSum - hiCnt);
return PPMD7_SYM_ERROR;
RC_Decode(hiCnt, freqSum - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
s = Ppmd7_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}
/*
Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
{
int sym = 0;
if (buf != lim)
do
{
sym = Ppmd7z_DecodeSymbol(p);
if (sym < 0)
break;
*buf = (Byte)sym;
}
while (++buf < lim);
p->LastSymbol = sym;
return buf;
}
*/

View File

@@ -1,6 +1,8 @@
/* Ppmd7Enc.c -- PPMdH Encoder
2017-04-03 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -8,65 +10,60 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p)
#define R (&p->rc.enc)
void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
{
p->Low = 0;
p->Range = 0xFFFFFFFF;
p->Cache = 0;
p->CacheSize = 1;
R->Low = 0;
R->Range = 0xFFFFFFFF;
R->Cache = 0;
R->CacheSize = 1;
}
static void RangeEnc_ShiftLow(CPpmd7z_RangeEnc *p)
MY_NO_INLINE
static void RangeEnc_ShiftLow(CPpmd7 *p)
{
if ((UInt32)p->Low < (UInt32)0xFF000000 || (unsigned)(p->Low >> 32) != 0)
if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
{
Byte temp = p->Cache;
Byte temp = R->Cache;
do
{
IByteOut_Write(p->Stream, (Byte)(temp + (Byte)(p->Low >> 32)));
IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32)));
temp = 0xFF;
}
while (--p->CacheSize != 0);
p->Cache = (Byte)((UInt32)p->Low >> 24);
while (--R->CacheSize != 0);
R->Cache = (Byte)((UInt32)R->Low >> 24);
}
p->CacheSize++;
p->Low = (UInt32)p->Low << 8;
R->CacheSize++;
R->Low = (UInt32)((UInt32)R->Low << 8);
}
static void RangeEnc_Encode(CPpmd7z_RangeEnc *p, UInt32 start, UInt32 size, UInt32 total)
#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p);
#define RC_NORM_1(p) RC_NORM_BASE(p) }
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
/*
#define RangeEnc_Encode(p, start, _size_) \
{ UInt32 size = _size_; \
R->Low += start * R->Range; \
R->Range *= size; \
RC_NORM_LOCAL(p); }
*/
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
{
p->Low += start * (p->Range /= total);
p->Range *= size;
while (p->Range < kTopValue)
{
p->Range <<= 8;
RangeEnc_ShiftLow(p);
}
R->Low += start * R->Range;
R->Range *= size;
RC_NORM_LOCAL(p);
}
static void RangeEnc_EncodeBit_0(CPpmd7z_RangeEnc *p, UInt32 size0)
{
p->Range = (p->Range >> 14) * size0;
while (p->Range < kTopValue)
{
p->Range <<= 8;
RangeEnc_ShiftLow(p);
}
}
static void RangeEnc_EncodeBit_1(CPpmd7z_RangeEnc *p, UInt32 size0)
{
UInt32 newBound = (p->Range >> 14) * size0;
p->Low += newBound;
p->Range -= newBound;
while (p->Range < kTopValue)
{
p->Range <<= 8;
RangeEnc_ShiftLow(p);
}
}
void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
{
unsigned i;
for (i = 0; i < 5; i++)
@@ -74,31 +71,53 @@ void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p)
}
#define MASK(sym) ((signed char *)charMask)[sym]
void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
#define RC_Encode(start, size) RangeEnc_Encode(p, start, size);
#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p);
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
#define SUFFIX(ctx) CTX((ctx)->Suffix)
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
MY_FORCE_INLINE
static
void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
R->Range /= p->MinContext->Union2.SummFreq;
if (s->Symbol == symbol)
{
RangeEnc_Encode(rc, 0, s->Freq, p->MinContext->SummFreq);
// R->Range /= p->MinContext->Union2.SummFreq;
RC_EncodeFinal(0, s->Freq);
p->FoundState = s;
Ppmd7_Update1_0(p);
return;
}
p->PrevSuccess = 0;
sum = s->Freq;
i = p->MinContext->NumStats - 1;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((++s)->Symbol == symbol)
{
RangeEnc_Encode(rc, sum, s->Freq, p->MinContext->SummFreq);
// R->Range /= p->MinContext->Union2.SummFreq;
RC_EncodeFinal(sum, s->Freq);
p->FoundState = s;
Ppmd7_Update1(p);
return;
@@ -107,81 +126,198 @@ void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol)
}
while (--i);
p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
// R->Range /= p->MinContext->Union2.SummFreq;
RC_Encode(sum, p->MinContext->Union2.SummFreq - sum);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
// MASK(s->Symbol) = 0;
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats - 1;
do { MASK((--s)->Symbol) = 0; } while (--i);
RangeEnc_Encode(rc, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
UInt16 *prob = Ppmd7_GetBinSumm(p);
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt32 pr = *prob;
UInt32 bound = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
RangeEnc_EncodeBit_0(rc, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeEnc_EncodeBit_0(p, bound);
R->Range = bound;
RC_NORM_1(p);
// p->FoundState = s;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
p->FoundState = s;
Ppmd7_UpdateBin(p);
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return;
}
else
{
RangeEnc_EncodeBit_1(rc, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeEnc_EncodeBit_1(p, bound);
R->Low += bound;
R->Range -= bound;
RC_NORM_LOCAL(p)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
p->PrevSuccess = 0;
}
}
for (;;)
{
UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
UInt32 sum;
unsigned i, numMasked = p->MinContext->NumStats;
UInt32 sum, escFreq;
CPpmd7_Context *mc;
unsigned i, numMasked;
RC_NORM_REMOTE(p)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
mc = Ppmd7_GetContext(p, mc->Suffix);
i = mc->NumStats;
}
while (p->MinContext->NumStats == numMasked);
while (i == numMasked);
see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
s = Ppmd7_GetStats(p, p->MinContext);
p->MinContext = mc;
// see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
{
if (i != 256)
{
unsigned nonMasked = i - numMasked;
see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+ p->HiBitsFlag
+ (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i)
+ 2 * (unsigned)(mc->Union2.SummFreq < 11 * i)
+ 4 * (unsigned)(numMasked > nonMasked);
{
// if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
unsigned summ = (UInt16)see->Summ; // & 0xFFFF
unsigned r = (summ >> see->Shift);
see->Summ = (UInt16)(summ - r);
escFreq = r + (r == 0);
}
}
else
{
see = &p->DummySee;
escFreq = 1;
}
}
s = Ppmd7_GetStats(p, mc);
sum = 0;
i = p->MinContext->NumStats;
// i = mc->NumStats;
do
{
int cur = s->Symbol;
if (cur == symbol)
unsigned cur = s->Symbol;
if ((int)cur == symbol)
{
UInt32 low = sum;
CPpmd_State *s1 = s;
do
{
sum += (s->Freq & (int)(MASK(s->Symbol)));
s++;
}
while (--i);
RangeEnc_Encode(rc, low, s1->Freq, sum + escFreq);
UInt32 freq = s->Freq;
unsigned num2;
Ppmd_See_Update(see);
p->FoundState = s1;
p->FoundState = s;
sum += escFreq;
num2 = i / 2;
i &= 1;
sum += freq & (0 - (UInt32)i);
if (num2 != 0)
{
s += i;
for (;;)
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
}
}
R->Range /= sum;
RC_EncodeFinal(low, freq);
Ppmd7_Update2(p);
return;
}
sum += (s->Freq & (int)(MASK(cur)));
MASK(cur) = 0;
sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
RangeEnc_Encode(rc, sum, escFreq, sum + escFreq);
see->Summ = (UInt16)(see->Summ + sum + escFreq);
{
UInt32 total = sum + escFreq;
see->Summ = (UInt16)(see->Summ + total);
R->Range /= total;
RC_Encode(sum, escFreq);
}
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
s--;
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
}
/* Encodes every byte of the half-open range [buf, lim) in order,
   one Ppmd7z_EncodeSymbol() call per byte.
   Nothing is encoded when (buf >= lim). */
void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim)
{
  const Byte *cur = buf;
  while (cur < lim)
  {
    Ppmd7z_EncodeSymbol(p, *cur);
    cur++;
  }
}

279
C/Ppmd7aDec.c Normal file
View File

@@ -0,0 +1,279 @@
/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
// Thresholds used by the RC_NORM() normalization loop below:
// kTop is compared against (Low ^ (Low + Range)), kBot against Range.
#define kTop (1 << 24)
#define kBot (1 << 15)
// Reads the next input byte through the IByteIn interface stored in (p)->Stream.
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
/* Resets the Ppmd7a range decoder state and preloads Code with the
   first 4 bytes of the input stream (first byte read ends up in the top bits).
   (p->Stream) must be set by the caller before this call.
   Returns: nonzero if the preloaded Code is less than 0xFFFFFFFF, zero otherwise. */
BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p)
{
  unsigned remaining = 4;
  p->Low = 0;
  p->Range = 0xFFFFFFFF;
  p->Code = 0;
  do
    p->Code = (p->Code << 8) | READ_BYTE(p);
  while (--remaining != 0);
  return (p->Code < 0xFFFFFFFF);
}
/* RC_NORM(p): range decoder normalization loop.
   The loop runs while one of these conditions holds:
     1) (Low ^ (Low + Range)) < kTop
     2) Range < kBot; in that case Range is first replaced with ((0 - Low) & (kBot - 1)).
   Each iteration shifts one new input byte into Code and shifts Range and Low left by 8 bits.
   Note: (p) is used unparenthesized, so the argument must be a simple pointer expression. */
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
p->Code = (p->Code << 8) | READ_BYTE(p); \
p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
// RC_NORM_LOCAL expands to nothing here (its body is commented out), so RangeDec_Decode() itself does not normalize.
#define RC_NORM_LOCAL(p) // RC_NORM(p)
// RC_NORM_REMOTE is the active variant: normalization is done by the code that follows a decode step.
#define RC_NORM_REMOTE(p) RC_NORM(p)
// R: the range decoder state inside the CPpmd7 object named (p) in the enclosing scope.
#define R (&p->rc.dec)
/* Narrows the decoder interval to the sub-interval [start, start + size),
   measured in units of the current (already divided) R->Range:
     Low  += start * Range
     Code -= start * Range
     Range *= size
   Normalization is not done here: RC_NORM_LOCAL expands to nothing. */
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
  const UInt32 offset = start * R->Range;
  R->Low += offset;
  R->Code -= offset;
  R->Range *= size;
  RC_NORM_LOCAL(R)
}
// RC_Decode: interval update only; the macro already ends with ';'.
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
// RC_DecodeFinal: interval update followed by normalization (RC_NORM_REMOTE).
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
// RC_GetThreshold: divides R->Range by (total) in place (side effect) and returns Code / (new Range).
// (total) must be nonzero.
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
// CTX: converts a context reference to a CPpmd7_Context pointer via Ppmd7_GetContext().
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
// Defined outside of this file; declared here because the binary-context path calls it directly.
void Ppmd7_UpdateModel(CPpmd7 *p);
// MASK: byte-wise access to the local (charMask) array of the decode function, indexed by symbol value.
#define MASK(sym) ((unsigned char *)charMask)[sym]
/*
Ppmd7a_DecodeSymbol(): decodes one symbol with the Ppmd7a range decoder (p->rc.dec).
  returns:
    >= 0            : decoded byte
    PPMD7_SYM_END   : a context without Suffix was reached while escaping
    PPMD7_SYM_ERROR : the frequency total is larger than Range, or the decoded
                      threshold is outside of the frequency total
  (charMask) has one byte per symbol value. A zero byte marks a symbol as masked
  (already rejected in a higher-order context); in the escape loop the Freq of each
  state is ANDed with its mask byte.
  NOTE(review): this relies on PPMD_SetAllBitsIn256Bytes() setting every mask byte
  to 0xFF - the helper is defined outside of this file, confirm there.
*/
int Ppmd7a_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
// MinContext has more than one state: the threshold is decoded against SummFreq.
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
// Guard: RC_GetThreshold() divides Range by summFreq; a larger summFreq would make the new Range zero.
if (summFreq > R->Range)
return PPMD7_SYM_ERROR;
count = RC_GetThreshold(summFreq);
// hiCnt keeps the original threshold; count is decreased by each Freq until it becomes negative.
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
// The threshold falls into the first state.
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1_0(p);
return sym;
}
p->PrevSuccess = 0;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte sym;
// (hiCnt - count) is the sum of Freq up to and including (s),
// so ((hiCnt - count) - s->Freq) is the cumulative start of (s).
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1(p);
return sym;
}
}
while (--i);
// No state matched: escape. Here hiCnt is still the original threshold.
if (hiCnt >= summFreq)
return PPMD7_SYM_ERROR;
// Now hiCnt = sum of Freq of all states; the escape interval is [hiCnt, summFreq).
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
// Mask all symbols of this context. (s) points to the last state here.
// The loop clears two states per iteration and stops at or after (s),
// so it never reads a state after the last one.
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
// MinContext has a single state: binary decision with the adaptive probability (*prob).
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
UInt32 pr = *prob;
// size0: size of the interval for "the symbol is the single state of the context".
UInt32 size0 = (R->Range >> 14) * pr;
// pr is updated before the branch; the symbol-found branch adds (1 << PPMD_INT_BITS) to it.
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM(R)
// sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
// Ppmd7_UpdateBin(p);
{
// Inlined model update (see the commented-out Ppmd7_UpdateBin() call above).
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
// Freq is incremented only while it is below 128.
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return sym;
}
// Escape from the binary context.
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(size0);
R->Low += size0;
R->Code -= size0;
// Range is first cleared in its low bits below PPMD_BIN_SCALE, then reduced by size0.
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
// Escape loop: move to suffix contexts until the symbol is decoded,
// the end of the suffix chain is reached, or a data error is detected.
for (;;)
{
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
CPpmd7_Context *mc;
unsigned numMasked;
// Normalization for the preceding escape step (RC_Decode() and the binary escape do not normalize).
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
// Skip suffix contexts whose NumStats is equal to numMasked.
do
{
p->OrderFall++;
if (!mc->Suffix)
return PPMD7_SYM_END;
mc = Ppmd7_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
s = Ppmd7_GetStats(p, mc);
{
// hiCnt = sum of (Freq & mask byte) over all states of (mc).
// If NumStats is odd, the first state is added alone:
// (0 - (UInt32)num) is all ones for (num == 1) and zero for (num == 0).
// The remaining states are processed in pairs.
unsigned num = mc->NumStats;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
// Ppmd7_MakeEscFreq() writes a value to freqSum; hiCnt is then added to get the total.
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
// Guard: RC_GetThreshold() divides Range by freqSum; a larger freqSum would make the new Range zero.
if (freqSum > R->Range)
return PPMD7_SYM_ERROR;
count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
// The threshold is inside the symbols part: find the state.
Byte sym;
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
// Subtract masked Freq values until count becomes negative.
// (s) is advanced before the check, so it is moved back by one after the loop.
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
};
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update2(p);
return sym;
}
if (count >= freqSum)
return PPMD7_SYM_ERROR;
// Escape again: the escape interval is [hiCnt, freqSum).
RC_Decode(hiCnt, freqSum - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
// Mask all symbols of this context before moving to the next suffix context.
s = Ppmd7_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}

1062
C/Ppmd8.c
View File

File diff suppressed because it is too large Load Diff

124
C/Ppmd8.h
View File

@@ -1,5 +1,5 @@
/* Ppmd8.h -- PPMdI codec
2018-07-04 : Igor Pavlov : Public domain
/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -14,35 +14,45 @@ EXTERN_C_BEGIN
#define PPMD8_MIN_ORDER 2
#define PPMD8_MAX_ORDER 16
struct CPpmd8_Context_;
typedef
#ifdef PPMD_32BIT
struct CPpmd8_Context_ *
#else
UInt32
#endif
CPpmd8_Context_Ref;
typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref;
#pragma pack(push, 1)
// MY_CPU_pragma_pack_push_1
typedef struct CPpmd8_Context_
{
Byte NumStats;
Byte Flags;
union
{
UInt16 SummFreq;
CPpmd_State2 State2;
} Union2;
union
{
CPpmd_State_Ref Stats;
CPpmd_State4 State4;
} Union4;
CPpmd8_Context_Ref Suffix;
} CPpmd8_Context;
#pragma pack(pop)
// MY_CPU_pragma_pop
#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
/* The BUG in Shkarin's code for FREEZE mode was fixed, but that fixed
code is not compatible with original code for some files compressed
/* PPMdI code rev.2 contains the fix over PPMdI code rev.1.
But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed
in FREEZE mode. So we disable FREEZE mode support. */
// #define PPMD8_FREEZE_SUPPORT
enum
{
PPMD8_RESTORE_METHOD_RESTART,
@@ -50,22 +60,28 @@ enum
#ifdef PPMD8_FREEZE_SUPPORT
, PPMD8_RESTORE_METHOD_FREEZE
#endif
, PPMD8_RESTORE_METHOD_UNSUPPPORTED
};
typedef struct
{
CPpmd8_Context *MinContext, *MaxContext;
CPpmd_State *FoundState;
unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder;
unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod;
Int32 RunLength, InitRL; /* must be 32-bit at least */
UInt32 Size;
UInt32 GlueCount;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
unsigned RestoreMethod;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
/* Range Coder */
UInt32 Range;
UInt32 Code;
UInt32 Low;
@@ -75,16 +91,18 @@ typedef struct
IByteOut *Out;
} Stream;
Byte Indx2Units[PPMD_NUM_INDEXES];
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
UInt32 Stamps[PPMD_NUM_INDEXES];
Byte NS2BSIndx[256], NS2Indx[260];
Byte ExpEscape[16];
CPpmd_See DummySee, See[24][32];
UInt16 BinSumm[25][64];
} CPpmd8;
void Ppmd8_Construct(CPpmd8 *p);
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc);
@@ -94,43 +112,69 @@ void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod);
/* ---------- Internal Functions ---------- */
extern const Byte PPMD8_kExpEscape[16];
#ifdef PPMD_32BIT
#define Ppmd8_GetPtr(p, ptr) (ptr)
#define Ppmd8_GetContext(p, ptr) (ptr)
#define Ppmd8_GetStats(p, ctx) ((ctx)->Stats)
#else
#define Ppmd8_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd8_GetContext(p, offs) ((CPpmd8_Context *)Ppmd8_GetPtr((p), (offs)))
#define Ppmd8_GetStats(p, ctx) ((CPpmd_State *)Ppmd8_GetPtr((p), ((ctx)->Stats)))
#endif
#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context)
#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd8_Update1(CPpmd8 *p);
void Ppmd8_Update1_0(CPpmd8 *p);
void Ppmd8_Update2(CPpmd8 *p);
void Ppmd8_UpdateBin(CPpmd8 *p);
#define Ppmd8_GetBinSumm(p) \
&p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]][ \
p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
p->PrevSuccess + p->MinContext->Flags + ((p->RunLength >> 26) & 0x20)]
&p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \
[ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
+ p->MinContext->Flags ]
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale);
/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases,
where the original PPMdI code can give "Divide by Zero" operation.
We use the following fix to allow correct working of encoder and decoder in all cases.
We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than (p->Range).

PPMD8_CORRECT_SUM_RANGE(p, _sum_):
  p     : pointer to the object that contains the (Range) field
  _sum_ : modifiable lvalue; it is replaced with p->Range, if it is larger than p->Range
The macro expands to a complete "if" statement that already ends with ';',
so call sites don't add a semicolon, and the macro must not be used
directly before "else".
Both macro parameters are parenthesized, so any pointer expression
(for example (&rc)) can be used as (p). */
#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if ((_sum_) > (p)->Range /* /1 */) (_sum_) = (p)->Range;
/* ---------- Decode ---------- */
BoolInt Ppmd8_RangeDec_Init(CPpmd8 *p);
#define PPMD8_SYM_END (-1)
#define PPMD8_SYM_ERROR (-2)
/*
You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init()
Ppmd8_DecodeSymbol()
out:
>= 0 : decoded byte
-1 : PPMD8_SYM_END : End of payload marker
-2 : PPMD8_SYM_ERROR : Data error
*/
BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p);
#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd8_DecodeSymbol(CPpmd8 *p); /* returns: -1 as EndMarker, -2 as DataError */
int Ppmd8_DecodeSymbol(CPpmd8 *p);
/* ---------- Encode ---------- */
#define Ppmd8_RangeEnc_Init(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
void Ppmd8_RangeEnc_FlushData(CPpmd8 *p);
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol); /* symbol = -1 means EndMarker */
#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
void Ppmd8_Flush_RangeEnc(CPpmd8 *p);
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol);
EXTERN_C_END

View File

@@ -1,5 +1,5 @@
/* Ppmd8Dec.c -- PPMdI Decoder
2018-07-04 : Igor Pavlov : Public domain
/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -11,147 +11,269 @@ This code is based on:
#define kTop (1 << 24)
#define kBot (1 << 15)
BoolInt Ppmd8_RangeDec_Init(CPpmd8 *p)
#define READ_BYTE(p) IByteIn_Read((p)->Stream.In)
BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p)
{
unsigned i;
p->Low = 0;
p->Range = 0xFFFFFFFF;
p->Code = 0;
p->Range = 0xFFFFFFFF;
p->Low = 0;
for (i = 0; i < 4; i++)
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream.In);
p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
static UInt32 RangeDec_GetThreshold(CPpmd8 *p, UInt32 total)
{
return p->Code / (p->Range /= total);
}
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
p->Code = (p->Code << 8) | READ_BYTE(p); \
p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
#define R p
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
{
start *= p->Range;
p->Low += start;
p->Code -= start;
p->Range *= size;
while ((p->Low ^ (p->Low + p->Range)) < kTop ||
(p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1)))
{
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream.In);
p->Range <<= 8;
p->Low <<= 8;
}
start *= R->Range;
R->Low += start;
R->Code -= start;
R->Range *= size;
RC_NORM_LOCAL(R)
}
#define MASK(sym) ((signed char *)charMask)[sym]
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
typedef CPpmd8_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd8_UpdateModel(CPpmd8 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
int Ppmd8_DecodeSymbol(CPpmd8 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
if ((count = RangeDec_GetThreshold(p, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
UInt32 summFreq = p->MinContext->Union2.SummFreq;
PPMD8_CORRECT_SUM_RANGE(p, summFreq)
count = RC_GetThreshold(summFreq);
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
Byte symbol;
RangeDec_Decode(p, 0, s->Freq);
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd8_Update1_0(p);
return symbol;
return sym;
}
p->PrevSuccess = 0;
i = p->MinContext->NumStats;
do
{
if ((hiCnt += (++s)->Freq) > count)
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte symbol;
RangeDec_Decode(p, hiCnt - s->Freq, s->Freq);
Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd8_Update1(p);
return symbol;
return sym;
}
}
while (--i);
if (count >= p->MinContext->SummFreq)
return -2;
RangeDec_Decode(p, hiCnt, p->MinContext->SummFreq - hiCnt);
if (hiCnt >= summFreq)
return PPMD8_SYM_ERROR;
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
PPMD_SetAllBitsIn256Bytes(charMask);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats;
do { MASK((--s)->Symbol) = 0; } while (--i);
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt16 *prob = Ppmd8_GetBinSumm(p);
if (((p->Code / (p->Range >>= 14)) < *prob))
UInt32 pr = *prob;
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte symbol;
RangeDec_Decode(p, 0, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
symbol = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
Ppmd8_UpdateBin(p);
return symbol;
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM(R)
// sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
// Ppmd8_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 196));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
p->MaxContext = p->MinContext = c;
else
Ppmd8_UpdateModel(p);
}
RangeDec_Decode(p, *prob, (1 << 14) - *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD8_kExpEscape[*prob >> 10];
return sym;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(rc2, size0);
R->Low += size0;
R->Code -= size0;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_State *ps[256], *s;
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
UInt32 freqSum2;
CPpmd_See *see;
unsigned i, num, numMasked = p->MinContext->NumStats;
CPpmd8_Context *mc;
unsigned numMasked;
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
return -1;
p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix);
if (!mc->Suffix)
return PPMD8_SYM_END;
mc = Ppmd8_GetContext(p, mc->Suffix);
}
while (p->MinContext->NumStats == numMasked);
hiCnt = 0;
s = Ppmd8_GetStats(p, p->MinContext);
i = 0;
num = p->MinContext->NumStats - numMasked;
while (mc->NumStats == numMasked);
s = Ppmd8_GetStats(p, mc);
{
unsigned num = (unsigned)mc->NumStats + 1;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
int k = (int)(MASK(s->Symbol));
hiCnt += (s->Freq & k);
ps[i] = s++;
i -= k;
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
while (i != num);
see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
count = RangeDec_GetThreshold(p, freqSum);
freqSum2 = freqSum;
PPMD8_CORRECT_SUM_RANGE(R, freqSum2);
count = RC_GetThreshold(freqSum2);
if (count < hiCnt)
{
Byte symbol;
CPpmd_State **pps = ps;
for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
s = *pps;
RangeDec_Decode(p, hiCnt - s->Freq, s->Freq);
Byte sym;
// Ppmd_See_Update(see); // new (see->Summ) value can overflow over 16-bits in some rare cases
s = Ppmd8_GetStats(p, p->MinContext);
hiCnt = count;
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
}
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd8_Update2(p);
return symbol;
return sym;
}
if (count >= freqSum)
return -2;
RangeDec_Decode(p, hiCnt, freqSum - hiCnt);
if (count >= freqSum2)
return PPMD8_SYM_ERROR;
RC_Decode(hiCnt, freqSum2 - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
s = Ppmd8_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats + 1;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}

View File

@@ -1,5 +1,5 @@
/* Ppmd8Enc.c -- PPMdI Encoder
2017-04-03 : Igor Pavlov : Public domain
/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
@@ -11,59 +11,100 @@ This code is based on:
#define kTop (1 << 24)
#define kBot (1 << 15)
void Ppmd8_RangeEnc_FlushData(CPpmd8 *p)
#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24))
void Ppmd8_Flush_RangeEnc(CPpmd8 *p)
{
unsigned i;
for (i = 0; i < 4; i++, p->Low <<= 8 )
IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24));
WRITE_BYTE(p);
}
/* Range-encoder normalization.
   Emits the top byte of p->Low and shifts Low/Range left by 8 bits for as long as
     - the top byte of Low and (Low + Range) agree (their XOR is below kTop), or
     - Range has dropped below kBot, in which case Range is first clamped to
       ((0 - Low) & (kBot - 1)). */
static void RangeEnc_Normalize(CPpmd8 *p)
{
  for (;;)
  {
    if ((p->Low ^ (p->Low + p->Range)) >= kTop)
    {
      /* top byte is not settled yet: continue only if Range underflowed */
      if (p->Range >= kBot)
        break;
      p->Range = (0 - p->Low) & (kBot - 1);
    }
    IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24));
    p->Range <<= 8;
    p->Low <<= 8;
  }
}
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) \
{ WRITE_BYTE(p); p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
// #define RC_PRE(total) p->Range /= total;
// #define RC_PRE(total)
#define R p
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
{
p->Low += start * (p->Range /= total);
p->Range *= size;
RangeEnc_Normalize(p);
}
/* Encodes the "0" outcome of a binary decision: the range is scaled down
   by 2^14 and multiplied by (size0); p->Low is left unchanged.
   The coder is normalized afterwards. */
static void RangeEnc_EncodeBit_0(CPpmd8 *p, UInt32 size0)
{
  p->Range = (p->Range >> 14) * size0;
  RangeEnc_Normalize(p);
}
static void RangeEnc_EncodeBit_1(CPpmd8 *p, UInt32 size0)
{
p->Low += size0 * (p->Range >>= 14);
p->Range *= ((1 << 14) - size0);
RangeEnc_Normalize(p);
R->Low += start * (R->Range /= total);
R->Range *= size;
RC_NORM_LOCAL(R);
}
#define MASK(sym) ((signed char *)charMask)[sym]
#define RC_Encode(start, size, total) RangeEnc_Encode(p, start, size, total);
#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total); RC_NORM_REMOTE(p);
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
typedef CPpmd8_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd8_UpdateModel(CPpmd8 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
// MY_FORCE_INLINE
// static
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
PPMD8_CORRECT_SUM_RANGE(p, summFreq)
// RC_PRE(summFreq);
if (s->Symbol == symbol)
{
RangeEnc_Encode(p, 0, s->Freq, p->MinContext->SummFreq);
RC_EncodeFinal(0, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1_0(p);
return;
@@ -75,7 +116,8 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
if ((++s)->Symbol == symbol)
{
RangeEnc_Encode(p, sum, s->Freq, p->MinContext->SummFreq);
RC_EncodeFinal(sum, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1(p);
return;
@@ -84,80 +126,189 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
}
while (--i);
RC_Encode(sum, summFreq - sum, summFreq);
PPMD_SetAllBitsIn256Bytes(charMask);
// MASK(s->Symbol) = 0;
// i = p->MinContext->NumStats;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats;
do { MASK((--s)->Symbol) = 0; } while (--i);
RangeEnc_Encode(p, sum, p->MinContext->SummFreq - sum, p->MinContext->SummFreq);
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
UInt16 *prob = Ppmd8_GetBinSumm(p);
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt32 pr = *prob;
UInt32 bound = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
RangeEnc_EncodeBit_0(p, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeEnc_EncodeBit_0(p, bound);
R->Range = bound;
RC_NORM(R);
// p->FoundState = s;
// Ppmd8_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
p->FoundState = s;
Ppmd8_UpdateBin(p);
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
p->MaxContext = p->MinContext = c;
else
Ppmd8_UpdateModel(p);
}
return;
}
else
{
RangeEnc_EncodeBit_1(p, *prob);
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD8_kExpEscape[*prob >> 10];
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeEnc_EncodeBit_1(p, bound);
R->Low += bound;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
p->PrevSuccess = 0;
}
}
for (;;)
{
UInt32 escFreq;
CPpmd_See *see;
CPpmd_State *s;
UInt32 sum;
unsigned i, numMasked = p->MinContext->NumStats;
UInt32 sum, escFreq;
CPpmd8_Context *mc;
unsigned i, numMasked;
RC_NORM_REMOTE(p)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
p->MinContext = Ppmd8_GetContext(p, p->MinContext->Suffix);
mc = Ppmd8_GetContext(p, mc->Suffix);
}
while (p->MinContext->NumStats == numMasked);
while (mc->NumStats == numMasked);
p->MinContext = mc;
see = Ppmd8_MakeEscFreq(p, numMasked, &escFreq);
s = Ppmd8_GetStats(p, p->MinContext);
sum = 0;
i = p->MinContext->NumStats + 1;
i = (unsigned)p->MinContext->NumStats + 1;
do
{
int cur = s->Symbol;
if (cur == symbol)
unsigned cur = s->Symbol;
if ((int)cur == symbol)
{
UInt32 low = sum;
CPpmd_State *s1 = s;
do
{
sum += (s->Freq & (int)(MASK(s->Symbol)));
s++;
}
while (--i);
RangeEnc_Encode(p, low, s1->Freq, sum + escFreq);
UInt32 freq = s->Freq;
unsigned num2;
Ppmd_See_Update(see);
p->FoundState = s1;
p->FoundState = s;
sum += escFreq;
num2 = i / 2;
i &= 1;
sum += freq & (0 - (UInt32)i);
if (num2 != 0)
{
s += i;
for (;;)
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
}
}
PPMD8_CORRECT_SUM_RANGE(p, sum);
RC_EncodeFinal(low, freq, sum);
Ppmd8_Update2(p);
return;
}
sum += (s->Freq & (int)(MASK(cur)));
MASK(cur) = 0;
sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
RangeEnc_Encode(p, sum, escFreq, sum + escFreq);
see->Summ = (UInt16)(see->Summ + sum + escFreq);
{
UInt32 total = sum + escFreq;
see->Summ = (UInt16)(see->Summ + total);
PPMD8_CORRECT_SUM_RANGE(p, total);
RC_Encode(sum, total - sum, total);
}
{
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
s--;
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
}

609
C/Sha1.c
View File

@@ -1,5 +1,5 @@
/* Sha1.c -- SHA-1 Hash
2017-04-03 : Igor Pavlov : Public domain
2021-04-01 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -10,331 +10,434 @@ This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ l
#include "RotateDefs.h"
#include "Sha1.h"
// define it for speed optimization
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif
#ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER
#if _MSC_VER >= 1200
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#endif
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef _SHA_SUPPORTED
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
#define UPDATE_BLOCKS(p) Sha1_UpdateBlocks
#endif
/*
Sha1_SetFunction()
  Selects the block-update routine stored in p->func_UpdateBlocks.
  algo:
    SHA1_ALGO_DEFAULT - the routine currently held in g_FUNC_UPDATE_BLOCKS
    SHA1_ALGO_SW      - the portable Sha1_UpdateBlocks
    SHA1_ALGO_HW      - g_FUNC_UPDATE_BLOCKS_HW, only if it is non-NULL
  Builds without _SHA_SUPPORTED accept only the values 0 and 1 and always
  select Sha1_UpdateBlocks.
  return:
    False - (algo) is not supported; p->func_UpdateBlocks is left untouched
    True  - p->func_UpdateBlocks was set
*/
BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
{
  SHA1_FUNC_UPDATE_BLOCKS selected;

  #ifdef _SHA_SUPPORTED
  switch (algo)
  {
    case SHA1_ALGO_SW:
      selected = Sha1_UpdateBlocks;
      break;
    case SHA1_ALGO_DEFAULT:
      selected = g_FUNC_UPDATE_BLOCKS;
      break;
    case SHA1_ALGO_HW:
      selected = g_FUNC_UPDATE_BLOCKS_HW;
      if (!selected)
        return False;
      break;
    default:
      return False;
  }
  #else
  if (algo > 1)
    return False;
  selected = Sha1_UpdateBlocks;
  #endif

  p->func_UpdateBlocks = selected;
  return True;
}
/* define it for speed optimization */
// #define _SHA1_UNROLL
// allowed unroll steps: (1, 2, 4, 5, 20)
#ifdef _SHA1_UNROLL
#define kNumW 16
#define WW(i) W[(i)&15]
#define STEP_PRE 20
#define STEP_MAIN 20
#else
#define _SHA1_BIG_W
#define STEP_PRE 5
#define STEP_MAIN 5
#endif
#ifdef _SHA1_BIG_W
#define kNumW 80
#define WW(i) W[i]
#define w(i) W[i]
#else
#define kNumW 16
#define w(i) W[(i)&15]
#endif
#define w0(i) (W[i] = data[i])
#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4))
#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1))
#define w1(i) (WW(i) = rotlFixed(WW((i)-3) ^ WW((i)-8) ^ WW((i)-14) ^ WW((i)-16), 1))
#define f0(x,y,z) ( 0x5a827999 + (z^(x&(y^z))) )
#define f1(x,y,z) ( 0x6ed9eba1 + (x^y^z) )
#define f2(x,y,z) ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) )
#define f3(x,y,z) ( 0xca62c1d6 + (x^y^z) )
#define f1(x,y,z) (z^(x&(y^z)))
#define f2(x,y,z) (x^y^z)
#define f3(x,y,z) ((x&y)|(z&(x|y)))
#define f4(x,y,z) (x^y^z)
/*
#define T1(fx, ww) \
tmp = e + fx(b,c,d) + ww + rotlFixed(a, 5); \
e = d; \
d = c; \
c = rotlFixed(b, 30); \
b = a; \
a = tmp; \
*/
#define RK(a,b,c,d,e, fx, w, k) e += fx(b,c,d) + w + k + rotlFixed(a,5); b = rotlFixed(b,30);
#define T5(a,b,c,d,e, fx, ww) \
e += fx(b,c,d) + ww + rotlFixed(a, 5); \
b = rotlFixed(b, 30); \
#define R0(a,b,c,d,e, i) RK(a,b,c,d,e, f1, w0(i), 0x5A827999)
#define R1(a,b,c,d,e, i) RK(a,b,c,d,e, f1, w1(i), 0x5A827999)
#define R2(a,b,c,d,e, i) RK(a,b,c,d,e, f2, w1(i), 0x6ED9EBA1)
#define R3(a,b,c,d,e, i) RK(a,b,c,d,e, f3, w1(i), 0x8F1BBCDC)
#define R4(a,b,c,d,e, i) RK(a,b,c,d,e, f4, w1(i), 0xCA62C1D6)
#define RX_1_4(rx1, rx4, i) \
rx1(a,b,c,d,e, i); \
rx4(e,a,b,c,d, i+1); \
rx4(d,e,a,b,c, i+2); \
rx4(c,d,e,a,b, i+3); \
rx4(b,c,d,e,a, i+4); \
/*
#define R1(i, fx, wx) \
T1 ( fx, wx(i)); \
#define RX_5(rx, i) RX_1_4(rx, rx, i);
#define R2(i, fx, wx) \
R1 ( (i) , fx, wx); \
R1 ( (i) + 1, fx, wx); \
#ifdef _SHA1_UNROLL
#define R4(i, fx, wx) \
R2 ( (i) , fx, wx); \
R2 ( (i) + 2, fx, wx); \
*/
#define RX_15 \
RX_5(R0, 0); \
RX_5(R0, 5); \
RX_5(R0, 10);
#define M5(i, fx, wx0, wx1) \
T5 ( a,b,c,d,e, fx, wx0((i) ) ); \
T5 ( e,a,b,c,d, fx, wx1((i)+1) ); \
T5 ( d,e,a,b,c, fx, wx1((i)+2) ); \
T5 ( c,d,e,a,b, fx, wx1((i)+3) ); \
T5 ( b,c,d,e,a, fx, wx1((i)+4) ); \
#define RX_20(rx, i) \
RX_5(rx, i); \
RX_5(rx, i + 5); \
RX_5(rx, i + 10); \
RX_5(rx, i + 15);
#define R5(i, fx, wx) \
M5 ( i, fx, wx, wx) \
#if STEP_PRE > 5
#define R20_START \
R5 ( 0, f0, w0); \
R5 ( 5, f0, w0); \
R5 ( 10, f0, w0); \
M5 ( 15, f0, w0, w1); \
#elif STEP_PRE == 5
#define R20_START \
{ size_t i; for (i = 0; i < 15; i += STEP_PRE) \
{ R5(i, f0, w0); } } \
M5 ( 15, f0, w0, w1); \
#else
#define RX_15 { size_t i; for (i = 0; i < 15; i += 5) { RX_5(R0, i); } }
#define RX_20(rx, ii) { size_t i; i = ii; for (; i < ii + 20; i += 5) { RX_5(rx, i); } }
#if STEP_PRE == 1
#define R_PRE R1
#elif STEP_PRE == 2
#define R_PRE R2
#elif STEP_PRE == 4
#define R_PRE R4
#endif
#define R20_START \
{ size_t i; for (i = 0; i < 16; i += STEP_PRE) \
{ R_PRE(i, f0, w0); } } \
R4 ( 16, f0, w1); \
#endif
void Sha1_Init(CSha1 *p)
#if STEP_MAIN > 5
#define R20(ii, fx) \
R5 ( (ii) , fx, w1); \
R5 ( (ii) + 5 , fx, w1); \
R5 ( (ii) + 10, fx, w1); \
R5 ( (ii) + 15, fx, w1); \
#else
#if STEP_MAIN == 1
#define R_MAIN R1
#elif STEP_MAIN == 2
#define R_MAIN R2
#elif STEP_MAIN == 4
#define R_MAIN R4
#elif STEP_MAIN == 5
#define R_MAIN R5
#endif
#define R20(ii, fx) \
{ size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
{ R_MAIN(i, fx, w1); } } \
#endif
/*
Sha1_InitState()
  Resets (p) for hashing a new message: the byte counter is cleared and the
  five state words are loaded with the SHA-1 initial values.
  Only p->count and p->state[] are written; p->func_UpdateBlocks is not touched.
*/
void Sha1_InitState(CSha1 *p)
{
  p->count = 0;
  p->state[0] = 0x67452301;
  p->state[1] = 0xEFCDAB89;
  p->state[2] = 0x98BADCFE;
  p->state[3] = 0x10325476;
  p->state[4] = 0xC3D2E1F0;
}
void Sha1_GetBlockDigest(CSha1 *p, const UInt32 *data, UInt32 *destDigest)
/*
Sha1_Init()
  Full initialization of (p): picks the block-update routine
  (g_FUNC_UPDATE_BLOCKS when _SHA_SUPPORTED is defined, NULL otherwise)
  and then resets the hash state via Sha1_InitState().
*/
void Sha1_Init(CSha1 *p)
{
  #ifdef _SHA_SUPPORTED
  p->func_UpdateBlocks = g_FUNC_UPDATE_BLOCKS;
  #else
  p->func_UpdateBlocks = NULL;
  #endif
  Sha1_InitState(p);
}
MY_NO_INLINE
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
{
UInt32 a, b, c, d, e;
UInt32 W[kNumW];
// if (numBlocks != 0x1264378347) return;
if (numBlocks == 0)
return;
a = p->state[0];
b = p->state[1];
c = p->state[2];
d = p->state[3];
e = p->state[4];
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
RX_15
RX_1_4(R0, R1, 15);
RX_20(R2, 20);
RX_20(R3, 40);
RX_20(R4, 60);
destDigest[0] = p->state[0] + a;
destDigest[1] = p->state[1] + b;
destDigest[2] = p->state[2] + c;
destDigest[3] = p->state[3] + d;
destDigest[4] = p->state[4] + e;
}
void Sha1_UpdateBlock_Rar(CSha1 *p, UInt32 *data, int returnRes)
{
UInt32 a, b, c, d, e;
UInt32 W[kNumW];
a = p->state[0];
b = p->state[1];
c = p->state[2];
d = p->state[3];
e = p->state[4];
RX_15
RX_1_4(R0, R1, 15);
RX_20(R2, 20);
RX_20(R3, 40);
RX_20(R4, 60);
p->state[0] += a;
p->state[1] += b;
p->state[2] += c;
p->state[3] += d;
p->state[4] += e;
if (returnRes)
do
{
size_t i;
for (i = 0 ; i < SHA1_NUM_BLOCK_WORDS; i++)
data[i] = W[kNumW - SHA1_NUM_BLOCK_WORDS + i];
#if STEP_PRE < 5 || STEP_MAIN < 5
UInt32 tmp;
#endif
R20_START
R20(20, f1);
R20(40, f2);
R20(60, f3);
a += state[0];
b += state[1];
c += state[2];
d += state[3];
e += state[4];
state[0] = a;
state[1] = b;
state[2] = c;
state[3] = d;
state[4] = e;
data += 64;
}
while (--numBlocks);
}
#define Sha1_UpdateBlock(p) Sha1_GetBlockDigest(p, p->buffer, p->state)
#define Sha1_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
{
unsigned pos, pos2;
if (size == 0)
return;
pos = (unsigned)p->count & 0x3F;
p->count += size;
pos2 = pos & 3;
pos >>= 2;
if (pos2 != 0)
{
UInt32 w;
pos2 = (3 - pos2) * 8;
w = ((UInt32)*data++) << pos2;
if (--size && pos2)
{
pos2 -= 8;
w |= ((UInt32)*data++) << pos2;
if (--size && pos2)
{
pos2 -= 8;
w |= ((UInt32)*data++) << pos2;
size--;
}
}
p->buffer[pos] |= w;
if (pos2 == 0)
pos++;
}
for (;;)
{
if (pos == SHA1_NUM_BLOCK_WORDS)
{
for (;;)
{
size_t i;
Sha1_UpdateBlock(p);
if (size < SHA1_BLOCK_SIZE)
break;
size -= SHA1_BLOCK_SIZE;
for (i = 0; i < SHA1_NUM_BLOCK_WORDS; i += 2)
{
p->buffer[i ] = GetBe32(data);
p->buffer[i + 1] = GetBe32(data + 4);
data += 8;
}
}
pos = 0;
}
if (size < 4)
break;
p->buffer[pos] = GetBe32(data);
data += 4;
size -= 4;
pos++;
}
if (size != 0)
{
UInt32 w = ((UInt32)data[0]) << 24;
if (size > 1)
{
w |= ((UInt32)data[1]) << 16;
if (size > 2)
w |= ((UInt32)data[2]) << 8;
}
p->buffer[pos] = w;
}
}
void Sha1_Update_Rar(CSha1 *p, Byte *data, size_t size /* , int rar350Mode */)
{
int returnRes = False;
unsigned pos = (unsigned)p->count & 0x3F;
unsigned num;
p->count += size;
while (size--)
num = 64 - pos;
if (num > size)
{
unsigned pos2 = (pos & 3);
UInt32 v = ((UInt32)*data++) << (8 * (3 - pos2));
UInt32 *ref = &(p->buffer[pos >> 2]);
pos++;
if (pos2 == 0)
{
*ref = v;
continue;
memcpy(p->buffer + pos, data, size);
return;
}
*ref |= v;
if (pos == SHA1_BLOCK_SIZE)
if (pos != 0)
{
pos = 0;
Sha1_UpdateBlock_Rar(p, p->buffer, returnRes);
if (returnRes)
{
size_t i;
for (i = 0; i < SHA1_NUM_BLOCK_WORDS; i++)
{
UInt32 d = p->buffer[i];
Byte *prev = data + i * 4 - SHA1_BLOCK_SIZE;
SetUi32(prev, d);
size -= num;
memcpy(p->buffer + pos, data, num);
data += num;
Sha1_UpdateBlock(p);
}
}
// returnRes = rar350Mode;
returnRes = True;
}
{
size_t numBlocks = size >> 6;
UPDATE_BLOCKS(p)(p->state, data, numBlocks);
size &= 0x3F;
if (size == 0)
return;
data += (numBlocks << 6);
memcpy(p->buffer, data, size);
}
}
void Sha1_Final(CSha1 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
unsigned pos2 = (pos & 3);
UInt64 numBits;
UInt32 w;
unsigned i;
pos >>= 2;
w = 0;
if (pos2 != 0)
w = p->buffer[pos];
p->buffer[pos++] = w | (((UInt32)0x80000000) >> (8 * pos2));
p->buffer[pos++] = 0x80;
while (pos != (SHA1_NUM_BLOCK_WORDS - 2))
if (pos > (64 - 8))
{
pos &= 0xF;
if (pos == 0)
while (pos != 64) { p->buffer[pos++] = 0; }
// memset(&p->buf.buffer[pos], 0, 64 - pos);
Sha1_UpdateBlock(p);
p->buffer[pos++] = 0;
}
numBits = (p->count << 3);
p->buffer[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
p->buffer[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
Sha1_UpdateBlock(p);
for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
{
UInt32 v = p->state[i];
SetBe32(digest, v);
digest += 4;
}
Sha1_Init(p);
}
void Sha1_32_PrepareBlock(const CSha1 *p, UInt32 *block, unsigned size)
{
const UInt64 numBits = (p->count + size) << 5;
block[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
block[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
block[size++] = 0x80000000;
while (size != (SHA1_NUM_BLOCK_WORDS - 2))
block[size++] = 0;
}
void Sha1_32_Update(CSha1 *p, const UInt32 *data, size_t size)
{
unsigned pos = (unsigned)p->count & 0xF;
p->count += size;
while (size--)
{
p->buffer[pos++] = *data++;
if (pos == SHA1_NUM_BLOCK_WORDS)
{
pos = 0;
Sha1_UpdateBlock(p);
}
}
}
void Sha1_32_Final(CSha1 *p, UInt32 *digest)
{
UInt64 numBits;
unsigned pos = (unsigned)p->count & 0xF;
p->buffer[pos++] = 0x80000000;
while (pos != (SHA1_NUM_BLOCK_WORDS - 2))
/*
if (pos & 3)
{
pos &= 0xF;
if (pos == 0)
Sha1_UpdateBlock(p);
p->buffer[pos++] = 0;
p->buffer[pos] = 0;
p->buffer[pos + 1] = 0;
p->buffer[pos + 2] = 0;
pos += 3;
pos &= ~3;
}
{
for (; pos < 64 - 8; pos += 4)
*(UInt32 *)(&p->buffer[pos]) = 0;
}
*/
memset(&p->buffer[pos], 0, (64 - 8) - pos);
{
UInt64 numBits = (p->count << 3);
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
numBits = (p->count << 5);
p->buffer[SHA1_NUM_BLOCK_WORDS - 2] = (UInt32)(numBits >> 32);
p->buffer[SHA1_NUM_BLOCK_WORDS - 1] = (UInt32)(numBits);
Sha1_UpdateBlock(p);
Sha1_GetBlockDigest(p, p->buffer, digest);
SetBe32(digest, p->state[0]);
SetBe32(digest + 4, p->state[1]);
SetBe32(digest + 8, p->state[2]);
SetBe32(digest + 12, p->state[3]);
SetBe32(digest + 16, p->state[4]);
Sha1_Init(p);
Sha1_InitState(p);
}
/*
Sha1_PrepareBlock()
  Lays out the tail of a final SHA-1 block inside (block), where the first
  (size) bytes already hold message data:
    - the last two 32-bit words receive the bit length ((p->count + size) * 8),
      high word first, each stored with SetBe32
    - the word at (block + size) receives the value 0x80 via SetUi32
      (presumably a little-endian store, i.e. the 0x80 padding byte followed
      by three zero bytes - confirm against CpuArch.h)
    - every following word up to the length field is cleared
  (size) must be a multiple of 4 and not larger than
  (SHA1_NUM_BLOCK_WORDS - 3) * 4; otherwise the fill loop never meets its
  end condition.
  (p) is only read.
*/
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
{
  const UInt64 numBits = (p->count + size) << 3;
  UInt32 *words = (UInt32 *)(void *)block;
  const unsigned lenOffset = (SHA1_NUM_BLOCK_WORDS - 2) * 4;

  SetBe32(&words[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32));
  SetBe32(&words[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits));

  // padding marker directly after the message bytes
  SetUi32((UInt32 *)(void *)(block + size), 0x80);

  // zero fill between the marker word and the length field
  for (size += 4; size != lenOffset; size += 4)
    *((UInt32 *)(void *)(block + size)) = 0;
}
/*
Sha1_GetBlockDigest()
  Runs the block-update routine of (p) over exactly one block taken from
  (data), starting from a private copy of p->state, and writes the resulting
  state words to (destDigest) in big-endian order.
  (p) itself is not modified. No padding is added here: (data) must already
  be a complete block.
*/
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
{
  MY_ALIGN (16)
  UInt32 st[SHA1_NUM_DIGEST_WORDS];
  unsigned i;

  for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
    st[i] = p->state[i];

  UPDATE_BLOCKS(p)(st, data, 1);

  for (i = 0; i < SHA1_NUM_DIGEST_WORDS; i++)
  {
    SetBe32(destDigest + i * 4, st[i]);
  }
}
/*
Sha1Prepare()
  Fills the two global dispatch pointers:
    g_FUNC_UPDATE_BLOCKS    - routine used for SHA1_ALGO_DEFAULT
    g_FUNC_UPDATE_BLOCKS_HW - hardware routine, or NULL if it is not usable
  Both start as (Sha1_UpdateBlocks, NULL) and are switched to
  Sha1_UpdateBlocks_HW when the CPU check succeeds:
    x86 / x64 : CPU_IsSupported_SHA() && CPU_IsSupported_SSSE3()
                (the check is compiled out when USE_MY_MM is defined, so the
                hardware routine is then selected unconditionally)
    other     : CPU_IsSupported_SHA1()
  Without _SHA_SUPPORTED the function does nothing.

  The parameter list is written as (void) so that the definition is a real
  prototype and matches the declaration in Sha1.h.
*/
void Sha1Prepare(void)
{
  #ifdef _SHA_SUPPORTED
  SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
  f = Sha1_UpdateBlocks;
  f_hw = NULL;
  #ifdef MY_CPU_X86_OR_AMD64
  #ifndef USE_MY_MM
  if (CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3()
      // && CPU_IsSupported_SSE41()
      )
  #endif
  #else
  if (CPU_IsSupported_SHA1())
  #endif
  {
    // printf("\n========== HW SHA1 ======== \n");
    f = f_hw = Sha1_UpdateBlocks_HW;
  }
  g_FUNC_UPDATE_BLOCKS    = f;
  g_FUNC_UPDATE_BLOCKS_HW = f_hw;
  #endif
}

View File

@@ -1,5 +1,5 @@
/* Sha1.h -- SHA-1 Hash
2016-05-20 : Igor Pavlov : Public domain */
2021-02-08 : Igor Pavlov : Public domain */
#ifndef __7Z_SHA1_H
#define __7Z_SHA1_H
@@ -14,24 +14,62 @@ EXTERN_C_BEGIN
#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4)
#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4)
typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
if (the system supports different SHA1 code implementations)
{
(CSha1::func_UpdateBlocks) will be used
(CSha1::func_UpdateBlocks) can be set by
Sha1_Init() - to default (fastest)
Sha1_SetFunction() - to any algo
}
else
{
(CSha1::func_UpdateBlocks) is ignored.
}
*/
typedef struct
{
UInt32 state[SHA1_NUM_DIGEST_WORDS];
SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
UInt32 buffer[SHA1_NUM_BLOCK_WORDS];
UInt64 __pad_2[2];
UInt32 state[SHA1_NUM_DIGEST_WORDS];
UInt32 __pad_3[3];
Byte buffer[SHA1_BLOCK_SIZE];
} CSha1;
void Sha1_Init(CSha1 *p);
void Sha1_GetBlockDigest(CSha1 *p, const UInt32 *data, UInt32 *destDigest);
#define SHA1_ALGO_DEFAULT 0
#define SHA1_ALGO_SW 1
#define SHA1_ALGO_HW 2
/*
Sha1_SetFunction()
return:
0 - (algo) value is not supported, and func_UpdateBlocks was not changed
1 - func_UpdateBlocks was set according to the (algo) value.
*/
BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo);
void Sha1_InitState(CSha1 *p);
void Sha1_Init(CSha1 *p);
void Sha1_Update(CSha1 *p, const Byte *data, size_t size);
void Sha1_Final(CSha1 *p, Byte *digest);
void Sha1_Update_Rar(CSha1 *p, Byte *data, size_t size /* , int rar350Mode */);
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
void Sha1_32_PrepareBlock(const CSha1 *p, UInt32 *block, unsigned size);
void Sha1_32_Update(CSha1 *p, const UInt32 *data, size_t size);
void Sha1_32_Final(CSha1 *p, UInt32 *digest);
// void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
call Sha1Prepare() once at program start.
It prepares all supported implementations, and detects the fastest implementation.
*/
void Sha1Prepare(void);
EXTERN_C_END

373
C/Sha1Opt.c Normal file
View File

@@ -0,0 +1,373 @@
/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
// #define USE_MY_MM
#endif
#endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#if defined(_MSC_VER)
// SSSE3: for clang-cl:
#include <tmmintrin.h>
#define __SHA__
#endif
#endif
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
// #pragma GCC target("sha,ssse3")
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#ifdef USE_MY_MM
#define USE_VER_MIN 1300
#else
#define USE_VER_MIN 1910
#endif
#if _MSC_VER >= USE_VER_MIN
#define USE_HW_SHA
#endif
#endif
// #endif // MY_CPU_X86_OR_AMD64
#ifdef USE_HW_SHA
// #pragma message("Sha1 HW")
// #include <wmmintrin.h>
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
#include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
// #include <intrin.h>
#endif
#ifdef USE_MY_MM
#include "My_mm.h"
#endif
#endif
/*
SHA1 uses:
SSE2:
_mm_loadu_si128
_mm_storeu_si128
_mm_set_epi32
_mm_add_epi32
_mm_shuffle_epi32 / pshufd
_mm_xor_si128
_mm_cvtsi128_si32
_mm_cvtsi32_si128
SSSE3:
_mm_shuffle_epi8 / pshufb
SHA:
_mm_sha1*
*/
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f);
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
#define LOAD_SHUFFLE(m, k) \
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
SHUFFLE_EPI8(m, mask); \
#define SM1(m0, m1, m2, m3) \
SHA1_MSG1(m0, m1); \
#define SM2(m0, m1, m2, m3) \
XOR_SI128(m3, m1); \
SHA1_MSG2(m3, m2); \
#define SM3(m0, m1, m2, m3) \
XOR_SI128(m3, m1); \
SM1(m0, m1, m2, m3) \
SHA1_MSG2(m3, m2); \
#define NNN(m0, m1, m2, m3)
#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
e1 = abcd; \
SHA1_RND4(abcd, e0, (k) / 5); \
SHA1_NEXTE(e1, m1); \
OP(m0, m1, m2, m3); \
#define R16(k, mx, OP0, OP1, OP2, OP3) \
R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \
R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \
R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
#define PREPARE_STATE \
SHUFFLE_EPI32 (abcd, 0x1B); \
SHUFFLE_EPI32 (e0, 0x1B); \
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
Sha1_UpdateBlocks_HW() : x86 / x64 version built on the SHA-extension intrinsics.
  state     : five 32-bit SHA-1 state words, updated in place
  data      : input, (numBlocks * 64) bytes are consumed
  numBlocks : number of 64-byte blocks; 0 is allowed and returns immediately
  (state) and (data) are accessed with unaligned loads / stores only.
*/
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
// pshufb control used by LOAD_SHUFFLE: reverses the order of all 16 bytes of a vector
const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
__m128i abcd, e0;
// the do/while below always executes once, so the empty input is filtered here
if (numBlocks == 0)
return;
abcd = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // dbca
e0 = _mm_cvtsi32_si128((int)state[4]); // 000e
// reverse the 32-bit lane order of (abcd) and (e0) (shuffle 0x1B);
// the same shuffle is applied again after the loop to restore the memory order
PREPARE_STATE
do
{
__m128i abcd_save, e2;
__m128i m0, m1, m2, m3;
__m128i e1;
// keep the state from the start of this block: it is added back after the rounds
abcd_save = abcd;
e2 = e0;
// load the four 16-byte quarters of the block, byte-reversed through (mask)
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
ADD_EPI32(e0, m0);
// 5 groups of 4 R4 steps; every R4 issues one SHA1_RND4 with function index (k / 5).
// SM1 / SM2 / SM3 run the message-schedule instructions needed for later steps,
// NNN does nothing (no further schedule words are required).
R16 ( 0, m0, SM1, SM3, SM3, SM3 );
R16 ( 1, m0, SM3, SM3, SM3, SM3 );
R16 ( 2, m0, SM3, SM3, SM3, SM3 );
R16 ( 3, m0, SM3, SM3, SM3, SM3 );
// last group passes the saved (e2) instead of a message vector, so the final
// SHA1_NEXTE adds the block-start (e) into the new (e0)
R16 ( 4, e2, SM2, NNN, NNN, NNN );
ADD_EPI32(abcd, abcd_save);
// advance to the next 64-byte block
data += 64;
}
while (--numBlocks);
PREPARE_STATE
_mm_storeu_si128((__m128i *) (void *) state, abcd);
*(state+4) = (UInt32)_mm_cvtsi128_si32(e0);
}
#endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
// #pragma message("=== Sha1 HW === ")
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC
#ifdef MY_CPU_BE
#define MY_rev32_for_LE(x)
#else
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
#endif
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
/* Helper macros for the ARM SHA-1 block function below.
   They rely on locals of that function: data, abcd, t. */

/* Load the k-th 16-byte quarter of the current block into (m) and, on
   little-endian targets, reverse the bytes of every 32-bit word.
   The last line has no trailing backslash, so the macro does not depend on
   the following source line being blank. */
#define LOAD_SHUFFLE(m, k) \
    m = LOAD_128((data + (k) * 16)); \
    MY_rev32_for_LE(m);

/* message schedule updates */
#define SU0(dest, src2, src3)  dest = vsha1su0q_u32(dest, src2, src3);
#define SU1(dest, src)         dest = vsha1su1q_u32(dest, src);

/* four rounds with the "choose" / "parity" / "majority" instruction;
   (t) holds the message words with the round constant already added */
#define C(e)  abcd = vsha1cq_u32(abcd, e, t);
#define P(e)  abcd = vsha1pq_u32(abcd, e, t);
#define M(e)  abcd = vsha1mq_u32(abcd, e, t);

/* (e) for the next step, derived from lane 0 of (abcd) */
#define H(e)  e = vsha1h_u32(vgetq_lane_u32(abcd, 0))

/* t = message words + round constant */
#define T(m, c)  t = vaddq_u32(m, c)
/*
  Sha1_UpdateBlocks_HW: ARM / ARM64 code path that uses the SHA-1 crypto instructions.

  Processes (numBlocks) consecutive 64-byte blocks starting at (data) and
  updates (state) in place. Only state[0..4] are read and written, so the
  parameter is declared as state[5], matching the x86 and stub variants.
  Returns immediately, leaving (state) untouched, if (numBlocks == 0).

  NOTE(review): (state) is accessed through LOAD_128 / STORE_128, which
  dereference it as a 128-bit vector - confirm the caller's alignment guarantees.
*/
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
  v128 abcd;
  v128 c0, c1, c2, c3;
  uint32_t e0;

  if (numBlocks == 0)
    return;

  /* the four SHA-1 round constants, one per group of 20 rounds */
  c0 = vdupq_n_u32(0x5a827999);
  c1 = vdupq_n_u32(0x6ed9eba1);
  c2 = vdupq_n_u32(0x8f1bbcdc);
  c3 = vdupq_n_u32(0xca62c1d6);

  abcd = LOAD_128(&state[0]);
  e0 = state[4];

  do
  {
    v128 abcd_save;
    v128 m0, m1, m2, m3;
    v128 t;
    uint32_t e0_save, e1;

    /* state before this block; it is added back after the 80 rounds */
    abcd_save = abcd;
    e0_save = e0;

    LOAD_SHUFFLE (m0, 0)
    LOAD_SHUFFLE (m1, 1)
    LOAD_SHUFFLE (m2, 2)
    LOAD_SHUFFLE (m3, 3)

    /* Each line is one 4-round step: T adds the round constant, SU0 / SU1
       advance the message schedule, H derives the next (e), and C / P / M
       run the rounds. The statement order inside a line matters. */

    /* rounds 0..19 */
    T(m0, c0);                                  H(e1); C(e0);
    T(m1, c0);  SU0(m0, m1, m2);                H(e0); C(e1);
    T(m2, c0);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); C(e0);
    T(m3, c0);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); C(e1);
    T(m0, c0);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); C(e0);

    /* rounds 20..39 */
    T(m1, c1);  SU0(m0, m1, m2);  SU1(m3, m2);  H(e0); P(e1);
    T(m2, c1);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); P(e0);
    T(m3, c1);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); P(e1);
    T(m0, c1);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); P(e0);
    T(m1, c1);  SU0(m0, m1, m2);  SU1(m3, m2);  H(e0); P(e1);

    /* rounds 40..59 */
    T(m2, c2);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); M(e0);
    T(m3, c2);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); M(e1);
    T(m0, c2);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); M(e0);
    T(m1, c2);  SU0(m0, m1, m2);  SU1(m3, m2);  H(e0); M(e1);
    T(m2, c2);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); M(e0);

    /* rounds 60..79 */
    T(m3, c3);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); P(e1);
    T(m0, c3);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); P(e0);
    T(m1, c3);                    SU1(m3, m2);  H(e0); P(e1);
    T(m2, c3);                                  H(e1); P(e0);
    T(m3, c3);                                  H(e0); P(e1);

    abcd = vaddq_u32(abcd, abcd_save);
    e0 += e0_save;

    data += 64;
  }
  while (--numBlocks);

  STORE_128(&state[0], abcd);
  state[4] = e0;
}
#endif // USE_HW_SHA
#endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// #include "Sha1.h"
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#pragma message("Sha1 HW-SW stub was used")
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
/* Stub used when no hardware SHA-1 code path was compiled (USE_HW_SHA is not
   defined): the call is forwarded unchanged to the software implementation. */
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
  Sha1_UpdateBlocks(state, data, numBlocks);
}
#endif

View File

@@ -1,5 +1,5 @@
/* Crypto/Sha256.c -- SHA-256 Hash
2017-04-03 : Igor Pavlov : Public domain
/* Sha256.c -- SHA-256 Hash
2021-04-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -10,16 +10,107 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "RotateDefs.h"
#include "Sha256.h"
/* define it for speed optimization */
#ifndef _SFX
#define _SHA256_UNROLL
#define _SHA256_UNROLL2
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif
/* #define _SHA256_UNROLL2 */
#ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER
#if _MSC_VER >= 1200
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#endif
void Sha256_Init(CSha256 *p)
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef _SHA_SUPPORTED
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
#define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif
/*
  Selects the block-update function stored in (p->func_UpdateBlocks).

  algo:
    SHA256_ALGO_DEFAULT - the default function (g_FUNC_UPDATE_BLOCKS when
                          alternative implementations are compiled in)
    SHA256_ALGO_SW      - the software function Sha256_UpdateBlocks
    SHA256_ALGO_HW      - the hardware function, if one was registered

  Returns True if the function was set, or False if (algo) is not supported;
  in the latter case (p) is not modified.
*/
BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
{
  SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;

  #ifdef _SHA_SUPPORTED

  if (algo == SHA256_ALGO_DEFAULT)
    func = g_FUNC_UPDATE_BLOCKS;
  else if (algo == SHA256_ALGO_HW)
  {
    func = g_FUNC_UPDATE_BLOCKS_HW;
    /* the HW pointer stays NULL until a hardware function is registered */
    if (!func)
      return False;
  }
  else if (algo != SHA256_ALGO_SW)
    return False;

  #else

  if (algo > 1)
    return False;

  #endif

  p->func_UpdateBlocks = func;
  return True;
}
/* define it for speed optimization */
#ifdef _SFX
#define STEP_PRE 1
#define STEP_MAIN 1
#else
#define STEP_PRE 2
#define STEP_MAIN 4
// #define _SHA256_UNROLL
#endif
#if STEP_MAIN != 16
#define _SHA256_BIG_W
#endif
void Sha256_InitState(CSha256 *p)
{
p->count = 0;
p->state[0] = 0x6a09e667;
p->state[1] = 0xbb67ae85;
p->state[2] = 0x3c6ef372;
@@ -28,7 +119,17 @@ void Sha256_Init(CSha256 *p)
p->state[5] = 0x9b05688c;
p->state[6] = 0x1f83d9ab;
p->state[7] = 0x5be0cd19;
p->count = 0;
}
/* Full initialization of (p): selects the default block-update function
   (NULL when only the software implementation is compiled in) and then
   resets the hash state with Sha256_InitState(). */
void Sha256_Init(CSha256 *p)
{
  #ifdef _SHA_SUPPORTED
  p->func_UpdateBlocks = g_FUNC_UPDATE_BLOCKS;
  #else
  p->func_UpdateBlocks = NULL;
  #endif

  Sha256_InitState(p);
}
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
@@ -36,61 +137,100 @@ void Sha256_Init(CSha256 *p)
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
#define blk0(i) (W[i])
#define blk2(i) (W[i] += s1(W[((i)-2)&15]) + W[((i)-7)&15] + s0(W[((i)-15)&15]))
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
#ifdef _SHA256_UNROLL2
#define R(a,b,c,d,e,f,g,h, i) \
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
#ifdef _SHA256_BIG_W
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
#define w(j, i) W[(size_t)(j) + i]
#define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
#if STEP_MAIN == 16
#define w(j, i) W[(i) & 15]
#else
#define w(j, i) W[((size_t)(j) + (i)) & 15]
#endif
#define blk2(j, i) (w(j, i) += blk2_main(j, i))
#endif
#define W_MAIN(i) blk2(j, i)
#define T1(wx, i) \
tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
h = g; \
g = f; \
f = e; \
e = d + tmp; \
tmp += S0(a) + Maj(a, b, c); \
d = c; \
c = b; \
b = a; \
a = tmp; \
#define R1_PRE(i) T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)
#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \
R1_MAIN(i) \
R1_MAIN(i + 1) \
#endif
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
#define T4( a,b,c,d,e,f,g,h, wx, i) \
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
tmp = h; \
h += d; \
d = tmp + S0(a) + Maj(a, b, c); \
#define R4( wx, i) \
T4 ( a,b,c,d,e,f,g,h, wx, (i )); \
T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
#define R4_PRE(i) R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)
#define T8( a,b,c,d,e,f,g,h, wx, i) \
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
d += h; \
h += S0(a) + Maj(a, b, c)
h += S0(a) + Maj(a, b, c); \
#define RX_8(i) \
R(a,b,c,d,e,f,g,h, i); \
R(h,a,b,c,d,e,f,g, i+1); \
R(g,h,a,b,c,d,e,f, i+2); \
R(f,g,h,a,b,c,d,e, i+3); \
R(e,f,g,h,a,b,c,d, i+4); \
R(d,e,f,g,h,a,b,c, i+5); \
R(c,d,e,f,g,h,a,b, i+6); \
R(b,c,d,e,f,g,h,a, i+7)
#define R8( wx, i) \
T8 ( a,b,c,d,e,f,g,h, wx, i ); \
T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
#define RX_16 RX_8(0); RX_8(8);
#else
#define a(i) T[(0-(i))&7]
#define b(i) T[(1-(i))&7]
#define c(i) T[(2-(i))&7]
#define d(i) T[(3-(i))&7]
#define e(i) T[(4-(i))&7]
#define f(i) T[(5-(i))&7]
#define g(i) T[(6-(i))&7]
#define h(i) T[(7-(i))&7]
#define R(i) \
h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[(i)+(size_t)(j)] + (j ? blk2(i) : blk0(i)); \
d(i) += h(i); \
h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)) \
#ifdef _SHA256_UNROLL
#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
#define RX_16 RX_8(0); RX_8(8);
#else
#define RX_16 unsigned i; for (i = 0; i < 16; i++) { R(i); }
#define R8_PRE(i) R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)
#endif
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
static const UInt32 K[64] = {
// static
extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
@@ -109,30 +249,27 @@ static const UInt32 K[64] = {
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
static void Sha256_WriteByteBlock(CSha256 *p)
{
UInt32 W[16];
unsigned j;
UInt32 *state;
#define K SHA256_K_ARRAY
#ifdef _SHA256_UNROLL2
UInt32 a,b,c,d,e,f,g,h;
MY_NO_INLINE
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{
UInt32 W
#ifdef _SHA256_BIG_W
[64];
#else
UInt32 T[8];
[16];
#endif
for (j = 0; j < 16; j += 4)
{
const Byte *ccc = p->buffer + j * 4;
W[j ] = GetBe32(ccc);
W[j + 1] = GetBe32(ccc + 4);
W[j + 2] = GetBe32(ccc + 8);
W[j + 3] = GetBe32(ccc + 12);
}
unsigned j;
state = p->state;
UInt32 a,b,c,d,e,f,g,h;
#if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
UInt32 tmp;
#endif
#ifdef _SHA256_UNROLL2
a = state[0];
b = state[1];
c = state[2];
@@ -141,39 +278,96 @@ static void Sha256_WriteByteBlock(CSha256 *p)
f = state[5];
g = state[6];
h = state[7];
while (numBlocks)
{
for (j = 0; j < 16; j += STEP_PRE)
{
#if STEP_PRE > 4
#if STEP_PRE < 8
R4_PRE(0);
#else
for (j = 0; j < 8; j++)
T[j] = state[j];
R8_PRE(0);
#if STEP_PRE == 16
R8_PRE(8);
#endif
#endif
for (j = 0; j < 64; j += 16)
{
RX_16
#else
R1_PRE(0);
#if STEP_PRE >= 2
R1_PRE(1);
#if STEP_PRE >= 4
R1_PRE(2);
R1_PRE(3);
#endif
#endif
#endif
}
#ifdef _SHA256_UNROLL2
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
state[5] += f;
state[6] += g;
state[7] += h;
for (j = 16; j < 64; j += STEP_MAIN)
{
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
#if STEP_MAIN < 8
R4_MAIN(0);
#else
for (j = 0; j < 8; j++)
state[j] += T[j];
R8_MAIN(0);
#if STEP_MAIN == 16
R8_MAIN(8);
#endif
#endif
#else
R1_MAIN(0);
#if STEP_MAIN >= 2
R1_MAIN(1);
#if STEP_MAIN >= 4
R2_MAIN(2);
#if STEP_MAIN >= 8
R2_MAIN(4);
R2_MAIN(6);
#if STEP_MAIN >= 16
R2_MAIN(8);
R2_MAIN(10);
R2_MAIN(12);
R2_MAIN(14);
#endif
#endif
#endif
#endif
#endif
}
a += state[0]; state[0] = a;
b += state[1]; state[1] = b;
c += state[2]; state[2] = c;
d += state[3]; state[3] = d;
e += state[4]; state[4] = e;
f += state[5]; state[5] = f;
g += state[6]; state[6] = g;
h += state[7]; state[7] = h;
data += 64;
numBlocks--;
}
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
/* memset(T, 0, sizeof(T)); */
}
#undef S0
#undef S1
#undef s0
#undef s1
#undef K
#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
@@ -193,25 +387,26 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
return;
}
if (pos != 0)
{
size -= num;
memcpy(p->buffer + pos, data, num);
data += num;
Sha256_UpdateBlock(p);
}
}
for (;;)
{
Sha256_WriteByteBlock(p);
if (size < 64)
break;
size -= 64;
memcpy(p->buffer, data, 64);
data += 64;
}
if (size != 0)
size_t numBlocks = size >> 6;
UPDATE_BLOCKS(p)(p->state, data, numBlocks);
size &= 0x3F;
if (size == 0)
return;
data += (numBlocks << 6);
memcpy(p->buffer, data, size);
}
}
void Sha256_Final(CSha256 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
@@ -219,30 +414,73 @@ void Sha256_Final(CSha256 *p, Byte *digest)
p->buffer[pos++] = 0x80;
while (pos != (64 - 8))
if (pos > (64 - 8))
{
pos &= 0x3F;
if (pos == 0)
Sha256_WriteByteBlock(p);
p->buffer[pos++] = 0;
while (pos != 64) { p->buffer[pos++] = 0; }
// memset(&p->buf.buffer[pos], 0, 64 - pos);
Sha256_UpdateBlock(p);
pos = 0;
}
/*
if (pos & 3)
{
p->buffer[pos] = 0;
p->buffer[pos + 1] = 0;
p->buffer[pos + 2] = 0;
pos += 3;
pos &= ~3;
}
{
for (; pos < 64 - 8; pos += 4)
*(UInt32 *)(&p->buffer[pos]) = 0;
}
*/
memset(&p->buffer[pos], 0, (64 - 8) - pos);
{
UInt64 numBits = (p->count << 3);
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
Sha256_WriteByteBlock(p);
Sha256_UpdateBlock(p);
for (i = 0; i < 8; i += 2)
{
UInt32 v0 = p->state[i];
UInt32 v1 = p->state[i + 1];
UInt32 v1 = p->state[(size_t)i + 1];
SetBe32(digest , v0);
SetBe32(digest + 4, v1);
digest += 8;
}
Sha256_Init(p);
Sha256_InitState(p);
}
/*
  Detects at run time whether the hardware SHA-256 code can be used and
  registers the result in g_FUNC_UPDATE_BLOCKS (default function) and
  g_FUNC_UPDATE_BLOCKS_HW (NULL if the hardware path is unavailable).
  Does nothing if _SHA_SUPPORTED is not defined.

  The definition uses (void) so that it is a real prototype and matches the
  declaration in Sha256.h.
*/
void Sha256Prepare(void)
{
  #ifdef _SHA_SUPPORTED
  SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
  f = Sha256_UpdateBlocks;
  f_hw = NULL;
  /* The CPU check below is the condition of the braced block that follows.
     With MY_CPU_X86_OR_AMD64 and USE_MY_MM both defined there is no check,
     and the block is executed unconditionally. */
  #ifdef MY_CPU_X86_OR_AMD64
  #ifndef USE_MY_MM
  if (CPU_IsSupported_SHA()
      && CPU_IsSupported_SSSE3()
      // && CPU_IsSupported_SSE41()
      )
  #endif
  #else
  if (CPU_IsSupported_SHA2())
  #endif
  {
    // printf("\n========== HW SHA256 ======== \n");
    f = f_hw = Sha256_UpdateBlocks_HW;
  }
  g_FUNC_UPDATE_BLOCKS    = f;
  g_FUNC_UPDATE_BLOCKS_HW = f_hw;
  #endif
}

View File

@@ -1,26 +1,76 @@
/* Sha256.h -- SHA-256 Hash
2013-01-18 : Igor Pavlov : Public domain */
2021-01-01 : Igor Pavlov : Public domain */
#ifndef __CRYPTO_SHA256_H
#define __CRYPTO_SHA256_H
#ifndef __7Z_SHA256_H
#define __7Z_SHA256_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define SHA256_DIGEST_SIZE 32
#define SHA256_NUM_BLOCK_WORDS 16
#define SHA256_NUM_DIGEST_WORDS 8
#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)
#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)
typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
if (the system supports different SHA256 code implementations)
{
(CSha256::func_UpdateBlocks) will be used
(CSha256::func_UpdateBlocks) can be set by
Sha256_Init() - to default (fastest)
Sha256_SetFunction() - to any algo
}
else
{
(CSha256::func_UpdateBlocks) is ignored.
}
*/
typedef struct
{
UInt32 state[8];
SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
Byte buffer[64];
UInt64 __pad_2[2];
UInt32 state[SHA256_NUM_DIGEST_WORDS];
Byte buffer[SHA256_BLOCK_SIZE];
} CSha256;
#define SHA256_ALGO_DEFAULT 0
#define SHA256_ALGO_SW 1
#define SHA256_ALGO_HW 2
/*
Sha256_SetFunction()
return:
0 - (algo) value is not supported, and func_UpdateBlocks was not changed
1 - func_UpdateBlocks was set according to the (algo) value.
*/
BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo);
void Sha256_InitState(CSha256 *p);
void Sha256_Init(CSha256 *p);
void Sha256_Update(CSha256 *p, const Byte *data, size_t size);
void Sha256_Final(CSha256 *p, Byte *digest);
// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
call Sha256Prepare() once at program start.
It prepares all supported implementations, and detects the fastest implementation.
*/
void Sha256Prepare(void);
EXTERN_C_END
#endif

373
C/Sha256Opt.c Normal file
View File

@@ -0,0 +1,373 @@
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
// #define USE_MY_MM
#endif
#endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#if defined(_MSC_VER)
// SSSE3: for clang-cl:
#include <tmmintrin.h>
#define __SHA__
#endif
#endif
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
// #pragma GCC target("sha,ssse3")
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#ifdef USE_MY_MM
#define USE_VER_MIN 1300
#else
#define USE_VER_MIN 1910
#endif
#if _MSC_VER >= USE_VER_MIN
#define USE_HW_SHA
#endif
#endif
// #endif // MY_CPU_X86_OR_AMD64
#ifdef USE_HW_SHA
// #pragma message("Sha256 HW")
// #include <wmmintrin.h>
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
#include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
// #include <intrin.h>
#endif
#ifdef USE_MY_MM
#include "My_mm.h"
#endif
#endif
/*
SHA256 uses:
SSE2:
_mm_loadu_si128
_mm_storeu_si128
_mm_set_epi32
_mm_add_epi32
_mm_shuffle_epi32 / pshufd
SSSE3:
_mm_shuffle_epi8 / pshufb
_mm_alignr_epi8
SHA:
_mm_sha256*
*/
// K array must be aligned to 16 bytes at least.
// The compiler can look at the align attribute and select
// movdqu - for code without the align attribute
// movdqa - for code with the align attribute
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
#define K SHA256_K_ARRAY
/* Helper macros for the x86 SHA-256 block function below.
   They rely on locals of that function: data, mask, tmp, msg, state0, state1,
   m0..m3. The last line of each macro has no trailing backslash, so a macro
   never depends on the following source line being blank. */

#define ADD_EPI32(dest, src)      dest = _mm_add_epi32(dest, src);
#define SHA256_MSG1(dest, src)    dest = _mm_sha256msg1_epu32(dest, src);
#define SHA256_MSG2(dest, src)    dest = _mm_sha256msg2_epu32(dest, src);

/* Load the k-th 16-byte quarter of the current block into (m) and
   shuffle its bytes with (mask). */
#define LOAD_SHUFFLE(m, k) \
    m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
    m = _mm_shuffle_epi8(m, mask);

/* first part of the message-schedule update */
#define SM1(g0, g1, g2, g3) \
    SHA256_MSG1(g3, g0);

/* second part of the message-schedule update */
#define SM2(g0, g1, g2, g3) \
    tmp = _mm_alignr_epi8(g1, g0, 4); \
    ADD_EPI32(g2, tmp); \
    SHA256_MSG2(g2, g1);

// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)

/* Placeholder: no schedule work for this slot. */
#define NNN(g0, g1, g2, g3)

#define RND2(t0, t1) \
    t0 = _mm_sha256rnds2_epu32(t0, t1, msg);

/* msg = message words + K[4k .. 4k+3]; run RND2 with the low half of (msg),
   then move the high 64 bits of (msg) down for RND2_1. */
#define RND2_0(m, k) \
    msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
    RND2(state0, state1); \
    msg = _mm_shuffle_epi32(msg, 0x0E);

#define RND2_1 \
    RND2(state1, state0);

// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2

/* One 4-round step: two RND2 calls, each followed by one schedule slot. */
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
    RND2_0(g0, k); \
    OP0(g0, g1, g2, g3); \
    RND2_1; \
    OP1(g0, g1, g2, g3);

/* Four R4 steps (16 rounds) with rotating roles of m0..m3. */
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
    R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
    R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
    R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
    R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 )

/* Rearrange (state0, state1) between the in-memory word order and the
   register layout used by the rounds. Applied once before and once after
   the block loop. */
#define PREPARE_STATE \
    tmp    = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
    state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
    state1 = state0; \
    state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
    state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
/*
Sha256_UpdateBlocks_HW: x86 / x64 code path that uses the SHA instruction set extensions.
Processes (numBlocks) consecutive 64-byte blocks starting at (data) and
updates the eight 32-bit words of (state) in place.
Returns immediately, leaving (state) untouched, if (numBlocks == 0).
*/
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
// pshufb control used by LOAD_SHUFFLE: reverses the byte order inside every 32-bit word
const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
__m128i tmp;
__m128i state0, state1;
if (numBlocks == 0)
return;
// unaligned loads of state[0..3] and state[4..7]
state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]);
state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
// rearrange the state words into the register layout used by the rounds
PREPARE_STATE
do
{
__m128i state0_save, state1_save;
__m128i m0, m1, m2, m3;
__m128i msg;
// #define msg tmp
// keep the state that was current before this block; it is added back after the rounds
state0_save = state0;
state1_save = state1;
// load the four 16-byte quarters of the 64-byte block
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
// 4 groups * 16 rounds = 64 rounds; SM1 / SM2 slots update the message schedule
// ahead of the rounds that use it, NNN slots do nothing
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
ADD_EPI32(state0, state0_save);
ADD_EPI32(state1, state1_save);
data += 64;
}
while (--numBlocks);
// rearrange the words back before storing the result
PREPARE_STATE
_mm_storeu_si128((__m128i *) (void *) &state[0], state0);
_mm_storeu_si128((__m128i *) (void *) &state[4], state1);
}
#endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
// #pragma message("=== Sha256 HW === ")
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC
#ifdef MY_CPU_BE
#define MY_rev32_for_LE(x)
#else
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
#endif
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
/* Helper macros for the ARM SHA-256 block function below.
   They rely on locals of that function: data, msg, tmp, state0, state1,
   m0..m3. The last line of each macro has no trailing backslash, so a macro
   never depends on the following source line being blank. */

/* Load the k-th 16-byte quarter of the current block into (m) and, on
   little-endian targets, reverse the bytes of every 32-bit word. */
#define LOAD_SHUFFLE(m, k) \
    m = LOAD_128((data + (k) * 16)); \
    MY_rev32_for_LE(m);

// K array must be aligned for 16-bytes at least.
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];

#define K SHA256_K_ARRAY

/* message schedule updates */
#define SHA256_SU0(dest, src)        dest = vsha256su0q_u32(dest, src);
#define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);

#define SM1(g0, g1, g2, g3)  SHA256_SU0(g3, g0)
#define SM2(g0, g1, g2, g3)  SHA256_SU1(g2, g0, g1)

/* Placeholder: no schedule work for this slot. */
#define NNN(g0, g1, g2, g3)

/* One 4-round step: msg = message words + K[4k .. 4k+3], then both halves of
   the state are updated ((tmp) keeps the old state0 for the second
   instruction), followed by two schedule slots. */
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
    msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
    tmp = state0; \
    state0 = vsha256hq_u32( state0, state1, msg ); \
    state1 = vsha256h2q_u32( state1, tmp, msg ); \
    OP0(g0, g1, g2, g3); \
    OP1(g0, g1, g2, g3);

/* Four R4 steps (16 rounds) with rotating roles of m0..m3. */
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
    R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
    R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
    R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
    R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 )
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
/*
Sha256_UpdateBlocks_HW: ARM / ARM64 code path that uses the SHA-256 crypto instructions.
Processes (numBlocks) consecutive 64-byte blocks starting at (data) and
updates the eight 32-bit words of (state) in place.
Returns immediately, leaving (state) untouched, if (numBlocks == 0).
NOTE(review): (state) is accessed through LOAD_128 / STORE_128, which dereference
it as a 128-bit vector - confirm the caller's alignment guarantees.
*/
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
v128 state0, state1;
if (numBlocks == 0)
return;
// state0 = state[0..3], state1 = state[4..7]
state0 = LOAD_128(&state[0]);
state1 = LOAD_128(&state[4]);
do
{
v128 state0_save, state1_save;
v128 m0, m1, m2, m3;
v128 msg, tmp;
// keep the state that was current before this block; it is added back after the rounds
state0_save = state0;
state1_save = state1;
// load the four 16-byte quarters of the 64-byte block
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
// 4 groups * 16 rounds = 64 rounds; SM1 / SM2 slots update the message schedule
// ahead of the rounds that use it, NNN slots do nothing
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
state0 = vaddq_u32(state0, state0_save);
state1 = vaddq_u32(state1, state1_save);
data += 64;
}
while (--numBlocks);
STORE_128(&state[0], state0);
STORE_128(&state[4], state1);
}
#endif // USE_HW_SHA
#endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// #include "Sha256.h"
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
#pragma message("Sha256 HW-SW stub was used")
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
/* Stub used when no hardware SHA-256 code path was compiled (USE_HW_SHA is not
   defined): the call is forwarded unchanged to the software implementation. */
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
  Sha256_UpdateBlocks(state, data, numBlocks);
}
#endif

View File

@@ -1,8 +1,10 @@
/* Threads.c -- multithreading library
2017-06-26 : Igor Pavlov : Public domain */
2021-04-25 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
#ifndef UNDER_CE
#include <process.h>
#endif
@@ -29,7 +31,33 @@ WRes HandlePtr_Close(HANDLE *p)
return 0;
}
WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); }
/*
  Waits without timeout until (h) is signaled.

  Returns the WaitForSingleObject() result:
    WAIT_OBJECT_0   // 0
    WAIT_ABANDONED  // 0x00000080 : is not compatible with Win32 Error space
    WAIT_TIMEOUT    // 0x00000102 : is compatible with Win32 Error space
  If the wait itself failed (WAIT_FAILED // 0xFFFFFFFF), GetLastError() is
  returned instead, or WAIT_FAILED when GetLastError() reports 0.
*/
WRes Handle_WaitObject(HANDLE h)
{
  const DWORD waitRes = WaitForSingleObject(h, INFINITE);

  if (waitRes != WAIT_FAILED)
    return (WRes)waitRes;

  {
    const DWORD lastError = GetLastError();
    if (lastError == 0)
      return WAIT_FAILED;
    return (WRes)lastError;
  }
}
#define Thread_Wait(p) Handle_WaitObject(*(p))
/* Waits for the thread to finish and then closes its handle.
   The handle is closed even if the wait failed. Returns the wait error if
   there was one, otherwise the result of closing. */
WRes Thread_Wait_Close(CThread *p)
{
  const WRes waitRes = Thread_Wait(p);
  const WRes closeRes = Thread_Close(p);

  if (waitRes != 0)
    return waitRes;
  return closeRes;
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
@@ -43,7 +71,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
#else
unsigned threadId;
*p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
*p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
#endif
@@ -51,6 +79,55 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
return HandleToWRes(*p);
}
/*
  Creates a thread and asks the system to run it on the CPUs in (affinity).

  Under UNDER_CE the affinity is ignored and Thread_Create() is used.
  Otherwise the thread is created suspended, its affinity mask is set
  (a failure here is ignored as non-critical), and the thread is resumed.

  The handle is stored in (*p), also when creation failed.
  Returns the creation result, or GetError() if the thread could not be resumed.
*/
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
  #ifdef UNDER_CE

  UNUSED_VAR(affinity)
  return Thread_Create(p, func, param);

  #else

  /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
  unsigned threadId;
  WRes wres;
  HANDLE h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));

  *p = h;
  wres = HandleToWRes(h);
  if (!h)
    return wres;

  /* the previous mask (0 on failure) is not checked:
     an affinity change failure is a non-critical error */
  SetThreadAffinityMask(h, (DWORD_PTR)affinity);

  /* ResumeThread() returns the previous suspend count:
       0 : was_not_suspended
       1 : was_resumed
      -1 : error
  */
  if (ResumeThread(h) == (DWORD)-1)
    wres = GetError();

  /* maybe we must use errno here, but probably GetLastError() is also OK. */
  return wres;

  #endif
}
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
{
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
@@ -68,6 +145,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
return HandleToWRes(*p);
}
@@ -93,3 +171,336 @@ WRes CriticalSection_Init(CCriticalSection *p)
#endif
return 0;
}
#else // _WIN32
// ---------- POSIX ----------
#ifndef __APPLE__
#ifndef _7ZIP_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
#define _GNU_SOURCE
#endif
#endif
#include "Threads.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifdef _7ZIP_AFFINITY_SUPPORTED
// #include <sched.h>
#endif
// #include <stdio.h>
// #define PRF(p) p
#define PRF(p)
#define Print(s) PRF(printf("\n%s\n", s))
// #include <stdio.h>
/*
  Creates a joinable POSIX thread that runs func(param).

  If (cpuSet) is not NULL and _7ZIP_AFFINITY_SUPPORTED is defined, the set is
  requested as the affinity of the new thread; a failure of that request is
  ignored. On success (p->_created) is set to 1, otherwise it stays 0.

  Returns 0 on success or the pthread error code.
*/
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
{
  // a new thread in POSIX probably inherits affinity from its parent thread
  pthread_attr_t attr;
  int res;

  Print("Thread_Create_With_CpuSet");

  p->_created = 0;

  RINOK(pthread_attr_init(&attr));

  res = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

  if (res == 0)
  {
    if (cpuSet)
    {
      #ifdef _7ZIP_AFFINITY_SUPPORTED
      // the result is intentionally not checked
      pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
      #endif
    }

    res = pthread_create(&p->_tid, &attr, func, param);

    if (res == 0)
      p->_created = 1;
  }

  // the attribute object is released on every path; the result is not checked
  pthread_attr_destroy(&attr);
  return res;
}
/* Creates a joinable thread without any affinity request. */
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
  const CCpuSet *noCpuSet = NULL;
  return Thread_Create_With_CpuSet(p, func, param, noCpuSet);
}
/* Converts the bit mask (affinity) into a CCpuSet (bit i set -> CPU i is in
   the set) and creates the thread with that set. */
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
  CCpuSet cs;
  unsigned cpu;

  Print("Thread_Create_WithAffinity");

  CpuSet_Zero(&cs);

  // stop as soon as no set bits remain in the mask
  for (cpu = 0; cpu < sizeof(affinity) * 8 && affinity != 0; cpu++, affinity >>= 1)
  {
    if (affinity & 1)
    {
      CpuSet_Set(&cs, cpu);
    }
  }

  return Thread_Create_With_CpuSet(p, func, param, &cs);
}
/* Detaches the thread if it was created and marks (p) as not created.
   Returns 0 if there was nothing to close, otherwise the pthread_detach() result. */
WRes Thread_Close(CThread *p)
{
  // Print("Thread_Close");
  int res = 0;

  if (p->_created)
  {
    res = pthread_detach(p->_tid);
    p->_tid = 0;
    p->_created = 0;
  }
  return res;
}
/* Joins the thread and marks (p) as not created.
   Returns EINVAL if the thread was not created, otherwise the pthread_join() result. */
WRes Thread_Wait_Close(CThread *p)
{
  // Print("Thread_Wait_Close");
  void *threadResult;
  int res;

  if (p->_created)
  {
    res = pthread_join(p->_tid, &threadResult);
    // probably (_tid) can't be used after pthread_join(), so the thread is closed here
    p->_tid = 0;
    p->_created = 0;
  }
  else
    res = EINVAL;

  return res;
}
/*
  Initializes the mutex / condition variable pair of an event.

  manualReset : stored in (p->_manual_reset); Event_Wait() resets the state
                automatically only when it is False.
  signaled    : non-zero creates the event in the signaled state.

  Returns 0 on success or the pthread error code. On failure nothing stays
  initialized: if the condition variable cannot be created, the mutex that was
  already initialized is destroyed again, so it is not leaked.
*/
static WRes Event_Create(CEvent *p, int manualReset, int signaled)
{
  int res;

  RINOK(pthread_mutex_init(&p->_mutex, NULL));

  res = pthread_cond_init(&p->_cond, NULL);
  if (res != 0)
  {
    /* (p->_created) is not set, so Event_Close() would never release the mutex */
    pthread_mutex_destroy(&p->_mutex);
    return res;
  }

  p->_manual_reset = manualReset;
  p->_state = (signaled ? True : False);
  p->_created = 1;
  return 0;
}
/* Creates a manual-reset event; (signaled) != 0 selects the initial signaled state. */
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
{
  return Event_Create(p, True, signaled);
}
/* Creates a manual-reset event in the non-signaled state. */
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
{
  return ManualResetEvent_Create(p, 0);
}
/* Creates an auto-reset event; (signaled) != 0 selects the initial signaled state. */
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
{
  return Event_Create(p, False, signaled);
}
/* Creates an auto-reset event in the non-signaled state. */
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
{
  return AutoResetEvent_Create(p, 0);
}
/* Puts the event into the signaled state and wakes all waiting threads.
   Returns the lock error, else the unlock error, else the broadcast result. */
WRes Event_Set(CEvent *p)
{
  RINOK(pthread_mutex_lock(&p->_mutex));
  p->_state = True;
  {
    const int broadcastRes = pthread_cond_broadcast(&p->_cond);
    const int unlockRes = pthread_mutex_unlock(&p->_mutex);
    if (unlockRes != 0)
      return unlockRes;
    return broadcastRes;
  }
}
/* Puts the event into the non-signaled state (under the event mutex).
   Returns the lock error, or the result of unlocking. */
WRes Event_Reset(CEvent *p)
{
  int unlockRes;

  RINOK(pthread_mutex_lock(&p->_mutex));
  p->_state = False;
  unlockRes = pthread_mutex_unlock(&p->_mutex);
  return unlockRes;
}
/* Blocks until the event is signaled. An auto-reset event (_manual_reset ==
   False) is returned to the non-signaled state before the function returns.
   Returns the lock error, or the result of unlocking. */
WRes Event_Wait(CEvent *p)
{
  RINOK(pthread_mutex_lock(&p->_mutex));

  for (;;)
  {
    if (p->_state != False)
      break;
    // the result of pthread_cond_wait() is not checked;
    // the state is simply tested again after every wake-up
    pthread_cond_wait(&p->_cond, &p->_mutex);
  }

  if (p->_manual_reset == False)
    p->_state = False;

  return pthread_mutex_unlock(&p->_mutex);
}
/* Destroys the mutex and the condition variable of a created event.
   Does nothing and returns 0 if the event was not created.
   Both objects are always destroyed; the mutex error has priority in the result. */
WRes Event_Close(CEvent *p)
{
  int mutexRes;
  int condRes;

  if (!p->_created)
    return 0;
  p->_created = 0;

  mutexRes = pthread_mutex_destroy(&p->_mutex);
  condRes = pthread_cond_destroy(&p->_cond);

  if (mutexRes != 0)
    return mutexRes;
  return condRes;
}
/* Initializes a counting semaphore built from a mutex and a condition variable.
     initCount : initial counter value, must not exceed maxCount
     maxCount  : upper bound of the counter, must be at least 1
   Returns EINVAL for invalid counts, the error code of a failed pthread
   call, or 0 on success. On failure nothing stays initialized and
   _created is left untouched. */
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
  int res;
  if (initCount > maxCount || maxCount < 1)
    return EINVAL;
  res = pthread_mutex_init(&p->_mutex, NULL);
  if (res != 0)
    return res;
  res = pthread_cond_init(&p->_cond, NULL);
  if (res != 0)
  {
    /* do not leak the mutex that was already initialized:
       Semaphore_Close() skips objects whose _created flag is 0 */
    pthread_mutex_destroy(&p->_mutex);
    return res;
  }
  p->_count = initCount;
  p->_maxCount = maxCount;
  p->_created = 1;
  return 0;
}
/* Adds releaseCount to the semaphore counter and wakes all waiters.
   Returns EINVAL when releaseCount is 0, ERROR_TOO_MANY_POSTS when the
   counter would exceed _maxCount (the counter is then left unchanged),
   a pthread error code on lock/unlock/broadcast failure, or 0 on success. */
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
{
  int ret;
  if (releaseCount < 1)
    return EINVAL;
  RINOK(pthread_mutex_lock(&p->_mutex));
  /* Compare against the remaining headroom instead of computing
     (_count + releaseCount): that sum can wrap around in UInt32 and
     would then slip past the _maxCount limit. */
  if (p->_count > p->_maxCount
      || releaseCount > p->_maxCount - p->_count)
    ret = ERROR_TOO_MANY_POSTS; // EINVAL;
  else
  {
    p->_count += releaseCount;
    ret = pthread_cond_broadcast(&p->_cond);
  }
  RINOK(pthread_mutex_unlock(&p->_mutex));
  return ret;
}
/* Blocks until the semaphore counter is non-zero, then decrements it by one.
   Returns the lock error if locking fails, otherwise the unlock result. */
WRes Semaphore_Wait(CSemaphore *p)
{
  RINOK(pthread_mutex_lock(&p->_mutex));
  for (;;)
  {
    if (p->_count >= 1)
      break;
    /* the result of pthread_cond_wait() is not checked */
    pthread_cond_wait(&p->_cond, &p->_mutex);
  }
  p->_count -= 1;
  return pthread_mutex_unlock(&p->_mutex);
}
/* Destroys the mutex and the condition variable of a created semaphore.
   Returns 0 for a semaphore that was never created. Both objects are always
   destroyed; the mutex error is reported in preference to the cond error. */
WRes Semaphore_Close(CSemaphore *p)
{
  int mutexRes;
  int condRes;
  if (!p->_created)
    return 0;
  p->_created = 0;
  mutexRes = pthread_mutex_destroy(&p->_mutex);
  condRes = pthread_cond_destroy(&p->_cond);
  if (mutexRes != 0)
    return mutexRes;
  return condRes;
}
/* Initializes the mutex behind a critical section with default attributes.
   Returns EINTR for a NULL pointer, otherwise the pthread_mutex_init() result. */
WRes CriticalSection_Init(CCriticalSection *p)
{
  // Print("CriticalSection_Init");
  if (p)
    return pthread_mutex_init(&p->_mutex, NULL);
  return EINTR;
}
/* Locks the critical section; a NULL pointer is ignored.
   The result of pthread_mutex_lock() is not reported to the caller. */
void CriticalSection_Enter(CCriticalSection *p)
{
  // Print("CriticalSection_Enter");
  if (!p)
    return;
  pthread_mutex_lock(&p->_mutex);
}
/* Unlocks the critical section; a NULL pointer is ignored.
   The result of pthread_mutex_unlock() is not reported to the caller. */
void CriticalSection_Leave(CCriticalSection *p)
{
  // Print("CriticalSection_Leave");
  if (!p)
    return;
  pthread_mutex_unlock(&p->_mutex);
}
/* Destroys the mutex behind the critical section; a NULL pointer is ignored.
   The result of pthread_mutex_destroy() is not reported to the caller. */
void CriticalSection_Delete(CCriticalSection *p)
{
  // Print("CriticalSection_Delete");
  if (!p)
    return;
  pthread_mutex_destroy(&p->_mutex);
}
/* Non-Windows replacement for the Win32 InterlockedIncrement() call:
   adds 1 to *addend and returns the incremented value. */
LONG InterlockedIncrement(LONG volatile *addend)
{
// Print("InterlockedIncrement");
#ifdef USE_HACK_UNSAFE_ATOMIC
/* Plain read-modify-write: this branch is NOT atomic and is only
   correct when no other thread updates *addend concurrently. */
LONG val = *addend + 1;
*addend = val;
return val;
#else
/* compiler builtin: atomically adds 1 and returns the new value */
return __sync_add_and_fetch(addend, 1);
#endif
}
#endif // _WIN32

View File

@@ -1,38 +1,106 @@
/* Threads.h -- multithreading library
2017-06-18 : Igor Pavlov : Public domain */
2021-04-25 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H
#define __7Z_THREADS_H
#ifdef _WIN32
#include <windows.h>
#include <Windows.h>
#else
#if !defined(__APPLE__) && !defined(_AIX)
#ifndef _7ZIP_AFFINITY_DISABLE
#define _7ZIP_AFFINITY_SUPPORTED
// #define _GNU_SOURCE
#endif
#endif
#include <pthread.h>
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
#ifdef _WIN32
WRes HandlePtr_Close(HANDLE *h);
WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread;
#define Thread_Construct(p) *(p) = NULL
#define Thread_Construct(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p)
#define Thread_Wait(p) Handle_WaitObject(*(p))
// #define Thread_Wait(p) Handle_WaitObject(*(p))
typedef
#ifdef UNDER_CE
#ifdef UNDER_CE
DWORD
#else
#else
unsigned
#endif
#endif
THREAD_FUNC_RET_TYPE;
typedef DWORD_PTR CAffinityMask;
typedef DWORD_PTR CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; }
#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }
#else // _WIN32
typedef struct _CThread
{
pthread_t _tid;
int _created;
} CThread;
#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }
#define Thread_WasCreated(p) ((p)->_created != 0)
WRes Thread_Close(CThread *p);
// #define Thread_Wait Thread_Wait_Close
typedef void * THREAD_FUNC_RET_TYPE;
typedef UInt64 CAffinityMask;
#ifdef _7ZIP_AFFINITY_SUPPORTED
typedef cpu_set_t CCpuSet;
#define CpuSet_Zero(p) CPU_ZERO(p)
#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
#else
typedef UInt64 CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; }
#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }
#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
#endif
#endif // _WIN32
#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
WRes Thread_Wait_Close(CThread *p);
#ifdef _WIN32
#define Thread_Create_With_CpuSet(p, func, param, cs) \
Thread_Create_With_Affinity(p, func, param, *cs)
#else
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
#endif
#ifdef _WIN32
typedef HANDLE CEvent;
typedef CEvent CAutoResetEvent;
@@ -63,6 +131,67 @@ WRes CriticalSection_Init(CCriticalSection *p);
#define CriticalSection_Enter(p) EnterCriticalSection(p)
#define CriticalSection_Leave(p) LeaveCriticalSection(p)
#else // _WIN32
typedef struct _CEvent
{
int _created;
int _manual_reset;
int _state;
pthread_mutex_t _mutex;
pthread_cond_t _cond;
} CEvent;
typedef CEvent CAutoResetEvent;
typedef CEvent CManualResetEvent;
#define Event_Construct(p) (p)->_created = 0
#define Event_IsCreated(p) ((p)->_created)
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
WRes Event_Set(CEvent *p);
WRes Event_Reset(CEvent *p);
WRes Event_Wait(CEvent *p);
WRes Event_Close(CEvent *p);
typedef struct _CSemaphore
{
int _created;
UInt32 _count;
UInt32 _maxCount;
pthread_mutex_t _mutex;
pthread_cond_t _cond;
} CSemaphore;
#define Semaphore_Construct(p) (p)->_created = 0
#define Semaphore_IsCreated(p) ((p)->_created)
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
WRes Semaphore_Wait(CSemaphore *p);
WRes Semaphore_Close(CSemaphore *p);
typedef struct _CCriticalSection
{
pthread_mutex_t _mutex;
} CCriticalSection;
WRes CriticalSection_Init(CCriticalSection *p);
void CriticalSection_Delete(CCriticalSection *cs);
void CriticalSection_Enter(CCriticalSection *cs);
void CriticalSection_Leave(CCriticalSection *cs);
LONG InterlockedIncrement(LONG volatile *addend);
#endif // _WIN32
EXTERN_C_END
#endif

View File

@@ -1,5 +1,5 @@
/* 7zMain.c - Test application for 7z Decoder
2019-02-02 : Igor Pavlov : Public domain */
2021-04-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -20,6 +20,13 @@
#ifdef _WIN32
#include <direct.h>
#else
#include <stdlib.h>
#include <time.h>
#ifdef __GNUC__
#include <sys/time.h>
#endif
#include <fcntl.h>
// #include <utime.h>
#include <sys/stat.h>
#include <errno.h>
#endif
@@ -108,7 +115,7 @@ static Byte *Utf16_To_Utf8(Byte *dest, const UInt16 *src, const UInt16 *srcLim)
if (val < 0x80)
{
*dest++ = (char)val;
*dest++ = (Byte)val;
continue;
}
@@ -162,21 +169,21 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s
)
{
unsigned len = 0;
for (len = 0; s[len] != 0; len++);
for (len = 0; s[len] != 0; len++) {}
#ifndef _USE_UTF8
{
unsigned size = len * 3 + 100;
const unsigned size = len * 3 + 100;
if (!Buf_EnsureSize(buf, size))
return SZ_ERROR_MEM;
{
buf->data[0] = 0;
if (len != 0)
{
char defaultChar = '_';
const char defaultChar = '_';
BOOL defUsed;
unsigned numChars = 0;
numChars = WideCharToMultiByte(codePage, 0, (LPCWSTR)s, len, (char *)buf->data, size, &defaultChar, &defUsed);
const unsigned numChars = (unsigned)WideCharToMultiByte(
codePage, 0, (LPCWSTR)s, (int)len, (char *)buf->data, (int)size, &defaultChar, &defUsed);
if (numChars == 0 || numChars >= size)
return SZ_ERROR_FAIL;
buf->data[numChars] = 0;
@@ -192,8 +199,8 @@ static SRes Utf16_To_Char(CBuf *buf, const UInt16 *s
#ifdef _WIN32
#ifndef USE_WINDOWS_FILE
static UINT g_FileCodePage = CP_ACP;
#endif
#define MY_FILE_CODE_PAGE_PARAM ,g_FileCodePage
#endif
#else
#define MY_FILE_CODE_PAGE_PARAM
#endif
@@ -300,17 +307,142 @@ static void UIntToStr_2(char *s, unsigned value)
s[1] = (char)('0' + (value % 10));
}
#define PERIOD_4 (4 * 365 + 1)
#define PERIOD_100 (PERIOD_4 * 25 - 1)
#define PERIOD_400 (PERIOD_100 * 4 + 1)
static void ConvertFileTimeToString(const CNtfsFileTime *nt, char *s)
#ifndef _WIN32
// MS uses long for BOOL, but long is 32-bit in MS. So we use int.
// typedef long BOOL;
typedef int BOOL;
typedef struct _FILETIME
{
DWORD dwLowDateTime;
DWORD dwHighDateTime;
} FILETIME;
/* Returns the difference, in seconds, between the current UTC broken-down
   time re-interpreted as local time by mktime() and the actual current
   time. FileTimeToLocalFileTime() subtracts this value (scaled to ticks)
   from a UTC time stamp to obtain local time.
   NOTE(review): the results of localtime() and gmtime() are not checked
   for NULL before they are dereferenced. */
static LONG TIME_GetBias()
{
time_t utc = time(NULL);
struct tm *ptm = localtime(&utc);
/* tm_isdst must be copied out before gmtime() is called: the C library
   is allowed to reuse the same static struct tm for both calls */
int localdaylight = ptm->tm_isdst; /* daylight for local timezone */
ptm = gmtime(&utc);
ptm->tm_isdst = localdaylight; /* use local daylight, not that of Greenwich */
LONG bias = (int)(mktime(ptm)-utc);
return bias;
}
#define TICKS_PER_SEC 10000000
#define GET_TIME_64(pft) ((pft)->dwLowDateTime | ((UInt64)(pft)->dwHighDateTime << 32))
#define SET_FILETIME(ft, v64) \
(ft)->dwLowDateTime = (DWORD)v64; \
(ft)->dwHighDateTime = (DWORD)(v64 >> 32);
#define WINAPI
#define TRUE 1
/* Non-Windows stand-in for the Win32 call of the same name: converts a UTC
   FILETIME to local time by subtracting the current time-zone bias.
   Always returns TRUE. */
static BOOL WINAPI FileTimeToLocalFileTime(const FILETIME *fileTime, FILETIME *localFileTime)
{
  const Int64 utcTicks = (Int64)GET_TIME_64(fileTime);
  const Int64 biasTicks = (Int64)TIME_GetBias() * TICKS_PER_SEC;
  const UInt64 localTicks = (UInt64)(utcTicks - biasTicks);
  /* SET_FILETIME expands its second argument without parentheses,
     so it must be given a plain variable name */
  SET_FILETIME(localFileTime, localTicks);
  return TRUE;
}
static const UInt32 kNumTimeQuantumsInSecond = 10000000;
static const UInt32 kFileTimeStartYear = 1601;
static const UInt32 kUnixTimeStartYear = 1970;
static const UInt64 kUnixTimeOffset =
(UInt64)60 * 60 * 24 * (89 + 365 * (kUnixTimeStartYear - kFileTimeStartYear));
static Int64 Time_FileTimeToUnixTime64(const FILETIME *ft)
{
UInt64 winTime = GET_TIME_64(ft);
return (Int64)(winTime / kNumTimeQuantumsInSecond) - (Int64)kUnixTimeOffset;
}
#if defined(_AIX)
#define MY_ST_TIMESPEC st_timespec
#else
#define MY_ST_TIMESPEC timespec
#endif
/* Fills *ts from a FILETIME for use with utimensat().
   When ft is NULL, or its seconds value does not fit into time_t,
   *ts is set to { 0, UTIME_OMIT } so that the existing file timestamp
   is kept. */
static void FILETIME_To_timespec(const FILETIME *ft, struct MY_ST_TIMESPEC *ts)
{
  if (ft)
  {
    const UInt64 ticks = GET_TIME_64(ft);
    const Int64 unixSec = Time_FileTimeToUnixTime64(ft);
    // time_t is long
    const time_t narrowed = (time_t)unixSec;
    if (narrowed == unixSec)
    {
      /* the value survived the conversion to time_t without truncation */
      ts->tv_sec = narrowed;
      ts->tv_nsec = (long)((ticks % 10000000) * 100);
      return;
    }
  }

  ts->tv_sec = 0;
  // ts.tv_nsec = UTIME_NOW; // set to the current time
  ts->tv_nsec = UTIME_OMIT; // keep old timestamp
}
/* Sets the modification time of the file `name` (UTF-16 path, resolved
   relative to the current directory, symlinks followed). The access time
   is left unchanged. mTime may be NULL, in which case the modification
   time is left unchanged too.
   Returns 0 on success, the error code of the name conversion, or the
   errno value reported by utimensat(). */
static WRes Set_File_FILETIME(const UInt16 *name, const FILETIME *mTime)
{
  struct timespec times[2];
  const int flags = 0; // follow link
  // = AT_SYMLINK_NOFOLLOW; // don't follow link
  CBuf buf;
  WRes wres;
  Buf_Init(&buf);
  wres = Utf16_To_Char(&buf, name MY_FILE_CODE_PAGE_PARAM);
  if (wres == 0)
  {
    FILETIME_To_timespec(NULL, &times[0]);   /* access time: keep */
    FILETIME_To_timespec(mTime, &times[1]);  /* modification time */
    /* capture errno right after the failing call: releasing the buffer
       below may change errno on some C libraries */
    if (utimensat(AT_FDCWD, (const char *)buf.data, times, flags) != 0)
      wres = errno;
  }
  /* the buffer is released on every path, including a failed conversion */
  Buf_Free(&buf, &g_Alloc);
  return wres;
}
#endif
/* Copies the two 32-bit halves of an archive time stamp into a FILETIME. */
static void NtfsFileTime_to_FILETIME(const CNtfsFileTime *t, FILETIME *ft)
{
  ft->dwHighDateTime = (DWORD)(t->High);
  ft->dwLowDateTime = (DWORD)(t->Low);
}
static void ConvertFileTimeToString(const CNtfsFileTime *nTime, char *s)
{
unsigned year, mon, hour, min, sec;
Byte ms[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
unsigned t;
UInt32 v;
UInt64 v64 = nt->Low | ((UInt64)nt->High << 32);
// UInt64 v64 = nt->Low | ((UInt64)nt->High << 32);
UInt64 v64;
{
FILETIME fileTime, locTime;
NtfsFileTime_to_FILETIME(nTime, &fileTime);
if (!FileTimeToLocalFileTime(&fileTime, &locTime))
{
locTime.dwHighDateTime =
locTime.dwLowDateTime = 0;
}
v64 = locTime.dwLowDateTime | ((UInt64)locTime.dwHighDateTime << 32);
}
v64 /= 10000000;
sec = (unsigned)(v64 % 60); v64 /= 60;
min = (unsigned)(v64 % 60); v64 /= 60;
@@ -354,6 +486,43 @@ static void PrintError(char *s)
PrintLF();
}
/* Prints an error report for a failed system call:
     - "ERROR: " followed by `message`
     - "System error code: " followed by the numeric value of `wres`
     - " : " and the system's textual description, when one is available
   The text comes from FormatMessageA() on Windows and from strerror()
   elsewhere. */
static void PrintError_WRes(const char *message, WRes wres)
{
  char codeText[32];

  Print("\nERROR: ");
  Print(message);
  PrintLF();

  UIntToStr(codeText, (unsigned)wres, 1);
  Print("System error code: ");
  Print(codeText);

  #ifdef _WIN32
  {
    /* FormatMessageA allocates the text buffer; it is released with LocalFree */
    char *sysText = NULL;
    if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
        FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
        NULL, wres, 0, (LPSTR) &sysText, 0, NULL) != 0 && sysText)
    {
      Print(" : ");
      Print(sysText);
      LocalFree(sysText);
    }
  }
  #else
  {
    const char *sysText = strerror(wres);
    if (sysText)
    {
      Print(" : ");
      Print(sysText);
    }
  }
  #endif

  PrintLF();
}
static void GetAttribString(UInt32 wa, BoolInt isDir, char *s)
{
#ifdef USE_WINDOWS_FILE
@@ -413,17 +582,22 @@ int MY_CDECL main(int numargs, char *args[])
allocImp = g_Alloc;
allocTempImp = g_Alloc;
#ifdef UNDER_CE
if (InFile_OpenW(&archiveStream.file, L"\test.7z"))
#else
if (InFile_Open(&archiveStream.file, args[2]))
#endif
{
PrintError("can not open input file");
WRes wres =
#ifdef UNDER_CE
InFile_OpenW(&archiveStream.file, L"\test.7z"); // change it
#else
InFile_Open(&archiveStream.file, args[2]);
#endif
if (wres != 0)
{
PrintError_WRes("cannot open input file", wres);
return 1;
}
}
FileInStream_CreateVTable(&archiveStream);
archiveStream.wres = 0;
LookToRead2_CreateVTable(&lookStream, False);
lookStream.buf = NULL;
@@ -483,7 +657,7 @@ int MY_CDECL main(int numargs, char *args[])
size_t outSizeProcessed = 0;
// const CSzFileItem *f = db.Files + i;
size_t len;
unsigned isDir = SzArEx_IsDir(&db, i);
const BoolInt isDir = SzArEx_IsDir(&db, i);
if (listCommand == 0 && isDir && !fullPaths)
continue;
len = SzArEx_GetFileNameUtf16(&db, i, NULL);
@@ -546,8 +720,8 @@ int MY_CDECL main(int numargs, char *args[])
}
Print(testCommand ?
"Testing ":
"Extracting ");
"T ":
"- ");
res = PrintString(temp);
if (res != SZ_OK)
break;
@@ -591,27 +765,37 @@ int MY_CDECL main(int numargs, char *args[])
PrintLF();
continue;
}
else if (OutFile_OpenUtf16(&outFile, destPath))
else
{
PrintError("can not open output file");
WRes wres = OutFile_OpenUtf16(&outFile, destPath);
if (wres != 0)
{
PrintError_WRes("cannot open output file", wres);
res = SZ_ERROR_FAIL;
break;
}
}
processedSize = outSizeProcessed;
if (File_Write(&outFile, outBuffer + offset, &processedSize) != 0 || processedSize != outSizeProcessed)
{
PrintError("can not write output file");
WRes wres = File_Write(&outFile, outBuffer + offset, &processedSize);
if (wres != 0 || processedSize != outSizeProcessed)
{
PrintError_WRes("cannot write output file", wres);
res = SZ_ERROR_FAIL;
break;
}
}
{
FILETIME mtime;
FILETIME *mtimePtr = NULL;
#ifdef USE_WINDOWS_FILE
{
FILETIME mtime, ctime;
FILETIME *mtimePtr = NULL;
FILETIME ctime;
FILETIME *ctimePtr = NULL;
#endif
if (SzBitWithVals_Check(&db.MTime, i))
{
@@ -620,6 +804,8 @@ int MY_CDECL main(int numargs, char *args[])
mtime.dwHighDateTime = (DWORD)(t->High);
mtimePtr = &mtime;
}
#ifdef USE_WINDOWS_FILE
if (SzBitWithVals_Check(&db.CTime, i))
{
const CNtfsFileTime *t = &db.CTime.Vals[i];
@@ -627,17 +813,30 @@ int MY_CDECL main(int numargs, char *args[])
ctime.dwHighDateTime = (DWORD)(t->High);
ctimePtr = &ctime;
}
if (mtimePtr || ctimePtr)
SetFileTime(outFile.handle, ctimePtr, NULL, mtimePtr);
}
#endif
if (File_Close(&outFile))
{
PrintError("can not close output file");
WRes wres = File_Close(&outFile);
if (wres != 0)
{
PrintError_WRes("cannot close output file", wres);
res = SZ_ERROR_FAIL;
break;
}
}
#ifndef USE_WINDOWS_FILE
#ifdef _WIN32
mtimePtr = mtimePtr;
#else
if (mtimePtr)
Set_File_FILETIME(destPath, mtimePtr);
#endif
#endif
}
#ifdef USE_WINDOWS_FILE
if (SzBitWithVals_Check(&db.Attribs, i))
@@ -672,13 +871,15 @@ int MY_CDECL main(int numargs, char *args[])
if (res == SZ_ERROR_UNSUPPORTED)
PrintError("decoder doesn't support this archive");
else if (res == SZ_ERROR_MEM)
PrintError("can not allocate memory");
PrintError("cannot allocate memory");
else if (res == SZ_ERROR_CRC)
PrintError("CRC error");
else if (res == SZ_ERROR_READ /* || archiveStream.Res != 0 */)
PrintError_WRes("Read Error", archiveStream.wres);
else
{
char s[32];
UInt64ToStr(res, s, 0);
UInt64ToStr((unsigned)res, s, 0);
PrintError(s);
}

View File

@@ -1,75 +1,34 @@
PROG = 7zDec
CXX = gcc
LIB =
RM = rm -f
CFLAGS = -c -O2 -Wall
PROG = 7zdec
OBJS = 7zMain.o 7zAlloc.o 7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o CpuArch.o Delta.o LzmaDec.o Lzma2Dec.o Bra.o Bra86.o BraIA64.o Bcj2.o Ppmd7.o Ppmd7Dec.o 7zFile.o 7zStream.o
LOCAL_FLAGS = -D_7ZIP_PPMD_SUPPPORT
all: $(PROG)
include ../../../CPP/7zip/LzmaDec_gcc.mak
$(PROG): $(OBJS)
$(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB)
7zMain.o: 7zMain.c
$(CXX) $(CFLAGS) 7zMain.c
OBJS = \
$(LZMA_DEC_OPT_OBJS) \
$O/Bcj2.o \
$O/Bra.o \
$O/Bra86.o \
$O/BraIA64.o \
$O/CpuArch.o \
$O/Delta.o \
$O/Lzma2Dec.o \
$O/LzmaDec.o \
$O/Ppmd7.o \
$O/Ppmd7Dec.o \
$O/7zCrc.o \
$O/7zCrcOpt.o \
$O/Sha256.o \
$O/Sha256Opt.o \
$O/7zAlloc.o \
$O/7zArcIn.o \
$O/7zBuf.o \
$O/7zBuf2.o \
$O/7zDec.o \
$O/7zMain.o \
$O/7zFile.o \
$O/7zStream.o \
7zAlloc.o: ../../7zAlloc.c
$(CXX) $(CFLAGS) ../../7zAlloc.c
7zArcIn.o: ../../7zArcIn.c
$(CXX) $(CFLAGS) ../../7zArcIn.c
7zBuf.o: ../../7zBuf.c
$(CXX) $(CFLAGS) ../../7zBuf.c
7zBuf2.o: ../../7zBuf2.c
$(CXX) $(CFLAGS) ../../7zBuf2.c
7zCrc.o: ../../7zCrc.c
$(CXX) $(CFLAGS) ../../7zCrc.c
7zCrcOpt.o: ../../7zCrc.c
$(CXX) $(CFLAGS) ../../7zCrcOpt.c
7zDec.o: ../../7zDec.c
$(CXX) $(CFLAGS) -D_7ZIP_PPMD_SUPPPORT ../../7zDec.c
CpuArch.o: ../../CpuArch.c
$(CXX) $(CFLAGS) ../../CpuArch.c
Delta.o: ../../Delta.c
$(CXX) $(CFLAGS) ../../Delta.c
LzmaDec.o: ../../LzmaDec.c
$(CXX) $(CFLAGS) ../../LzmaDec.c
Lzma2Dec.o: ../../Lzma2Dec.c
$(CXX) $(CFLAGS) ../../Lzma2Dec.c
Bra.o: ../../Bra.c
$(CXX) $(CFLAGS) ../../Bra.c
Bra86.o: ../../Bra86.c
$(CXX) $(CFLAGS) ../../Bra86.c
BraIA64.o: ../../BraIA64.c
$(CXX) $(CFLAGS) ../../BraIA64.c
Bcj2.o: ../../Bcj2.c
$(CXX) $(CFLAGS) ../../Bcj2.c
Ppmd7.o: ../../Ppmd7.c
$(CXX) $(CFLAGS) ../../Ppmd7.c
Ppmd7Dec.o: ../../Ppmd7Dec.c
$(CXX) $(CFLAGS) ../../Ppmd7Dec.c
7zFile.o: ../../7zFile.c
$(CXX) $(CFLAGS) ../../7zFile.c
7zStream.o: ../../7zStream.c
$(CXX) $(CFLAGS) ../../7zStream.c
clean:
-$(RM) $(PROG) $(OBJS)
include ../../7zip_gcc_c.mak

View File

@@ -1,5 +1,5 @@
/* 7zipInstall.c - 7-Zip Installer
2019-02-19 : Igor Pavlov : Public domain */
2021-02-23 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -12,9 +12,6 @@
#include <windows.h>
#include <ShlObj.h>
#define LLL_(quote) L##quote
#define LLL(quote) LLL_(quote)
#include "../../7z.h"
#include "../../7zAlloc.h"
#include "../../7zCrc.h"
@@ -25,11 +22,15 @@
#include "resource.h"
#define LLL_(quote) L##quote
#define LLL(quote) LLL_(quote)
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define wcscat lstrcatW
#define wcslen lstrlenW
#define wcscpy lstrcpyW
#define wcsncpy lstrcpynW
// wcsncpy() and lstrcpynW() work differently. We don't use them.
#define kInputBufSize ((size_t)1 << 18)
@@ -38,7 +39,7 @@
#define _7ZIP_CUR_VER ((MY_VER_MAJOR << 16) | MY_VER_MINOR)
#define _7ZIP_DLL_VER_COMPAT ((16 << 16) | 3)
static LPCWSTR const k_7zip = L"7-Zip";
static LPCSTR const k_7zip = "7-Zip";
static LPCWSTR const k_Reg_Software_7zip = L"Software\\7-Zip";
@@ -51,11 +52,27 @@ static LPCWSTR const k_Reg_Software_7zip = L"Software\\7-Zip";
#define k_7zip_with_Ver_base L"7-Zip " LLL(MY_VERSION)
#ifdef _64BIT_INSTALLER
#define k_7zip_with_Ver k_7zip_with_Ver_base L" (x64)"
// #define USE_7ZIP_32_DLL
#if defined(_M_ARM64) || defined(_M_ARM)
#define k_Postfix L" (arm64)"
#else
#define k_Postfix L" (x64)"
#define USE_7ZIP_32_DLL
#endif
#else
#define k_7zip_with_Ver k_7zip_with_Ver_base
#if defined(_M_ARM64) || defined(_M_ARM)
#define k_Postfix L" (arm)"
#else
// #define k_Postfix L" (x86)"
#define k_Postfix
#endif
#endif
#define k_7zip_with_Ver k_7zip_with_Ver_base k_Postfix
static LPCWSTR const k_7zip_with_Ver_str = k_7zip_with_Ver;
static LPCWSTR const k_7zip_Setup = k_7zip_with_Ver L" Setup";
@@ -100,24 +117,47 @@ static HWND g_Progress_HWND;
static DWORD g_TotalSize;
static WCHAR cmd[MAX_PATH + 4];
static WCHAR cmdError[MAX_PATH + 4];
static WCHAR path[MAX_PATH * 2 + 40];
#define MAKE_CHAR_UPPER(c) ((((c) >= 'a' && (c) <= 'z') ? (c) -= 0x20 : (c)))
// #define MAKE_CHAR_UPPER(c) ((((c) >= 'a' && (c) <= 'z') ? (c) -= 0x20 : (c)))
static void PrintErrorMessage(const char *s)
static void CpyAscii(wchar_t *dest, const char *s)
{
WCHAR s2[256 + 4];
unsigned i;
for (i = 0; i < 256; i++)
for (;;)
{
Byte b = s[i];
Byte b = (Byte)*s++;
*dest++ = b;
if (b == 0)
break;
s2[i] = b;
return;
}
s2[i] = 0;
MessageBoxW(g_HWND, s2, k_7zip_with_Ver_str, MB_ICONERROR);
}
/* Appends the narrow string `s` to the wide string `dest`, widening each
   byte. The caller must provide enough room in `dest`; no bound is checked. */
static void CatAscii(wchar_t *dest, const char *s)
{
  CpyAscii(dest + wcslen(dest), s);
}
static void PrintErrorMessage(const char *s1, const wchar_t *s2)
{
WCHAR m[MAX_PATH + 512];
m[0] = 0;
CatAscii(m, "ERROR:");
if (s1)
{
CatAscii(m, "\n");
CatAscii(m, s1);
}
if (s2)
{
CatAscii(m, "\n");
wcscat(m, s2);
}
MessageBoxW(g_HWND, m, k_7zip_with_Ver_str, MB_ICONERROR | MB_OK);
}
@@ -347,7 +387,7 @@ static LONG MyRegistry_CreateKeyAndVal(HKEY parentKey, LPCWSTR keyName, LPCWSTR
}
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
static LONG MyRegistry_CreateKey_32(HKEY parentKey, LPCWSTR name, HKEY *destKey)
{
@@ -441,7 +481,7 @@ static void HexToString(UInt32 val, WCHAR *s)
#ifndef UNDER_CE
int CALLBACK BrowseCallbackProc(HWND hwnd, UINT uMsg, LPARAM lp, LPARAM data)
static int CALLBACK BrowseCallbackProc(HWND hwnd, UINT uMsg, LPARAM lp, LPARAM data)
{
UNUSED_VAR(lp)
UNUSED_VAR(data)
@@ -560,11 +600,11 @@ static void Set7zipPostfix(WCHAR *s)
NormalizePrefix(s);
if (FindSubString(s, "7-Zip"))
return;
wcscat(s, L"7-Zip\\");
CatAscii(s, "7-Zip\\");
}
static int Install();
static int Install(void);
static void OnClose()
{
@@ -612,7 +652,7 @@ static INT_PTR CALLBACK MyDlgProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM
}
if (!g_Install_was_Pressed)
{
SendMessage(hwnd, WM_NEXTDLGCTL, (WPARAM)GetDlgItem(hwnd, IDCANCEL), TRUE);
SendMessage(hwnd, WM_NEXTDLGCTL, (WPARAM)(void *)GetDlgItem(hwnd, IDCANCEL), TRUE);
EnableWindow(g_Path_HWND, FALSE);
EnableWindow(GetDlgItem(hwnd, IDB_EXTRACT_SET_PATH), FALSE);
@@ -723,7 +763,7 @@ static void SetShellProgramsGroup(HWND hwndOwner)
{
#ifdef UNDER_CE
// wcscpy(link, L"\\Program Files\\");
// CpyAscii(link, "\\Program Files\\");
UNUSED_VAR(hwndOwner)
#else
@@ -744,8 +784,8 @@ static void SetShellProgramsGroup(HWND hwndOwner)
continue;
NormalizePrefix(link);
wcscat(link, k_7zip);
// wcscat(link, L"2");
CatAscii(link, k_7zip);
// CatAscii(link, "2");
if (i != 0)
MyCreateDir(link);
@@ -758,14 +798,14 @@ static void SetShellProgramsGroup(HWND hwndOwner)
for (k = 0; k < 2; k++)
{
wcscpy(link + baseLen, k == 0 ?
L"7-Zip File Manager.lnk" :
L"7-Zip Help.lnk"
CpyAscii(link + baseLen, k == 0 ?
"7-Zip File Manager.lnk" :
"7-Zip Help.lnk"
);
wcscpy(destPath, path);
wcscat(destPath, k == 0 ?
L"7zFM.exe" :
L"7-zip.chm");
CatAscii(destPath, k == 0 ?
"7zFM.exe" :
"7-zip.chm");
if (i == 0)
DeleteFileW(link);
@@ -789,7 +829,7 @@ static void WriteCLSID()
HKEY destKey;
LONG res;
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
MyRegistry_CreateKeyAndVal_32(HKEY_CLASSES_ROOT, k_Reg_CLSID_7zip, NULL, k_7zip_ShellExtension);
@@ -797,9 +837,9 @@ static void WriteCLSID()
if (res == ERROR_SUCCESS)
{
WCHAR destPath[MAX_PATH + 10];
WCHAR destPath[MAX_PATH + 40];
wcscpy(destPath, path);
wcscat(destPath, L"7-zip32.dll");
CatAscii(destPath, "7-zip32.dll");
/* res = */ MyRegistry_SetString(destKey, NULL, destPath);
/* res = */ MyRegistry_SetString(destKey, L"ThreadingModel", L"Apartment");
// DeleteRegValue(destKey, L"InprocServer32");
@@ -816,9 +856,9 @@ static void WriteCLSID()
if (res == ERROR_SUCCESS)
{
WCHAR destPath[MAX_PATH + 10];
WCHAR destPath[MAX_PATH + 40];
wcscpy(destPath, path);
wcscat(destPath, L"7-zip.dll");
CatAscii(destPath, "7-zip.dll");
/* res = */ MyRegistry_SetString(destKey, NULL, destPath);
/* res = */ MyRegistry_SetString(destKey, L"ThreadingModel", L"Apartment");
// DeleteRegValue(destKey, L"InprocServer32");
@@ -826,13 +866,13 @@ static void WriteCLSID()
}
}
static LPCWSTR const k_ShellEx_Items[] =
static LPCSTR const k_ShellEx_Items[] =
{
L"*\\shellex\\ContextMenuHandlers"
, L"Directory\\shellex\\ContextMenuHandlers"
, L"Folder\\shellex\\ContextMenuHandlers"
, L"Directory\\shellex\\DragDropHandlers"
, L"Drive\\shellex\\DragDropHandlers"
"*\\shellex\\ContextMenuHandlers"
, "Directory\\shellex\\ContextMenuHandlers"
, "Folder\\shellex\\ContextMenuHandlers"
, "Directory\\shellex\\DragDropHandlers"
, "Drive\\shellex\\DragDropHandlers"
};
static void WriteShellEx()
@@ -840,31 +880,31 @@ static void WriteShellEx()
unsigned i;
WCHAR destPath[MAX_PATH + 40];
for (i = 0; i < sizeof(k_ShellEx_Items) / sizeof(k_ShellEx_Items[0]); i++)
for (i = 0; i < ARRAY_SIZE(k_ShellEx_Items); i++)
{
wcscpy(destPath, k_ShellEx_Items[i]);
wcscat(destPath, L"\\7-Zip");
CpyAscii(destPath, k_ShellEx_Items[i]);
CatAscii(destPath, "\\7-Zip");
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
MyRegistry_CreateKeyAndVal_32(HKEY_CLASSES_ROOT, destPath, NULL, k_7zip_CLSID);
#endif
MyRegistry_CreateKeyAndVal (HKEY_CLASSES_ROOT, destPath, NULL, k_7zip_CLSID);
}
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
MyRegistry_CreateKeyAndVal_32(HKEY_LOCAL_MACHINE, k_Shell_Approved, k_7zip_CLSID, k_7zip_ShellExtension);
#endif
MyRegistry_CreateKeyAndVal (HKEY_LOCAL_MACHINE, k_Shell_Approved, k_7zip_CLSID, k_7zip_ShellExtension);
wcscpy(destPath, path);
CatAscii(destPath, "7zFM.exe");
{
HKEY destKey = 0;
LONG res = MyRegistry_CreateKey(HKEY_LOCAL_MACHINE, L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\7zFM.exe", &destKey);
if (res == ERROR_SUCCESS)
{
wcscpy(destPath, path);
wcscat(destPath, L"7zFM.exe");
MyRegistry_SetString(destKey, NULL, destPath);
MyRegistry_SetString(destKey, L"Path", path);
RegCloseKey(destKey);
@@ -877,17 +917,14 @@ static void WriteShellEx()
LONG res = MyRegistry_CreateKey(HKEY_LOCAL_MACHINE, L"Software\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\7-Zip", &destKey);
if (res == ERROR_SUCCESS)
{
// wcscpy(destPath, path);
// wcscat(destPath, L"7zFM.exe");
MyRegistry_SetString(destKey, L"DisplayName", k_7zip_with_Ver_str);
MyRegistry_SetString(destKey, L"DisplayVersion", LLL(MY_VERSION_NUMBERS));
MyRegistry_SetString(destKey, L"DisplayIcon", destPath);
MyRegistry_SetString(destKey, L"InstallLocation", path);
wcscpy(destPath, path);
MyRegistry_SetString(destKey, L"InstallLocation", destPath);
wcscat(destPath, L"Uninstall.exe");
// wcscat(destPath, L"\"");
destPath[0] = '\"';
wcscpy(destPath + 1, path);
CatAscii(destPath, "Uninstall.exe\"");
MyRegistry_SetString(destKey, L"UninstallString", destPath);
MyRegistry_SetDWORD(destKey, L"NoModify", 1);
@@ -912,17 +949,27 @@ static void WriteShellEx()
static const wchar_t *GetCmdParam(const wchar_t *s)
{
unsigned pos = 0;
BoolInt quoteMode = False;
for (;; s++)
{
wchar_t c = *s;
if (c == 0 || (c == L' ' && !quoteMode))
break;
if (c == L'\"')
{
quoteMode = !quoteMode;
else if (c == 0 || (c == L' ' && !quoteMode))
return s;
continue;
}
if (pos >= ARRAY_SIZE(cmd) - 1)
exit(1);
cmd[pos++] = c;
}
cmd[pos] = 0;
return s;
}
static void RemoveQuotes(wchar_t *s)
{
const wchar_t *src = s;
@@ -937,7 +984,7 @@ static void RemoveQuotes(wchar_t *s)
}
}
#define IS_LIMIT_CHAR(c) (c == 0 || c == ' ')
// #define IS_LIMIT_CHAR(c) (c == 0 || c == ' ')
typedef BOOL (WINAPI *Func_IsWow64Process)(HANDLE, PBOOL);
@@ -984,24 +1031,36 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
{
const wchar_t *s2 = GetCmdParam(s);
if (s[0] == '/')
BoolInt error = True;
if (cmd[0] == '/')
{
if (cmd[1] == 'S')
{
if (cmd[2] == 0)
{
if (s[1] == 'S' && IS_LIMIT_CHAR(s[2]))
g_SilentMode = True;
else if (s[1] == 'D' && s[2] == '=')
error = False;
}
}
else if (cmd[1] == 'D' && cmd[2] == '=')
{
size_t num;
s += 3;
num = s2 - s;
if (num > MAX_PATH)
num = MAX_PATH;
wcsncpy(path, s, (unsigned)num);
RemoveQuotes(path);
wcscpy(path, cmd + 3);
// RemoveQuotes(path);
error = False;
}
}
s = s2;
if (error && cmdError[0] == 0)
wcscpy(cmdError, cmd);
}
}
if (cmdError[0] != 0)
{
if (!g_SilentMode)
PrintErrorMessage("Unsupported command:", cmdError);
return 1;
}
}
#if defined(_64BIT_INSTALLER) && !defined(_WIN64)
@@ -1016,7 +1075,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
if (!isWow64)
{
if (!g_SilentMode)
PrintErrorMessage("This installation requires Windows x64");
PrintErrorMessage("This installation requires Windows " MY_CPU_NAME, NULL);
return 1;
}
}
@@ -1040,27 +1099,27 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
{
/*
#ifdef UNDER_CE
wcscpy(path, L"\\Program Files\\");
CpyAscii(path, "\\Program Files\\");
#else
#ifdef _64BIT_INSTALLER
{
DWORD ttt = GetEnvironmentVariableW(L"ProgramW6432", path, MAX_PATH);
if (ttt == 0 || ttt > MAX_PATH)
wcscpy(path, L"C:\\");
CpyAscii(path, "C:\\");
}
#else
if (!SHGetSpecialFolderPathW(0, path, CSIDL_PROGRAM_FILES, FALSE))
wcscpy(path, L"C:\\");
CpyAscii(path, "C:\\");
#endif
#endif
*/
if (!MyRegistry_QueryString2(HKEY_LOCAL_MACHINE, L"Software\\Microsoft\\Windows\\CurrentVersion", L"ProgramFilesDir", path))
wcscpy(path,
CpyAscii(path,
#ifdef UNDER_CE
L"\\Program Files\\"
"\\Program Files\\"
#else
L"C:\\"
"C:\\"
#endif
);
@@ -1122,7 +1181,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
SetDlgItemTextW(g_HWND, IDOK, L"Close");
EnableWindow(GetDlgItem(g_HWND, IDOK), TRUE);
EnableWindow(GetDlgItem(g_HWND, IDCANCEL), FALSE);
SendMessage(g_HWND, WM_NEXTDLGCTL, (WPARAM)GetDlgItem(g_HWND, IDOK), TRUE);
SendMessage(g_HWND, WM_NEXTDLGCTL, (WPARAM)(void *)GetDlgItem(g_HWND, IDOK), TRUE);
}
}
}
@@ -1155,7 +1214,7 @@ static BoolInt GetErrorMessage(DWORD errorCode, WCHAR *message)
static int Install()
static int Install(void)
{
CFileInStream archiveStream;
CLookToRead2 lookStream;
@@ -1212,6 +1271,7 @@ if (res == SZ_OK)
LookToRead2_CreateVTable(&lookStream, False);
lookStream.buf = NULL;
RemoveQuotes(path);
{
// Remove post spaces
unsigned endPos = 0;
@@ -1227,6 +1287,11 @@ if (res == SZ_OK)
}
path[endPos] = 0;
if (path[0] == 0)
{
PrintErrorMessage("Incorrect path", NULL);
return 1;
}
}
NormalizePrefix(path);
@@ -1367,7 +1432,7 @@ if (res == SZ_OK)
break;
}
wcscpy(path, origPath);
wcscat(path, L".tmp");
CatAscii(path, ".tmp");
if (tempIndex > 1)
HexToString(tempIndex, path + wcslen(path));
if (GetFileAttributesW(path) != INVALID_FILE_ATTRIBUTES)
@@ -1391,7 +1456,7 @@ if (res == SZ_OK)
}
if (FindSubString(temp, "7-zip.dll")
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
|| FindSubString(temp, "7-zip32.dll")
#endif
)
@@ -1411,9 +1476,9 @@ if (res == SZ_OK)
WCHAR message[MAX_PATH * 3 + 100];
int mbRes;
wcscpy(message, L"Can't open file\n");
CpyAscii(message, "Can't open file\n");
wcscat(message, path);
wcscat(message, L"\n");
CatAscii(message, "\n");
GetErrorMessage(openRes, message + wcslen(message));
@@ -1572,7 +1637,7 @@ if (res == SZ_OK)
WCHAR m[MAX_PATH + 100];
m[0] = 0;
GetErrorMessage(winRes, m);
MessageBoxW(g_HWND, m, k_7zip_with_Ver_str, MB_ICONERROR);
PrintErrorMessage(NULL, m);
}
else
{
@@ -1590,7 +1655,7 @@ if (res == SZ_OK)
if (!errorMessage)
errorMessage = "ERROR";
PrintErrorMessage(errorMessage);
PrintErrorMessage(errorMessage, NULL);
}
}

View File

@@ -1,15 +1,15 @@
/* 7zipUninstall.c - 7-Zip Uninstaller
2019-02-02 : Igor Pavlov : Public domain */
2021-02-23 : Igor Pavlov : Public domain */
#include "Precomp.h"
#define SZ_ERROR_ABORT 100
#ifdef _MSC_VER
#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
#pragma warning(disable : 4011) // vs2010: identifier truncated to _CRT_SECURE_CPP_OVERLOAD_SECURE
#endif
// #define SZ_ERROR_ABORT 100
#include <windows.h>
#include <ShlObj.h>
@@ -20,7 +20,9 @@
#define LLL_(quote) L##quote
#define LLL(quote) LLL_(quote)
// static const WCHAR * const k_7zip = L"7-Zip";
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
// static LPCWSTR const k_7zip = L"7-Zip";
// #define _64BIT_INSTALLER 1
@@ -31,18 +33,33 @@
#define k_7zip_with_Ver_base L"7-Zip " LLL(MY_VERSION)
#ifdef _64BIT_INSTALLER
#define k_7zip_with_Ver k_7zip_with_Ver_base L" (x64)"
// #define USE_7ZIP_32_DLL
#if defined(_M_ARM64) || defined(_M_ARM)
#define k_Postfix L" (arm64)"
#else
#define k_Postfix L" (x64)"
#define USE_7ZIP_32_DLL
#endif
#else
#define k_7zip_with_Ver k_7zip_with_Ver_base
#if defined(_M_ARM64) || defined(_M_ARM)
#define k_Postfix L" (arm)"
#else
// #define k_Postfix L" (x86)"
#define k_Postfix
#endif
#endif
// static const WCHAR * const k_7zip_with_Ver_str = k_7zip_with_Ver;
#define k_7zip_with_Ver k_7zip_with_Ver_base k_Postfix
static const WCHAR * const k_Reg_Software_7zip = L"Software\\7-Zip";
static LPCWSTR const k_7zip_with_Ver_Uninstall = k_7zip_with_Ver L" Uninstall";
static const WCHAR * const k_Reg_Path = L"Path";
static LPCWSTR const k_Reg_Software_7zip = L"Software\\7-Zip";
static const WCHAR * const k_Reg_Path32 = L"Path"
static LPCWSTR const k_Reg_Path = L"Path";
static LPCWSTR const k_Reg_Path32 = L"Path"
#ifdef _64BIT_INSTALLER
L"64"
#else
@@ -64,8 +81,8 @@ static const WCHAR * const k_Reg_Path32 = L"Path"
#define k_7zip_CLSID L"{23170F69-40C1-278A-1000-000100020000}"
static const WCHAR * const k_Reg_CLSID_7zip = L"CLSID\\" k_7zip_CLSID;
static const WCHAR * const k_Reg_CLSID_7zip_Inproc = L"CLSID\\" k_7zip_CLSID L"\\InprocServer32";
static LPCWSTR const k_Reg_CLSID_7zip = L"CLSID\\" k_7zip_CLSID;
static LPCWSTR const k_Reg_CLSID_7zip_Inproc = L"CLSID\\" k_7zip_CLSID L"\\InprocServer32";
#define g_AllUsers True
@@ -79,9 +96,12 @@ static HWND g_Path_HWND;
static HWND g_InfoLine_HWND;
static HWND g_Progress_HWND;
typedef WINADVAPI LONG (APIENTRY *Func_RegDeleteKeyExW)(HKEY hKey, LPCWSTR lpSubKey, REGSAM samDesired, DWORD Reserved);
// WINADVAPI
typedef LONG (APIENTRY *Func_RegDeleteKeyExW)(HKEY hKey, LPCWSTR lpSubKey, REGSAM samDesired, DWORD Reserved);
static Func_RegDeleteKeyExW func_RegDeleteKeyExW;
static WCHAR cmd[MAX_PATH + 4];
static WCHAR cmdError[MAX_PATH + 4];
static WCHAR path[MAX_PATH * 2 + 40];
static WCHAR workDir[MAX_PATH + 10];
static WCHAR modulePath[MAX_PATH + 10];
@@ -90,10 +110,47 @@ static WCHAR tempPath[MAX_PATH * 2 + 40];
static WCHAR cmdLine[MAX_PATH * 3 + 40];
static WCHAR copyPath[MAX_PATH * 2 + 40];
static const WCHAR * const kUninstallExe = L"Uninstall.exe";
static LPCWSTR const kUninstallExe = L"Uninstall.exe";
#define MAKE_CHAR_UPPER(c) ((((c) >= 'a' && (c) <= 'z') ? (c) -= 0x20 : (c)))
/* Copies the zero-terminated narrow string (s) to the wide buffer (dest).
   Each source char is converted through (Byte) before it is stored.
   The terminating zero is copied too.
   The caller is responsible for the capacity of (dest): no bound is checked here. */
static void CpyAscii(wchar_t *dest, const char *s)
{
  Byte b;
  do
  {
    b = (Byte)*s++;
    *dest++ = b;
  }
  while (b != 0);
}
/* Appends the narrow string (s) to the zero-terminated wide string in (dest).
   The caller is responsible for the capacity of (dest). */
static void CatAscii(wchar_t *dest, const char *s)
{
  wchar_t *end = dest + wcslen(dest);
  CpyAscii(end, s);
}
static void PrintErrorMessage(const char *s1, const wchar_t *s2)
{
WCHAR m[MAX_PATH + 512];
m[0] = 0;
CatAscii(m, "ERROR:");
if (s1)
{
CatAscii(m, "\n");
CatAscii(m, s1);
}
if (s2)
{
CatAscii(m, "\n");
wcscat(m, s2);
}
MessageBoxW(g_HWND, m, k_7zip_with_Ver_Uninstall, MB_ICONERROR | MB_OK);
}
static BoolInt AreStringsEqual_NoCase(const wchar_t *s1, const wchar_t *s2)
{
for (;;)
@@ -171,7 +228,7 @@ static LONG MyRegistry_DeleteKey(HKEY parentKey, LPCWSTR name)
#endif
}
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
static int MyRegistry_QueryString2_32(HKEY hKey, LPCWSTR keyName, LPCWSTR valName, LPWSTR dest)
{
@@ -295,7 +352,7 @@ static void SetShellProgramsGroup(HWND hwndOwner)
continue;
NormalizePrefix(link);
wcscat(link, L"7-Zip\\");
CatAscii(link, "7-Zip\\");
{
const size_t baseLen = wcslen(link);
@@ -304,13 +361,13 @@ static void SetShellProgramsGroup(HWND hwndOwner)
for (k = 0; k < 2; k++)
{
wcscpy(link + baseLen, k == 0 ?
L"7-Zip File Manager.lnk" :
L"7-Zip Help.lnk");
CpyAscii(link + baseLen, k == 0 ?
"7-Zip File Manager.lnk" :
"7-Zip Help.lnk");
wcscpy(destPath, path);
wcscat(destPath, k == 0 ?
L"7zFM.exe" :
L"7-zip.chm");
CatAscii(destPath, k == 0 ?
"7zFM.exe" :
"7-zip.chm");
if (CreateShellLink(link, destPath) == S_OK)
{
@@ -331,20 +388,20 @@ static void SetShellProgramsGroup(HWND hwndOwner)
}
static const WCHAR * const k_ShellEx_Items[] =
static LPCSTR const k_ShellEx_Items[] =
{
L"*\\shellex\\ContextMenuHandlers"
, L"Directory\\shellex\\ContextMenuHandlers"
, L"Folder\\shellex\\ContextMenuHandlers"
, L"Directory\\shellex\\DragDropHandlers"
, L"Drive\\shellex\\DragDropHandlers"
"*\\shellex\\ContextMenuHandlers"
, "Directory\\shellex\\ContextMenuHandlers"
, "Folder\\shellex\\ContextMenuHandlers"
, "Directory\\shellex\\DragDropHandlers"
, "Drive\\shellex\\DragDropHandlers"
};
static const WCHAR * const k_Shell_Approved = L"Software\\Microsoft\\Windows\\CurrentVersion\\Shell Extensions\\Approved";
static LPCWSTR const k_Shell_Approved = L"Software\\Microsoft\\Windows\\CurrentVersion\\Shell Extensions\\Approved";
static const WCHAR * const k_AppPaths_7zFm = L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\7zFM.exe";
static LPCWSTR const k_AppPaths_7zFm = L"Software\\Microsoft\\Windows\\CurrentVersion\\App Paths\\7zFM.exe";
#define k_REG_Uninstall L"Software\\Microsoft\\Windows\\CurrentVersion\\Uninstall\\"
static const WCHAR * const k_Uninstall_7zip = k_REG_Uninstall L"7-Zip";
static LPCWSTR const k_Uninstall_7zip = k_REG_Uninstall L"7-Zip";
static BoolInt AreEqual_Path_PrefixName(const wchar_t *s, const wchar_t *prefix, const wchar_t *name)
@@ -370,11 +427,11 @@ static void WriteCLSID()
{
unsigned i;
for (i = 0; i < sizeof(k_ShellEx_Items) / sizeof(k_ShellEx_Items[0]); i++)
for (i = 0; i < ARRAY_SIZE(k_ShellEx_Items); i++)
{
WCHAR destPath[MAX_PATH];
wcscpy(destPath, k_ShellEx_Items[i]);
wcscat(destPath, L"\\7-Zip");
CpyAscii(destPath, k_ShellEx_Items[i]);
CatAscii(destPath, "\\7-Zip");
MyRegistry_DeleteKey(HKEY_CLASSES_ROOT, destPath);
}
@@ -393,7 +450,7 @@ static void WriteCLSID()
}
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
if (MyRegistry_QueryString2_32(HKEY_CLASSES_ROOT, k_Reg_CLSID_7zip_Inproc, NULL, s))
{
@@ -407,11 +464,11 @@ static void WriteCLSID()
{
unsigned i;
for (i = 0; i < sizeof(k_ShellEx_Items) / sizeof(k_ShellEx_Items[0]); i++)
for (i = 0; i < ARRAY_SIZE(k_ShellEx_Items); i++)
{
WCHAR destPath[MAX_PATH];
wcscpy(destPath, k_ShellEx_Items[i]);
wcscat(destPath, L"\\7-Zip");
CpyAscii(destPath, k_ShellEx_Items[i]);
CatAscii(destPath, "\\7-Zip");
MyRegistry_DeleteKey_32(HKEY_CLASSES_ROOT, destPath);
}
@@ -444,17 +501,27 @@ static void WriteCLSID()
static const wchar_t *GetCmdParam(const wchar_t *s)
{
unsigned pos = 0;
BoolInt quoteMode = False;
for (;; s++)
{
wchar_t c = *s;
if (c == 0 || (c == L' ' && !quoteMode))
break;
if (c == L'\"')
{
quoteMode = !quoteMode;
else if (c == 0 || (c == L' ' && !quoteMode))
return s;
continue;
}
if (pos >= ARRAY_SIZE(cmd) - 1)
exit(1);
cmd[pos++] = c;
}
cmd[pos] = 0;
return s;
}
/*
static void RemoveQuotes(wchar_t *s)
{
const wchar_t *src = s;
@@ -468,6 +535,7 @@ static void RemoveQuotes(wchar_t *s)
return;
}
}
*/
static BoolInt DoesFileOrDirExist()
{
@@ -489,7 +557,7 @@ static BOOL RemoveFileAfterReboot()
return RemoveFileAfterReboot2(path);
}
#define IS_LIMIT_CHAR(c) (c == 0 || c == ' ')
// #define IS_LIMIT_CHAR(c) (c == 0 || c == ' ')
static BoolInt IsThereSpace(const wchar_t *s)
{
@@ -507,10 +575,10 @@ static void AddPathParam(wchar_t *dest, const wchar_t *src)
{
BoolInt needQuote = IsThereSpace(src);
if (needQuote)
wcscat(dest, L"\"");
CatAscii(dest, "\"");
wcscat(dest, src);
if (needQuote)
wcscat(dest, L"\"");
CatAscii(dest, "\"");
}
@@ -543,12 +611,12 @@ static BOOL RemoveDir()
#define k_Lang L"Lang"
#define k_Lang "Lang"
// NUM_LANG_TXT_FILES files are placed before en.ttt
#define NUM_LANG_TXT_FILES 88
#define NUM_LANG_TXT_FILES 92
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
#define NUM_EXTRA_FILES_64BIT 1
#else
#define NUM_EXTRA_FILES_64BIT 0
@@ -560,7 +628,7 @@ static const char * const k_Names =
"af an ar ast az ba be bg bn br ca co cs cy da de el eo es et eu ext"
" fa fi fr fur fy ga gl gu he hi hr hu hy id io is it ja ka kaa kab kk ko ku ku-ckb ky"
" lij lt lv mk mn mng mng2 mr ms nb ne nl nn pa-in pl ps pt pt-br ro ru"
" sa si sk sl sq sr-spc sr-spl sv ta th tr tt ug uk uz va vi yo zh-cn zh-tw"
" sa si sk sl sq sr-spc sr-spl sv sw ta tg th tk tr tt ug uk uz uz-cyrl va vi yo zh-cn zh-tw"
" en.ttt"
" descript.ion"
" History.txt"
@@ -573,7 +641,7 @@ static const char * const k_Names =
" 7zG.exe"
" 7z.dll"
" 7zFM.exe"
#ifdef _64BIT_INSTALLER
#ifdef USE_7ZIP_32_DLL
" 7-zip32.dll"
#endif
" 7-zip.dll"
@@ -628,7 +696,7 @@ static int Install()
temp = path + pathLen;
if (i <= NUM_LANG_TXT_FILES)
wcscpy(temp, k_Lang L"\\");
CpyAscii(temp, k_Lang "\\");
{
WCHAR *dest = temp + wcslen(temp);
@@ -648,7 +716,7 @@ static int Install()
}
if (i < NUM_LANG_TXT_FILES)
wcscat(temp, L".txt");
CatAscii(temp, ".txt");
if (!g_SilentMode)
SetWindowTextW(g_InfoLine_HWND, temp);
@@ -673,7 +741,7 @@ static int Install()
}
}
wcscpy(path + pathLen, k_Lang);
CpyAscii(path + pathLen, k_Lang);
RemoveDir();
path[pathLen] = 0;
@@ -706,8 +774,8 @@ static int Install()
WCHAR m[MAX_PATH + 100];
m[0] = 0;
if (winRes == 0 || !GetErrorMessage(winRes, m))
wcscpy(m, L"ERROR");
MessageBoxW(g_HWND, m, L"Error", MB_ICONERROR | MB_OK);
CpyAscii(m, "ERROR");
PrintErrorMessage("System ERROR:", m);
}
return 1;
@@ -720,7 +788,7 @@ static void OnClose()
{
if (MessageBoxW(g_HWND,
L"Do you want to cancel uninstallation?",
k_7zip_with_Ver,
k_7zip_with_Ver_Uninstall,
MB_ICONQUESTION | MB_YESNO | MB_DEFBUTTON2) != IDYES)
return;
}
@@ -739,7 +807,7 @@ static INT_PTR CALLBACK MyDlgProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM
g_InfoLine_HWND = GetDlgItem(hwnd, IDT_CUR_FILE);
g_Progress_HWND = GetDlgItem(hwnd, IDC_PROGRESS);
SetWindowTextW(hwnd, k_7zip_with_Ver L" Uninstall");
SetWindowTextW(hwnd, k_7zip_with_Ver_Uninstall);
SetDlgItemTextW(hwnd, IDE_EXTRACT_PATH, path);
ShowWindow(g_Progress_HWND, SW_HIDE);
@@ -759,7 +827,7 @@ static INT_PTR CALLBACK MyDlgProc(HWND hwnd, UINT message, WPARAM wParam, LPARAM
}
if (!g_Install_was_Pressed)
{
SendMessage(hwnd, WM_NEXTDLGCTL, (WPARAM)GetDlgItem(hwnd, IDCANCEL), TRUE);
SendMessage(hwnd, WM_NEXTDLGCTL, (WPARAM)(void *)GetDlgItem(hwnd, IDCANCEL), TRUE);
EnableWindow(g_Path_HWND, FALSE);
EnableWindow(GetDlgItem(hwnd, IDOK), FALSE);
@@ -844,31 +912,46 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
{
const wchar_t *s2 = GetCmdParam(s);
if (s[0] == '/')
BoolInt error = True;
if (cmd[0] == '/')
{
if (cmd[1] == 'S')
{
if (cmd[2] == 0)
{
if (s[1] == 'S' && IS_LIMIT_CHAR(s[2]))
g_SilentMode = True;
else if (s[1] == 'N' && IS_LIMIT_CHAR(s[2]))
useTemp = False;
else if (s[1] == 'D' && s[2] == '=')
{
size_t num;
s += 3;
num = s2 - s;
if (num <= MAX_PATH)
{
wcsncpy(workDir, s, num);
workDir[num] = 0;
RemoveQuotes(workDir);
useTemp = False;
error = False;
}
}
else if (cmd[1] == 'N')
{
if (cmd[2] == 0)
{
useTemp = False;
error = False;
}
}
else if (cmd[1] == 'D' && cmd[2] == '=')
{
wcscpy(workDir, cmd + 3);
// RemoveQuotes(workDir);
useTemp = False;
error = False;
}
}
s = s2;
}
if (error && cmdError[0] == 0)
wcscpy(cmdError, cmd);
}
}
if (cmdError[0] != 0)
{
if (!g_SilentMode)
PrintErrorMessage("Unsupported command:", cmdError);
return 1;
}
}
{
wchar_t *name;
@@ -925,7 +1008,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
for (i = 0; i < 100; i++, d += GetTickCount())
{
wcscpy(path + pathLen, L"7z");
CpyAscii(path + pathLen, "7z");
{
wchar_t *s = path + wcslen(path);
@@ -944,7 +1027,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
continue;
if (CreateDirectoryW(path, NULL))
{
wcscat(path, WSTRING_PATH_SEPARATOR);
CatAscii(path, STRING_PATH_SEPARATOR);
wcscpy(tempPath, path);
break;
}
@@ -955,7 +1038,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
if (tempPath[0] != 0)
{
wcscpy(copyPath, tempPath);
wcscat(copyPath, L"Uninst.exe"); // we need not "Uninstall.exe" here
CatAscii(copyPath, "Uninst.exe"); // we need not "Uninstall.exe" here
if (CopyFileW(modulePath, copyPath, TRUE))
{
@@ -969,7 +1052,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
// maybe CreateProcess supports path with spaces even without quotes.
AddPathParam(cmdLine, copyPath);
wcscat(cmdLine, L" /N /D=");
CatAscii(cmdLine, " /N /D=");
AddPathParam(cmdLine, modulePrefix);
if (cmdParams[0] != 0 && wcslen(cmdParams) < MAX_PATH * 2 + 10)
@@ -1066,7 +1149,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
SetDlgItemTextW(g_HWND, IDOK, L"Close");
EnableWindow(GetDlgItem(g_HWND, IDOK), TRUE);
EnableWindow(GetDlgItem(g_HWND, IDCANCEL), FALSE);
SendMessage(g_HWND, WM_NEXTDLGCTL, (WPARAM)GetDlgItem(g_HWND, IDOK), TRUE);
SendMessage(g_HWND, WM_NEXTDLGCTL, (WPARAM)(void *)GetDlgItem(g_HWND, IDOK), TRUE);
}
}
}

View File

@@ -1,5 +1,5 @@
/* LzmaUtil.c -- Test application for LZMA compression
2018-07-04 : Igor Pavlov : Public domain */
2021-02-15 : Igor Pavlov : Public domain */
#include "../../Precomp.h"
@@ -15,9 +15,9 @@
#include "../../LzmaDec.h"
#include "../../LzmaEnc.h"
static const char * const kCantReadMessage = "Can not read input file";
static const char * const kCantWriteMessage = "Can not write output file";
static const char * const kCantAllocateMessage = "Can not allocate memory";
static const char * const kCantReadMessage = "Cannot read input file";
static const char * const kCantWriteMessage = "Cannot write output file";
static const char * const kCantAllocateMessage = "Cannot allocate memory";
static const char * const kDataErrorMessage = "Data error";
static void PrintHelp(char *buffer)
@@ -37,9 +37,25 @@ static int PrintError(char *buffer, const char *message)
return 1;
}
/* Appends an error report to the zero-terminated string in (buffer):
     "\nError: " + message + "\nSystem error code: " + code
   and, in non-Windows builds, " : " + strerror() text for that code.
   A final "\n" is appended.
   buffer  : destination text; the caller must provide enough free space,
             because no bound is checked here.
   message : short description of the failed operation.
   wres    : system error code of the failed operation.
   Returns 1 always, so the caller can return the result as an exit code. */
static int PrintError_WRes(char *buffer, const char *message, WRes wres)
{
  strcat(buffer, "\nError: ");
  strcat(buffer, message);
  /* the value is passed as (unsigned), so it must be printed with %u */
  sprintf(buffer + strlen(buffer), "\nSystem error code: %u", (unsigned)wres);
  #ifndef _WIN32
  {
    const char *s = strerror((int)wres);
    if (s)
      sprintf(buffer + strlen(buffer), " : %s", s);
  }
  #endif
  strcat(buffer, "\n");
  return 1;
}
static int PrintErrorNumber(char *buffer, SRes val)
{
sprintf(buffer + strlen(buffer), "\nError code: %x\n", (unsigned)val);
sprintf(buffer + strlen(buffer), "\n7-Zip error code: %d\n", (unsigned)val);
return 1;
}
@@ -181,9 +197,11 @@ static int main2(int numArgs, const char *args[], char *rs)
FileSeqInStream_CreateVTable(&inStream);
File_Construct(&inStream.file);
inStream.wres = 0;
FileOutStream_CreateVTable(&outStream);
File_Construct(&outStream.file);
outStream.wres = 0;
if (numArgs == 1)
{
@@ -206,14 +224,19 @@ static int main2(int numArgs, const char *args[], char *rs)
return PrintError(rs, "Incorrect UInt32 or UInt64");
}
if (InFile_Open(&inStream.file, args[2]) != 0)
return PrintError(rs, "Can not open input file");
{
WRes wres = InFile_Open(&inStream.file, args[2]);
if (wres != 0)
return PrintError_WRes(rs, "Cannot open input file", wres);
}
if (numArgs > 3)
{
WRes wres;
useOutFile = True;
if (OutFile_Open(&outStream.file, args[3]) != 0)
return PrintError(rs, "Can not open output file");
wres = OutFile_Open(&outStream.file, args[3]);
if (wres != 0)
return PrintError_WRes(rs, "Cannot open output file", wres);
}
else if (encodeMode)
PrintUserError(rs);
@@ -221,7 +244,9 @@ static int main2(int numArgs, const char *args[], char *rs)
if (encodeMode)
{
UInt64 fileSize;
File_GetLength(&inStream.file, &fileSize);
WRes wres = File_GetLength(&inStream.file, &fileSize);
if (wres != 0)
return PrintError_WRes(rs, "Cannot get file length", wres);
res = Encode(&outStream.vt, &inStream.vt, fileSize, rs);
}
else
@@ -240,9 +265,9 @@ static int main2(int numArgs, const char *args[], char *rs)
else if (res == SZ_ERROR_DATA)
return PrintError(rs, kDataErrorMessage);
else if (res == SZ_ERROR_WRITE)
return PrintError(rs, kCantWriteMessage);
return PrintError_WRes(rs, kCantWriteMessage, outStream.wres);
else if (res == SZ_ERROR_READ)
return PrintError(rs, kCantReadMessage);
return PrintError_WRes(rs, kCantReadMessage, inStream.wres);
return PrintErrorNumber(rs, res);
}
return 0;

View File

@@ -1,44 +1,19 @@
PROG = lzma
CXX = g++
LIB =
RM = rm -f
CFLAGS = -c -O2 -Wall -D_7ZIP_ST
PROG = 7lzma
include ../../../CPP/7zip/LzmaDec_gcc.mak
OBJS = \
LzmaUtil.o \
Alloc.o \
LzFind.o \
LzmaDec.o \
LzmaEnc.o \
7zFile.o \
7zStream.o \
$(LZMA_DEC_OPT_OBJS) \
$O/7zFile.o \
$O/7zStream.o \
$O/Alloc.o \
$O/LzFind.o \
$O/LzFindMt.o \
$O/LzmaDec.o \
$O/LzmaEnc.o \
$O/LzmaUtil.o \
$O/Threads.o \
all: $(PROG)
$(PROG): $(OBJS)
$(CXX) -o $(PROG) $(LDFLAGS) $(OBJS) $(LIB) $(LIB2)
LzmaUtil.o: LzmaUtil.c
$(CXX) $(CFLAGS) LzmaUtil.c
Alloc.o: ../../Alloc.c
$(CXX) $(CFLAGS) ../../Alloc.c
LzFind.o: ../../LzFind.c
$(CXX) $(CFLAGS) ../../LzFind.c
LzmaDec.o: ../../LzmaDec.c
$(CXX) $(CFLAGS) ../../LzmaDec.c
LzmaEnc.o: ../../LzmaEnc.c
$(CXX) $(CFLAGS) ../../LzmaEnc.c
7zFile.o: ../../7zFile.c
$(CXX) $(CFLAGS) ../../7zFile.c
7zStream.o: ../../7zStream.c
$(CXX) $(CFLAGS) ../../7zStream.c
clean:
-$(RM) $(PROG) $(OBJS)
include ../../7zip_gcc_c.mak

4
C/Xz.c
View File

@@ -1,5 +1,5 @@
/* Xz.c - Xz
2017-05-12 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -41,7 +41,7 @@ void Xz_Free(CXzStream *p, ISzAllocPtr alloc)
unsigned XzFlags_GetCheckSize(CXzStreamFlags f)
{
unsigned t = XzFlags_GetCheckType(f);
return (t == 0) ? 0 : (4 << ((t - 1) / 3));
return (t == 0) ? 0 : ((unsigned)4 << ((t - 1) / 3));
}
void XzCheck_Init(CXzCheck *p, unsigned mode)

107
C/Xz.h
View File

@@ -1,5 +1,5 @@
/* Xz.h - Xz interface
2018-07-04 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#ifndef __XZ_H
#define __XZ_H
@@ -47,7 +47,7 @@ typedef struct
CXzFilter filters[XZ_NUM_FILTERS_MAX];
} CXzBlock;
#define XzBlock_GetNumFilters(p) (((p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
#define XzBlock_GetNumFilters(p) (((unsigned)(p)->flags & XZ_BF_NUM_FILTERS_MASK) + 1)
#define XzBlock_HasPackSize(p) (((p)->flags & XZ_BF_PACK_SIZE) != 0)
#define XzBlock_HasUnpackSize(p) (((p)->flags & XZ_BF_UNPACK_SIZE) != 0)
#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
@@ -277,8 +277,11 @@ void XzUnpacker_Free(CXzUnpacker *p);
{
XzUnpacker_Init()
for()
{
XzUnpacker_Code();
}
XzUnpacker_IsStreamWasFinished()
}
Interface-2 : Direct output buffer:
Use it, if you know exact size of decoded data, and you need
@@ -288,14 +291,18 @@ void XzUnpacker_Free(CXzUnpacker *p);
XzUnpacker_Init()
XzUnpacker_SetOutBufMode(); // to set output buffer and size
for()
{
XzUnpacker_Code(); // (dest = NULL) in XzUnpacker_Code()
}
XzUnpacker_IsStreamWasFinished()
}
Interface-3 : Direct output buffer : One call full decoding
It unpacks whole input buffer to output buffer in one call.
It uses Interface-2 internally.
{
XzUnpacker_CodeFull()
XzUnpacker_IsStreamWasFinished()
}
*/
@@ -309,8 +316,12 @@ Returns:
SZ_OK
status:
CODER_STATUS_NOT_FINISHED,
CODER_STATUS_NEEDS_MORE_INPUT - maybe there are more xz streams,
call XzUnpacker_IsStreamWasFinished to check that current stream was finished
CODER_STATUS_NEEDS_MORE_INPUT - the decoder can return it in two cases:
1) it needs more input data to finish current xz stream
2) xz stream was finished successfully. But the decoder supports multiple
concatenated xz streams. So it expects more input data for new xz streams.
Call XzUnpacker_IsStreamWasFinished() to check that latest xz stream was finished successfully.
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_DATA - Data error
SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
@@ -335,12 +346,17 @@ SRes XzUnpacker_CodeFull(CXzUnpacker *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen,
ECoderFinishMode finishMode, ECoderStatus *status);
/*
If you decode full xz stream(s), then you can call XzUnpacker_IsStreamWasFinished()
after successful XzUnpacker_CodeFull() or after last call of XzUnpacker_Code().
*/
BoolInt XzUnpacker_IsStreamWasFinished(const CXzUnpacker *p);
/*
XzUnpacker_GetExtraSize() returns then number of uncofirmed bytes,
XzUnpacker_GetExtraSize() returns the number of unconfirmed bytes,
if it's in (XZ_STATE_STREAM_HEADER) state or in (XZ_STATE_STREAM_PADDING) state.
These bytes can be some bytes after xz archive, or
These bytes can be some data after xz archive, or
it can be start of new xz stream.
Call XzUnpacker_GetExtraSize() after XzUnpacker_Code() function to detect real size of
@@ -371,19 +387,46 @@ BoolInt XzUnpacker_IsBlockFinished(const CXzUnpacker *p);
/* ---------- Multi Threading Decoding ---------- */
/* ---- Single-Thread and Multi-Thread xz Decoding with Input/Output Streams ---- */
/*
if (CXzDecMtProps::numThreads > 1), the decoder can try to use
Multi-Threading. The decoder analyses xz block header, and if
there are pack size and unpack size values stored in xz block header,
the decoder reads compressed data of block to internal buffers,
and then it can start parallel decoding, if there are other blocks.
The decoder can switch back to Single-Thread decoding after some conditions.
The sequence of calls for xz decoding with in/out Streams:
{
XzDecMt_Create()
XzDecMtProps_Init(XzDecMtProps) to set default values of properties
// then you can change some XzDecMtProps parameters with required values
// here you can set the number of threads and (memUseMax) - the maximum
Memory usage for multithreading decoding.
for()
{
XzDecMt_Decode() // one call per one file
}
XzDecMt_Destroy()
}
*/
typedef struct
{
size_t inBufSize_ST;
size_t outStep_ST;
BoolInt ignoreErrors;
size_t inBufSize_ST; // size of input buffer for Single-Thread decoding
size_t outStep_ST; // size of output buffer for Single-Thread decoding
BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
#ifndef _7ZIP_ST
unsigned numThreads;
size_t inBufSize_MT;
size_t memUseMax;
unsigned numThreads; // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding
size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding.
// it's recommended to set (memUseMax) manually to value that is smaller than the total size of RAM in computer.
#endif
} CXzDecMtProps;
@@ -393,7 +436,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p);
typedef void * CXzDecMtHandle;
/*
alloc : XzDecMt uses CAlignOffsetAlloc for addresses allocated by (alloc).
alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
allocMid : for big allocations, aligned allocation is better
*/
@@ -407,33 +450,46 @@ typedef struct
Byte NumStreams_Defined;
Byte NumBlocks_Defined;
Byte DataAfterEnd;
Byte DataAfterEnd; // there are some additional data after good xz streams, and that data is not new xz stream.
Byte DecodingTruncated; // Decoding was Truncated, we need only partial output data
UInt64 InSize; // pack size processed
UInt64 InSize; // pack size processed. That value doesn't include the data after
// end of xz stream, if that data was not correct
UInt64 OutSize;
UInt64 NumStreams;
UInt64 NumBlocks;
SRes DecodeRes;
SRes ReadRes;
SRes ProgressRes;
SRes CombinedRes;
SRes CombinedRes_Type;
SRes DecodeRes; // the error code of xz streams data decoding
SRes ReadRes; // error code from ISeqInStream:Read()
SRes ProgressRes; // error code from ICompressProgress:Progress()
SRes CombinedRes; // Combined result error code that shows main result
// = SZ_OK, if there is no error.
// but check also (DataAfterEnd) that can show additional minor errors.
SRes CombinedRes_Type; // = SZ_ERROR_READ, if error from ISeqInStream
// = SZ_ERROR_PROGRESS, if error from ICompressProgress
// = SZ_ERROR_WRITE, if error from ISeqOutStream
// = SZ_ERROR_* codes for decoding
} CXzStatInfo;
void XzStatInfo_Clear(CXzStatInfo *p);
/*
XzDecMt_Decode()
SRes:
SZ_OK - OK
SRes: it's combined decoding result. It also is equal to stat->CombinedRes.
SZ_OK - no error
check also output value in (stat->DataAfterEnd)
that can show additional possible error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_NO_ARCHIVE - is not xz archive
SZ_ERROR_ARCHIVE - Headers error
SZ_ERROR_DATA - Data Error
SZ_ERROR_UNSUPPORTED - Unsupported method or method properties
SZ_ERROR_CRC - CRC Error
SZ_ERROR_INPUT_EOF - it needs more input data
SZ_ERROR_WRITE - ISeqOutStream error
@@ -451,8 +507,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
// Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
// const Byte *inData, size_t inDataSize,
CXzStatInfo *stat,
int *isMT, // 0 means that ST (Single-Thread) version was used
CXzStatInfo *stat, // out: decoding results and statistics
int *isMT, // out: 0 means that ST (Single-Thread) version was used
// 1 means that MT (Multi-Thread) version was used
ICompressProgress *progress);
EXTERN_C_END

View File

@@ -1,5 +1,5 @@
/* XzCrc64Opt.c -- CRC64 calculation
2017-06-30 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -9,6 +9,7 @@
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{
const Byte *p = (const Byte *)data;
@@ -16,7 +17,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
v = CRC64_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
UInt32 d = (UInt32)v ^ *(const UInt32 *)p;
UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
v = (v >> 32)
^ (table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@@ -45,6 +46,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{
const Byte *p = (const Byte *)data;
@@ -54,7 +56,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)p;
UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
v = (v << 32)
^ (table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]

236
C/XzDec.c
View File

@@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode
2019-02-02 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -240,6 +240,7 @@ static SRes BraState_Code2(void *pp,
}
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc);
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc)
{
CBraState *decoder;
@@ -1275,9 +1276,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
else
{
const Byte *ptr = p->buf;
p->state = XZ_STATE_STREAM_FOOTER;
p->pos = 0;
if (CRC_GET_DIGEST(p->crc) != GetUi32(p->buf))
if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr))
return SZ_ERROR_CRC;
}
break;
@@ -1456,7 +1458,6 @@ typedef struct
ISeqInStream *inStream;
ISeqOutStream *outStream;
ICompressProgress *progress;
// CXzStatInfo *stat;
BoolInt finishMode;
BoolInt outSize_Defined;
@@ -1492,8 +1493,9 @@ typedef struct
UInt64 numBlocks;
// UInt64 numBadBlocks;
SRes mainErrorCode;
SRes mainErrorCode; // it's set to error code, if the size of Code() output doesn't match the size from Parsing stage
// it can be = SZ_ERROR_INPUT_EOF
// it can be = SZ_ERROR_DATA, in some other cases
BoolInt isBlockHeaderState_Parse;
BoolInt isBlockHeaderState_Write;
UInt64 outProcessed_Parse;
@@ -1877,7 +1879,7 @@ static SRes XzDecMt_Callback_PreCode(void *pp, unsigned coderIndex)
{
// if (res == SZ_ERROR_MEM) return res;
if (me->props.ignoreErrors && res != SZ_ERROR_MEM)
return S_OK;
return SZ_OK;
return res;
}
}
@@ -1898,15 +1900,18 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
*outCodePos = coder->outCodeSize;
*stop = True;
if (srcSize > coder->inPreSize - coder->inCodeSize)
return SZ_ERROR_FAIL;
if (coder->inCodeSize < coder->inPreHeaderSize)
{
UInt64 rem = coder->inPreHeaderSize - coder->inCodeSize;
size_t step = srcSize;
if (step > rem)
step = (size_t)rem;
size_t step = coder->inPreHeaderSize - coder->inCodeSize;
if (step > srcSize)
step = srcSize;
src += step;
srcSize -= step;
coder->inCodeSize += step;
*inCodePos = coder->inCodeSize;
if (coder->inCodeSize < coder->inPreHeaderSize)
{
*stop = False;
@@ -1956,7 +1961,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
{
*inCodePos = coder->inPreSize;
*outCodePos = coder->outPreSize;
return S_OK;
return SZ_OK;
}
return coder->codeRes;
}
@@ -1966,7 +1971,7 @@ static SRes XzDecMt_Callback_Code(void *pp, unsigned coderIndex,
static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize,
const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode)
@@ -1985,7 +1990,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (!coder->dec.headerParsedOk || !coder->outBuf)
{
if (me->finishedDecoderIndex < 0)
me->finishedDecoderIndex = coderIndex;
me->finishedDecoderIndex = (int)coderIndex;
return SZ_OK;
}
@@ -2077,7 +2082,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (coder->codeRes != SZ_OK)
if (!me->props.ignoreErrors)
{
me->finishedDecoderIndex = coderIndex;
me->finishedDecoderIndex = (int)coderIndex;
return res;
}
@@ -2086,7 +2091,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (coder->inPreSize != coder->inCodeSize
|| coder->blockPackTotal != coder->inCodeSize)
{
me->finishedDecoderIndex = coderIndex;
me->finishedDecoderIndex = (int)coderIndex;
return SZ_OK;
}
@@ -2125,6 +2130,17 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
return SZ_OK;
}
/*
We have processed all xz-blocks of stream,
And xz unpacker is at XZ_STATE_BLOCK_HEADER state, where
(src) is a pointer to xz-Index structure.
We finish reading of current xz-Stream, including Zero padding after xz-Stream.
We exit, if we reach extra byte (first byte of new-Stream or another data).
But we don't update input stream pointer for that new extra byte.
If extra byte is not correct first byte of xz-signature,
we have SZ_ERROR_NO_ARCHIVE error here.
*/
res = XzUnpacker_Code(dec,
NULL, &outSizeCur,
src, &srcProcessed,
@@ -2132,15 +2148,23 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
CODER_FINISH_END, // CODER_FINISH_ANY,
&status);
// res = SZ_ERROR_ARCHIVE; // for failure test
me->status = status;
me->codeRes = res;
if (isCross)
me->mtc.crossStart += srcProcessed;
me->mtc.inProcessed += srcProcessed;
me->mtc.mtProgress.totalInSize = me->mtc.inProcessed;
srcSize -= srcProcessed;
src += srcProcessed;
if (res != SZ_OK)
{
return S_OK;
return SZ_OK;
// return res;
}
@@ -2149,20 +2173,26 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
*needContinue = True;
me->isBlockHeaderState_Parse = False;
me->isBlockHeaderState_Write = False;
if (!isCross)
{
Byte *crossBuf = MtDec_GetCrossBuff(&me->mtc);
if (!crossBuf)
return SZ_ERROR_MEM;
memcpy(crossBuf, src + srcProcessed, srcSize - srcProcessed);
}
if (srcSize != 0)
memcpy(crossBuf, src, srcSize);
me->mtc.crossStart = 0;
me->mtc.crossEnd = srcSize - srcProcessed;
me->mtc.crossEnd = srcSize;
}
PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd);
return SZ_OK;
}
if (status != CODER_STATUS_NEEDS_MORE_INPUT)
if (status != CODER_STATUS_NEEDS_MORE_INPUT || srcSize != 0)
{
return E_FAIL;
return SZ_ERROR_FAIL;
}
if (me->mtc.readWasFinished)
@@ -2174,7 +2204,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
{
size_t inPos;
size_t inLim;
const Byte *inData;
// const Byte *inData;
UInt64 inProgressPrev = me->mtc.inProcessed;
// XzDecMt_Prepare_InBuf_ST(p);
@@ -2184,9 +2214,8 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
inPos = 0;
inLim = 0;
// outProcessed = 0;
inData = crossBuf;
// inData = crossBuf;
for (;;)
{
@@ -2201,7 +2230,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
{
inPos = 0;
inLim = me->mtc.inBufSize;
me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)inData, &inLim);
me->mtc.readRes = ISeqInStream_Read(me->inStream, (void *)crossBuf, &inLim);
me->mtc.readProcessed += inLim;
if (inLim == 0 || me->mtc.readRes != SZ_OK)
me->mtc.readWasFinished = True;
@@ -2213,7 +2242,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
res = XzUnpacker_Code(dec,
NULL, &outProcessed,
inData + inPos, &inProcessed,
crossBuf + inPos, &inProcessed,
(inProcessed == 0), // srcFinished
CODER_FINISH_END, &status);
@@ -2225,7 +2254,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
if (res != SZ_OK)
{
return S_OK;
return SZ_OK;
// return res;
}
@@ -2240,7 +2269,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
}
if (status != CODER_STATUS_NEEDS_MORE_INPUT)
return E_FAIL;
return SZ_ERROR_FAIL;
if (me->mtc.progress)
{
@@ -2276,13 +2305,6 @@ void XzStatInfo_Clear(CXzStatInfo *p)
p->NumStreams_Defined = False;
p->NumBlocks_Defined = False;
// p->IsArc = False;
// p->UnexpectedEnd = False;
// p->Unsupported = False;
// p->HeadersError = False;
// p->DataError = False;
// p->CrcError = False;
p->DataAfterEnd = False;
p->DecodingTruncated = False;
@@ -2296,6 +2318,16 @@ void XzStatInfo_Clear(CXzStatInfo *p)
/*
XzDecMt_Decode_ST() can return SZ_OK or the following errors
- SZ_ERROR_MEM for memory allocation error
- error from XzUnpacker_Code() function
- SZ_ERROR_WRITE for ISeqOutStream::Write(). stat->CombinedRes_Type = SZ_ERROR_WRITE in that case
- ICompressProgress::Progress() error, stat->CombinedRes_Type = SZ_ERROR_PROGRESS.
But XzDecMt_Decode_ST() doesn't return ISeqInStream::Read() errors.
ISeqInStream::Read() result is set to p->readRes.
also it can set stat->CombinedRes_Type to SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
*/
static SRes XzDecMt_Decode_ST(CXzDecMt *p
#ifndef _7ZIP_ST
@@ -2384,7 +2416,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
inPos = 0;
inLim = p->inBufSize;
inData = p->inBuf;
p->readRes = ISeqInStream_Read(p->inStream, (void *)inData, &inLim);
p->readRes = ISeqInStream_Read(p->inStream, (void *)p->inBuf, &inLim);
p->readProcessed += inLim;
if (inLim == 0 || p->readRes != SZ_OK)
p->readWasFinished = True;
@@ -2426,8 +2458,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (finished || outProcessed >= outSize)
if (outPos != 0)
{
size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
p->outProcessed += written;
const size_t written = ISeqOutStream_Write(p->outStream, p->outBuf, outPos);
// p->outProcessed += written; // 21.01: BUG fixed
if (written != outPos)
{
stat->CombinedRes_Type = SZ_ERROR_WRITE;
@@ -2438,9 +2470,8 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (p->progress && res == SZ_OK)
{
UInt64 inDelta = p->inProcessed - inPrev;
UInt64 outDelta = p->outProcessed - outPrev;
if (inDelta >= (1 << 22) || outDelta >= (1 << 22))
if (p->inProcessed - inPrev >= (1 << 22) ||
p->outProcessed - outPrev >= (1 << 22))
{
res = ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed);
if (res != SZ_OK)
@@ -2455,14 +2486,31 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
}
if (finished)
return res;
{
// p->codeRes is preliminary error from XzUnpacker_Code.
// and it can be corrected later as final result
// so we return SZ_OK here instead of (res);
return SZ_OK;
// return res;
}
}
}
static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
/*
XzStatInfo_SetStat() transforms
CXzUnpacker return code and status to combined CXzStatInfo results.
it can convert SZ_OK to SZ_ERROR_INPUT_EOF
it can convert SZ_ERROR_NO_ARCHIVE to SZ_OK and (DataAfterEnd = 1)
*/
static void XzStatInfo_SetStat(const CXzUnpacker *dec,
int finishMode,
UInt64 readProcessed, UInt64 inProcessed,
SRes res, ECoderStatus status,
// UInt64 readProcessed,
UInt64 inProcessed,
SRes res, // it's result from CXzUnpacker unpacker
ECoderStatus status,
BoolInt decodingTruncated,
CXzStatInfo *stat)
{
@@ -2484,37 +2532,50 @@ static SRes XzStatInfo_SetStat(const CXzUnpacker *dec,
if (status == CODER_STATUS_NEEDS_MORE_INPUT)
{
// CODER_STATUS_NEEDS_MORE_INPUT is expected status for correct xz streams
// any extra data is part of correct data
extraSize = 0;
// if xz stream was not finished, then we need more data
if (!XzUnpacker_IsStreamWasFinished(dec))
res = SZ_ERROR_INPUT_EOF;
}
else if (!decodingTruncated || finishMode) // (status == CODER_STATUS_NOT_FINISHED)
else
{
// CODER_STATUS_FINISHED_WITH_MARK is not possible for multi stream xz decoding
// so here we have (status == CODER_STATUS_NOT_FINISHED)
// if (status != CODER_STATUS_FINISHED_WITH_MARK)
if (!decodingTruncated || finishMode)
res = SZ_ERROR_DATA;
}
}
else if (res == SZ_ERROR_NO_ARCHIVE)
{
/*
SZ_ERROR_NO_ARCHIVE is possible for 2 states:
XZ_STATE_STREAM_HEADER - if bad signature or bad CRC
XZ_STATE_STREAM_PADDING - if non-zero padding data
extraSize / inProcessed don't include "bad" byte
extraSize and inProcessed don't include "bad" byte
*/
if (inProcessed != extraSize) // if good streams before error
if (extraSize != 0 || readProcessed != inProcessed)
// if (inProcessed == extraSize), there was no good xz stream header at all, so we keep the error
if (inProcessed != extraSize) // if there were good xz streams before error
{
// if (extraSize != 0 || readProcessed != inProcessed)
{
// here we suppose that all xz streams were finished OK, and we have
// some extra data after all streams
stat->DataAfterEnd = True;
// there is some good xz stream before. So we set SZ_OK
res = SZ_OK;
}
}
}
if (stat->DecodeRes == SZ_OK)
stat->DecodeRes = res;
stat->InSize -= extraSize;
return res;
}
SRes XzDecMt_Decode(CXzDecMtHandle pp,
const CXzDecMtProps *props,
const UInt64 *outDataSize, int finishMode,
@@ -2557,8 +2618,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
p->inProcessed = 0;
p->readProcessed = 0;
p->readWasFinished = False;
p->readRes = SZ_OK;
p->codeRes = 0;
p->codeRes = SZ_OK;
p->status = CODER_STATUS_NOT_SPECIFIED;
XzUnpacker_Init(&p->dec);
@@ -2589,8 +2651,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
if (p->props.numThreads > 1)
{
IMtDecCallback vt;
IMtDecCallback2 vt;
BoolInt needContinue;
SRes res;
// we just free ST buffers here
// but we still keep the state variables that were set in XzUnpacker_Init()
XzDecMt_FreeSt(p);
@@ -2628,10 +2691,9 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
vt.Code = XzDecMt_Callback_Code;
vt.Write = XzDecMt_Callback_Write;
{
BoolInt needContinue;
SRes res = MtDec_Code(&p->mtc);
res = MtDec_Code(&p->mtc);
stat->InSize = p->mtc.inProcessed;
@@ -2656,17 +2718,18 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
}
if (!needContinue)
{
{
SRes codeRes;
BoolInt truncated = False;
ECoderStatus status;
CXzUnpacker *dec;
const CXzUnpacker *dec;
stat->OutSize = p->outProcessed;
if (p->finishedDecoderIndex >= 0)
{
CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
const CXzDecMtThread *coder = &p->coders[(unsigned)p->finishedDecoderIndex];
codeRes = coder->codeRes;
dec = &coder->dec;
status = coder->status;
@@ -2679,31 +2742,37 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
truncated = p->parsing_Truncated;
}
else
return E_FAIL;
return SZ_ERROR_FAIL;
if (p->mainErrorCode != SZ_OK)
stat->DecodeRes = p->mainErrorCode;
XzStatInfo_SetStat(dec, p->finishMode,
p->mtc.readProcessed, p->mtc.inProcessed,
// p->mtc.readProcessed,
p->mtc.inProcessed,
codeRes, status,
truncated,
stat);
}
if (res == SZ_OK)
{
stat->ReadRes = p->mtc.readRes;
if (p->writeRes != SZ_OK)
{
res = p->writeRes;
stat->CombinedRes_Type = SZ_ERROR_WRITE;
}
else if (p->mtc.readRes != SZ_OK && p->mtc.inProcessed == p->mtc.readProcessed)
else if (p->mtc.readRes != SZ_OK
// && p->mtc.inProcessed == p->mtc.readProcessed
&& stat->DecodeRes == SZ_ERROR_INPUT_EOF)
{
res = p->mtc.readRes;
stat->ReadRes = res;
stat->CombinedRes_Type = SZ_ERROR_READ;
}
else if (p->mainErrorCode != SZ_OK)
{
res = p->mainErrorCode;
}
else if (stat->DecodeRes != SZ_OK)
res = stat->DecodeRes;
}
stat->CombinedRes = res;
@@ -2714,7 +2783,6 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
PRF_STR("----- decoding ST -----");
}
}
#endif
@@ -2729,33 +2797,35 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
, stat
);
#ifndef _7ZIP_ST
// we must set error code from MT decoding at first
if (p->mainErrorCode != SZ_OK)
stat->DecodeRes = p->mainErrorCode;
#endif
XzStatInfo_SetStat(&p->dec,
p->finishMode,
p->readProcessed, p->inProcessed,
// p->readProcessed,
p->inProcessed,
p->codeRes, p->status,
False, // truncated
stat);
stat->ReadRes = p->readRes;
if (res == SZ_OK)
{
/*
if (p->writeRes != SZ_OK)
{
res = p->writeRes;
stat->CombinedRes_Type = SZ_ERROR_WRITE;
}
else
*/
if (p->readRes != SZ_OK && p->inProcessed == p->readProcessed)
if (p->readRes != SZ_OK
// && p->inProcessed == p->readProcessed
&& stat->DecodeRes == SZ_ERROR_INPUT_EOF)
{
// we set read error as combined error, only if that error was the reason
// of decoding problem
res = p->readRes;
stat->ReadRes = res;
stat->CombinedRes_Type = SZ_ERROR_READ;
}
#ifndef _7ZIP_ST
else if (p->mainErrorCode != SZ_OK)
res = p->mainErrorCode;
#endif
else if (stat->DecodeRes != SZ_OK)
res = stat->DecodeRes;
}
stat->CombinedRes = res;

View File

@@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode
2019-02-02 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -36,7 +36,7 @@
#define XzBlock_ClearFlags(p) (p)->flags = 0;
#define XzBlock_SetNumFilters(p, n) (p)->flags |= ((n) - 1);
#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1));
#define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE;
#define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE;
@@ -552,7 +552,7 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
t2r = (unsigned)numBlocks;
t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
@@ -751,7 +751,8 @@ static SRes Xz_CompressBlock(
}
else if (fp->ipDefined)
{
SetUi32(filter->props, fp->ip);
Byte *ptr = filter->props;
SetUi32(ptr, fp->ip);
filter->propsSize = 4;
}
}
@@ -1196,7 +1197,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
p->outBufSize = destBlockSize;
}
p->mtCoder.numThreadsMax = props->numBlockThreads_Max;
p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
RINOK(MtCoder_Code(&p->mtCoder));

View File

@@ -1,5 +1,5 @@
/* XzIn.c - Xz input
2018-07-04 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -152,7 +152,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
{
UInt64 indexSize;
Byte buf[XZ_STREAM_FOOTER_SIZE];
UInt64 pos = *startOffset;
UInt64 pos = (UInt64)*startOffset;
if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE;
@@ -202,8 +202,13 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
if (!XzFlags_IsSupported(p->flags))
return SZ_ERROR_UNSUPPORTED;
if (GetUi32(buf) != CrcCalc(buf + 4, 6))
{
/* to eliminate GCC 6.3 warning:
dereferencing type-punned pointer will break strict-aliasing rules */
const Byte *buf_ptr = buf;
if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6))
return SZ_ERROR_ARCHIVE;
}
indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2;
@@ -222,7 +227,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
return SZ_ERROR_ARCHIVE;
pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
RINOK(LookInStream_SeekTo(stream, pos));
*startOffset = pos;
*startOffset = (Int64)pos;
}
{
CXzStreamFlags headerFlags;
@@ -294,12 +299,12 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
SRes res;
Xz_Construct(&st);
res = Xz_ReadBackward(&st, stream, startOffset, alloc);
st.startOffset = *startOffset;
st.startOffset = (UInt64)*startOffset;
RINOK(res);
if (p->num == p->numAllocated)
{
size_t newNum = p->num + p->num / 4 + 1;
Byte *data = (Byte *)ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
const size_t newNum = p->num + p->num / 4 + 1;
void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
if (!data)
return SZ_ERROR_MEM;
p->numAllocated = newNum;
@@ -311,8 +316,8 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
p->streams[p->num++] = st;
if (*startOffset == 0)
break;
RINOK(LookInStream_SeekTo(stream, *startOffset));
if (progress && ICompressProgress_Progress(progress, endOffset - *startOffset, (UInt64)(Int64)-1) != SZ_OK)
RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset));
if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
return SZ_ERROR_PROGRESS;
}
return SZ_OK;

11
C/var_clang.mak Normal file
View File

@@ -0,0 +1,11 @@
# Variable set for a clang build with no explicit target platform:
# PLATFORM, every IS_* flag, MY_ARCH and USE_ASM are left empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=
# NOTE(review): O looks like the build output directory - confirm.
O=b/c
IS_X64=
IS_X86=
IS_ARM64=
CROSS_COMPILE=
MY_ARCH=
USE_ASM=
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain clang / clang++).
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

11
C/var_clang_arm64.mak Normal file
View File

@@ -0,0 +1,11 @@
# Variable set for a clang build targeting arm64:
# IS_ARM64 and USE_ASM are set to 1, the x86/x64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=arm64
# O is derived from PLATFORM (expands to b/c_arm64).
# NOTE(review): looks like the build output directory - confirm.
O=b/c_$(PLATFORM)
IS_X64=
IS_X86=
IS_ARM64=1
CROSS_COMPILE=
MY_ARCH=
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain clang / clang++).
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

12
C/var_clang_x64.mak Normal file
View File

@@ -0,0 +1,12 @@
# Variable set for a clang build targeting x64:
# IS_X64 and USE_ASM are set to 1, the x86/arm64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=x64
# O is derived from PLATFORM (expands to b/c_x64).
# NOTE(review): looks like the build output directory - confirm.
O=b/c_$(PLATFORM)
IS_X64=1
IS_X86=
IS_ARM64=
CROSS_COMPILE=
MY_ARCH=
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain clang / clang++).
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

12
C/var_clang_x86.mak Normal file
View File

@@ -0,0 +1,12 @@
# Variable set for a clang build targeting 32-bit x86:
# IS_X86 and USE_ASM are set to 1, the x64/arm64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=x86
# O is derived from PLATFORM (expands to b/c_x86).
# NOTE(review): looks like the build output directory - confirm.
O=b/c_$(PLATFORM)
IS_X64=
IS_X86=1
IS_ARM64=
CROSS_COMPILE=
# -m32 is the compiler switch that selects 32-bit code generation.
MY_ARCH=-m32
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain clang / clang++).
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

12
C/var_gcc.mak Normal file
View File

@@ -0,0 +1,12 @@
# Variable set for a gcc build with no explicit target platform:
# PLATFORM, every IS_* flag, MY_ARCH and USE_ASM are left empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=
# NOTE(review): O looks like the build output directory - confirm.
O=b/g
IS_X64=
IS_X86=
IS_ARM64=
CROSS_COMPILE=
MY_ARCH=
USE_ASM=
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain gcc / g++).
CC=$(CROSS_COMPILE)gcc
CXX=$(CROSS_COMPILE)g++
# NOTE(review): the line below looks like an optional MY_ARCH value kept for reference; it is not active.
# -march=armv8-a+crc+crypto

12
C/var_gcc_arm64.mak Normal file
View File

@@ -0,0 +1,12 @@
# Variable set for a gcc build targeting arm64:
# IS_ARM64 and USE_ASM are set to 1, the x86/x64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=arm64
# O is derived from PLATFORM (expands to b/g_arm64).
# NOTE(review): looks like the build output directory - confirm.
O=b/g_$(PLATFORM)
IS_X64=
IS_X86=
IS_ARM64=1
CROSS_COMPILE=
# Tuning only: -mtune selects scheduling for cortex-a53 without changing the instruction set.
MY_ARCH=-mtune=cortex-a53
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain gcc / g++).
CC=$(CROSS_COMPILE)gcc
CXX=$(CROSS_COMPILE)g++
# NOTE(review): the line below looks like an optional MY_ARCH value kept for reference; it is not active.
# -march=armv8-a+crc+crypto

10
C/var_gcc_x64.mak Normal file
View File

@@ -0,0 +1,10 @@
# Variable set for a gcc build targeting x64:
# IS_X64 and USE_ASM are set to 1, the x86/arm64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=x64
# O is derived from PLATFORM (expands to b/g_x64).
# NOTE(review): looks like the build output directory - confirm.
O=b/g_$(PLATFORM)
IS_X64=1
IS_X86=
IS_ARM64=
CROSS_COMPILE=
MY_ARCH=
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain gcc / g++).
CC=$(CROSS_COMPILE)gcc
CXX=$(CROSS_COMPILE)g++

11
C/var_gcc_x86.mak Normal file
View File

@@ -0,0 +1,11 @@
# Variable set for a gcc build targeting 32-bit x86:
# IS_X86 and USE_ASM are set to 1, the x64/arm64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=x86
# O is derived from PLATFORM (expands to b/g_x86).
# NOTE(review): looks like the build output directory - confirm.
O=b/g_$(PLATFORM)
IS_X64=
IS_X86=1
IS_ARM64=
CROSS_COMPILE=
# -m32 is the compiler switch that selects 32-bit code generation.
MY_ARCH=-m32
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain gcc / g++).
CC=$(CROSS_COMPILE)gcc
CXX=$(CROSS_COMPILE)g++

11
C/var_mac_arm64.mak Normal file
View File

@@ -0,0 +1,11 @@
# Variable set for a macOS clang build targeting arm64:
# IS_ARM64 and USE_ASM are set to 1, the x86/x64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=arm64
# O is derived from PLATFORM (expands to b/m_arm64).
# NOTE(review): looks like the build output directory - confirm.
O=b/m_$(PLATFORM)
IS_X64=
IS_X86=
IS_ARM64=1
CROSS_COMPILE=
# "-arch arm64" is the Apple toolchain switch that selects the target architecture.
MY_ARCH=-arch arm64
USE_ASM=1
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain clang / clang++).
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

11
C/var_mac_x64.mak Normal file
View File

@@ -0,0 +1,11 @@
# Variable set for a macOS clang build targeting x64:
# IS_X64 is set to 1, the x86/arm64 flags stay empty.
# NOTE(review): presumably read by the shared gcc/clang makefile (7zip_gcc.mak) - confirm.
PLATFORM=x64
# O is derived from PLATFORM (expands to b/m_x64).
# NOTE(review): looks like the build output directory - confirm.
O=b/m_$(PLATFORM)
IS_X64=1
IS_X86=
IS_ARM64=
CROSS_COMPILE=
# "-arch x86_64" is the Apple toolchain switch that selects the target architecture.
MY_ARCH=-arch x86_64
# USE_ASM is left empty for this target.
# NOTE(review): presumably this disables the assembler sources - confirm.
USE_ASM=
# Compiler names are prefixed with CROSS_COMPILE (empty here, so plain clang / clang++).
CC=$(CROSS_COMPILE)clang
CXX=$(CROSS_COMPILE)clang++
USE_CLANG=1

37
C/warn_clang.mak Normal file
View File

@@ -0,0 +1,37 @@
# Warning-flag sets for clang builds.
#
# Each list ends WITHOUT a trailing backslash and is followed by a blank line.
# A trailing "\" directly followed by the next assignment would join both
# definitions into one logical line, so the later variables (including
# CFLAGS_WARN) would never be defined on their own.

CFLAGS_WARN_CLANG_3_8_UNIQ = \
  -Wno-reserved-id-macro \
  -Wno-old-style-cast \
  -Wno-c++11-long-long \
  -Wno-unused-macros

# Enable everything, then switch off the warning groups listed below.
CFLAGS_WARN_CLANG_3_8 = \
  $(CFLAGS_WARN_CLANG_3_8_UNIQ) \
  -Weverything \
  -Wno-extra-semi \
  -Wno-sign-conversion \
  -Wno-language-extension-token \
  -Wno-global-constructors \
  -Wno-non-virtual-dtor \
  -Wno-switch-enum \
  -Wno-covered-switch-default \
  -Wno-cast-qual \
  -Wno-padded \
  -Wno-exit-time-destructors \
  -Wno-weak-vtables

# NOTE(review): -Wno-sign-conversion is repeated after -Wconversion, presumably
# because -Wconversion turns the sign-conversion group back on - confirm.
CFLAGS_WARN_CLANG_12 = $(CFLAGS_WARN_CLANG_3_8) \
  -Wno-extra-semi-stmt \
  -Wno-zero-as-null-pointer-constant \
  -Wno-deprecated-dynamic-exception-spec \
  -Wno-c++98-compat-pedantic \
  -Wno-atomic-implicit-seq-cst \
  -Wconversion \
  -Wno-sign-conversion

CFLAGS_WARN_1 = \
  -Wno-deprecated-copy-dtor

# Final flag set exported by this file.
CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_1)

37
C/warn_clang_mac.mak Normal file
View File

@@ -0,0 +1,37 @@
# Warning-flag sets for clang builds on macOS.
#
# Each list ends WITHOUT a trailing backslash and is followed by a blank line.
# A trailing "\" directly followed by the next assignment would join both
# definitions into one logical line, so the later variables (including
# CFLAGS_WARN) would never be defined on their own.

CFLAGS_WARN_CLANG_3_8_UNIQ = \
  -Wno-reserved-id-macro \
  -Wno-old-style-cast \
  -Wno-c++11-long-long \
  -Wno-unused-macros

# Enable everything, then switch off the warning groups listed below.
CFLAGS_WARN_CLANG_3_8 = \
  $(CFLAGS_WARN_CLANG_3_8_UNIQ) \
  -Weverything \
  -Wno-extra-semi \
  -Wno-sign-conversion \
  -Wno-language-extension-token \
  -Wno-global-constructors \
  -Wno-non-virtual-dtor \
  -Wno-switch-enum \
  -Wno-covered-switch-default \
  -Wno-cast-qual \
  -Wno-padded \
  -Wno-exit-time-destructors \
  -Wno-weak-vtables

# NOTE(review): -Wno-sign-conversion is repeated after -Wconversion, presumably
# because -Wconversion turns the sign-conversion group back on - confirm.
CFLAGS_WARN_CLANG_12 = $(CFLAGS_WARN_CLANG_3_8) \
  -Wno-extra-semi-stmt \
  -Wno-zero-as-null-pointer-constant \
  -Wno-deprecated-dynamic-exception-spec \
  -Wno-c++98-compat-pedantic \
  -Wno-atomic-implicit-seq-cst \
  -Wconversion \
  -Wno-sign-conversion

# macOS-specific suppressions (two of them repeat flags already set above).
CFLAGS_WARN_MAC = \
  -Wno-poison-system-directories \
  -Wno-c++11-long-long \
  -Wno-atomic-implicit-seq-cst

# Final flag set exported by this file.
CFLAGS_WARN = $(CFLAGS_WARN_CLANG_12) $(CFLAGS_WARN_MAC)

53
C/warn_gcc.mak Normal file
View File

@@ -0,0 +1,53 @@
# Warning-flag sets for gcc builds.
#
# Each list ends WITHOUT a trailing backslash and is followed by a blank line.
# A trailing "\" directly followed by the next assignment (or by a "#" comment
# line) would join the lines into one logical line; a comment inside such a
# line swallows everything that follows, so CFLAGS_WARN would never be defined.

# Intentionally empty flag set.
CFLAGS_WARN_GCC_4_5 =

CFLAGS_WARN_GCC_6 = \
  -Waddress \
  -Waggressive-loop-optimizations \
  -Wattributes \
  -Wbool-compare \
  -Wcast-align \
  -Wcomment \
  -Wdiv-by-zero \
  -Wduplicated-cond \
  -Wformat-contains-nul \
  -Winit-self \
  -Wint-to-pointer-cast \
  -Wunused \
  -Wunused-macros

# Disabled option, kept for reference:
#  -Wno-strict-aliasing

CFLAGS_WARN_GCC_9 = \
  -Waddress \
  -Waddress-of-packed-member \
  -Waggressive-loop-optimizations \
  -Wattributes \
  -Wbool-compare \
  -Wbool-operation \
  -Wcast-align \
  -Wcast-align=strict \
  -Wcomment \
  -Wdangling-else \
  -Wdiv-by-zero \
  -Wduplicated-branches \
  -Wduplicated-cond \
  -Wformat-contains-nul \
  -Wimplicit-fallthrough=5 \
  -Winit-self \
  -Wint-in-bool-context \
  -Wint-to-pointer-cast \
  -Wunused \
  -Wunused-macros \
  -Wconversion

# Disabled option, kept for reference:
#  -Wno-sign-conversion

CFLAGS_WARN_GCC_PPMD_UNALIGNED = \
  -Wno-strict-aliasing

# Final flag set exported by this file; the PPMD set below is not appended.
CFLAGS_WARN = $(CFLAGS_WARN_GCC_9)

#  $(CFLAGS_WARN_GCC_PPMD_UNALIGNED)

1183
CPP/7zip/7zip_gcc.mak Normal file
View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,10 @@
C_OBJS = $(C_OBJS) \
$O\Aes.obj
!IF "$(PLATFORM)" != "ia64" && "$(PLATFORM)" != "mips" && "$(PLATFORM)" != "arm" && "$(PLATFORM)" != "arm64"
!IF defined(USE_C_AES) || "$(PLATFORM)" == "arm" || "$(PLATFORM)" == "arm64"
C_OBJS = $(C_OBJS) \
$O\AesOpt.obj
!ELSEIF "$(PLATFORM)" != "ia64" && "$(PLATFORM)" != "mips" && "$(PLATFORM)" != "arm" && "$(PLATFORM)" != "arm64"
ASM_OBJS = $(ASM_OBJS) \
$O\AesOpt.obj
!ENDIF

Some files were not shown because too many files have changed in this diff Show More