mirror of
https://github.com/Xevion/easy7zip.git
synced 2025-12-06 17:15:00 -06:00
23.01
This commit is contained in:
129
C/Sha256Opt.c
129
C/Sha256Opt.c
@@ -1,7 +1,9 @@
|
||||
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
#include "Compiler.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
|
||||
@@ -9,41 +11,26 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "CpuArch.h"
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#ifndef __SHA__
|
||||
#elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
|
||||
#define USE_HW_SHA
|
||||
#if !defined(_INTEL_COMPILER)
|
||||
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
|
||||
#if !defined(__SHA__) || !defined(__SSSE3__)
|
||||
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
|
||||
#if defined(_MSC_VER)
|
||||
// SSSE3: for clang-cl:
|
||||
#include <tmmintrin.h>
|
||||
#define __SHA__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 8) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#ifndef __SHA__
|
||||
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
|
||||
// #pragma GCC target("sha,ssse3")
|
||||
#endif
|
||||
#endif
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#if (__INTEL_COMPILER >= 1800) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#ifdef USE_MY_MM
|
||||
#define USE_VER_MIN 1300
|
||||
#else
|
||||
#define USE_VER_MIN 1910
|
||||
#define USE_VER_MIN 1900
|
||||
#endif
|
||||
#if _MSC_VER >= USE_VER_MIN
|
||||
#if (_MSC_VER >= USE_VER_MIN)
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#endif
|
||||
@@ -52,16 +39,19 @@
|
||||
#ifdef USE_HW_SHA
|
||||
|
||||
// #pragma message("Sha256 HW")
|
||||
// #include <wmmintrin.h>
|
||||
|
||||
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
|
||||
// sse/sse2/ssse3:
|
||||
#include <tmmintrin.h>
|
||||
// sha*:
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
|
||||
// #include <intrin.h>
|
||||
#endif
|
||||
#if defined (__clang__) && defined(_MSC_VER)
|
||||
// #if !defined(__SSSE3__)
|
||||
// #endif
|
||||
#if !defined(__SHA__)
|
||||
#include <shaintrin.h>
|
||||
#endif
|
||||
#else
|
||||
|
||||
#ifdef USE_MY_MM
|
||||
#include "My_mm.h"
|
||||
@@ -98,9 +88,9 @@ const UInt32 SHA256_K_ARRAY[64];
|
||||
#define K SHA256_K_ARRAY
|
||||
|
||||
|
||||
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
|
||||
#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
|
||||
#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
|
||||
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
|
||||
#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
|
||||
#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
|
||||
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
@@ -112,7 +102,7 @@ const UInt32 SHA256_K_ARRAY[64];
|
||||
|
||||
#define SM2(g0, g1, g2, g3) \
|
||||
tmp = _mm_alignr_epi8(g1, g0, 4); \
|
||||
ADD_EPI32(g2, tmp); \
|
||||
ADD_EPI32(g2, tmp) \
|
||||
SHA25G_MSG2(g2, g1); \
|
||||
|
||||
// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
|
||||
@@ -138,16 +128,16 @@ const UInt32 SHA256_K_ARRAY[64];
|
||||
// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
|
||||
|
||||
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
|
||||
RND2_0(g0, k); \
|
||||
OP0(g0, g1, g2, g3); \
|
||||
RND2_1; \
|
||||
OP1(g0, g1, g2, g3); \
|
||||
RND2_0(g0, k) \
|
||||
OP0(g0, g1, g2, g3) \
|
||||
RND2_1 \
|
||||
OP1(g0, g1, g2, g3) \
|
||||
|
||||
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
|
||||
R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
|
||||
R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
|
||||
R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
|
||||
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
|
||||
R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
|
||||
R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
|
||||
R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
|
||||
R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
|
||||
|
||||
#define PREPARE_STATE \
|
||||
tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
|
||||
@@ -157,11 +147,11 @@ const UInt32 SHA256_K_ARRAY[64];
|
||||
state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
|
||||
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#ifdef ATTRIB_SHA
|
||||
ATTRIB_SHA
|
||||
#endif
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
|
||||
__m128i tmp;
|
||||
@@ -192,13 +182,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
||||
|
||||
|
||||
|
||||
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
|
||||
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
|
||||
|
||||
ADD_EPI32(state0, state0_save);
|
||||
ADD_EPI32(state1, state1_save);
|
||||
ADD_EPI32(state0, state0_save)
|
||||
ADD_EPI32(state1, state1_save)
|
||||
|
||||
data += 64;
|
||||
}
|
||||
@@ -298,11 +288,11 @@ const UInt32 SHA256_K_ARRAY[64];
|
||||
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
|
||||
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#ifdef ATTRIB_SHA
|
||||
ATTRIB_SHA
|
||||
#endif
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
v128 state0, state1;
|
||||
|
||||
@@ -353,12 +343,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
||||
// #include <stdlib.h>
|
||||
|
||||
// #include "Sha256.h"
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
#pragma message("Sha256 HW-SW stub was used")
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
Sha256_UpdateBlocks(state, data, numBlocks);
|
||||
/*
|
||||
@@ -371,3 +361,26 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#undef K
|
||||
#undef RND2
|
||||
#undef RND2_0
|
||||
#undef RND2_1
|
||||
|
||||
#undef MY_rev32_for_LE
|
||||
#undef NNN
|
||||
#undef LOAD_128
|
||||
#undef STORE_128
|
||||
#undef LOAD_SHUFFLE
|
||||
#undef SM1
|
||||
#undef SM2
|
||||
|
||||
#undef NNN
|
||||
#undef R4
|
||||
#undef R16
|
||||
#undef PREPARE_STATE
|
||||
#undef USE_HW_SHA
|
||||
#undef ATTRIB_SHA
|
||||
#undef USE_VER_MIN
|
||||
|
||||
Reference in New Issue
Block a user