This commit is contained in:
Igor Pavlov
2023-12-22 17:17:05 +00:00
committed by Kornel
parent ec44a8a070
commit a36c48cece
954 changed files with 42199 additions and 25482 deletions

View File

@@ -1,7 +1,9 @@
/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Compiler.h"
#include "CpuArch.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
@@ -9,41 +11,26 @@
#endif
#endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
#define USE_HW_SHA
#if !defined(_INTEL_COMPILER)
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
#if !defined(__SHA__) || !defined(__SSSE3__)
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#if defined(_MSC_VER)
// SSSE3: for clang-cl:
#include <tmmintrin.h>
#define __SHA__
#endif
#endif
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
// #pragma GCC target("sha,ssse3")
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#ifdef USE_MY_MM
#define USE_VER_MIN 1300
#else
#define USE_VER_MIN 1910
#define USE_VER_MIN 1900
#endif
#if _MSC_VER >= USE_VER_MIN
#if (_MSC_VER >= USE_VER_MIN)
#define USE_HW_SHA
#endif
#endif
@@ -52,16 +39,19 @@
#ifdef USE_HW_SHA
// #pragma message("Sha1 HW")
// #include <wmmintrin.h>
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
// sse/sse2/ssse3:
#include <tmmintrin.h>
// sha*:
#include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
// #include <intrin.h>
#endif
#if defined (__clang__) && defined(_MSC_VER)
// #if !defined(__SSSE3__)
// #endif
#if !defined(__SHA__)
#include <shaintrin.h>
#endif
#else
#ifdef USE_MY_MM
#include "My_mm.h"
@@ -87,37 +77,37 @@ SHA:
_mm_sha1*
*/
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f);
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
#ifdef __clang__
#define SHA1_RNDS4_RET_TYPE_CAST (__m128i)
#else
#define SHA1_RNDS4_RET_TYPE_CAST
#endif
#define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f);
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
#define LOAD_SHUFFLE(m, k) \
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
SHUFFLE_EPI8(m, mask); \
SHUFFLE_EPI8(m, mask) \
#define SM1(m0, m1, m2, m3) \
SHA1_MSG1(m0, m1); \
SHA1_MSG1(m0, m1) \
#define SM2(m0, m1, m2, m3) \
XOR_SI128(m3, m1); \
SHA1_MSG2(m3, m2); \
XOR_SI128(m3, m1) \
SHA1_MSG2(m3, m2) \
#define SM3(m0, m1, m2, m3) \
XOR_SI128(m3, m1); \
XOR_SI128(m3, m1) \
SM1(m0, m1, m2, m3) \
SHA1_MSG2(m3, m2); \
SHA1_MSG2(m3, m2) \
#define NNN(m0, m1, m2, m3)
@@ -139,9 +129,9 @@ SHA:
#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
e1 = abcd; \
SHA1_RND4(abcd, e0, (k) / 5); \
SHA1_NEXTE(e1, m1); \
OP(m0, m1, m2, m3); \
SHA1_RND4(abcd, e0, (k) / 5) \
SHA1_NEXTE(e1, m1) \
OP(m0, m1, m2, m3) \
#define R16(k, mx, OP0, OP1, OP2, OP3) \
R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
@@ -150,18 +140,18 @@ SHA:
R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
#define PREPARE_STATE \
SHUFFLE_EPI32 (abcd, 0x1B); \
SHUFFLE_EPI32 (e0, 0x1B); \
SHUFFLE_EPI32 (abcd, 0x1B) \
SHUFFLE_EPI32 (e0, 0x1B) \
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
@@ -190,15 +180,15 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
ADD_EPI32(e0, m0);
ADD_EPI32(e0, m0)
R16 ( 0, m0, SM1, SM3, SM3, SM3 );
R16 ( 1, m0, SM3, SM3, SM3, SM3 );
R16 ( 2, m0, SM3, SM3, SM3, SM3 );
R16 ( 3, m0, SM3, SM3, SM3, SM3 );
R16 ( 4, e2, SM2, NNN, NNN, NNN );
R16 ( 0, m0, SM1, SM3, SM3, SM3 )
R16 ( 1, m0, SM3, SM3, SM3, SM3 )
R16 ( 2, m0, SM3, SM3, SM3, SM3 )
R16 ( 3, m0, SM3, SM3, SM3, SM3 )
R16 ( 4, e2, SM2, NNN, NNN, NNN )
ADD_EPI32(abcd, abcd_save);
ADD_EPI32(abcd, abcd_save)
data += 64;
}
@@ -274,11 +264,11 @@ typedef uint32x4_t v128;
#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
#define T(m, c) t = vaddq_u32(m, c)
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
v128 abcd;
v128 c0, c1, c2, c3;
@@ -353,12 +343,12 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
// #include <stdlib.h>
// #include "Sha1.h"
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#pragma message("Sha1 HW-SW stub was used")
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
Sha1_UpdateBlocks(state, data, numBlocks);
/*
@@ -371,3 +361,26 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
}
#endif
#undef SU0
#undef SU1
#undef C
#undef P
#undef M
#undef H
#undef T
#undef MY_rev32_for_LE
#undef NNN
#undef LOAD_128
#undef STORE_128
#undef LOAD_SHUFFLE
#undef SM1
#undef SM2
#undef SM3
#undef NNN
#undef R4
#undef R16
#undef PREPARE_STATE
#undef USE_HW_SHA
#undef ATTRIB_SHA
#undef USE_VER_MIN