10#include <botan/sha160.h>
11#include <botan/rotate.h>
16namespace SHA1_SSE2_F {
/*
* Prepare the first 16 message words: byte-swap each big-endian 32-bit
* word of W into native order (the two 16-bit shuffles swap the halves
* of each 32-bit lane; the shift/or swaps the bytes of each 16-bit
* lane -- together a per-lane bswap), then add the round-0..19
* constant. P receives W[t..t+3] + K00_19; W is left byte-swapped for
* the later prep() schedule steps. K00_19 must be in scope at the use
* site. Wrapped in do/while(0) so the macro is statement-safe.
*/
#define prep00_15(P, W)                                    \
   do {                                                    \
      W = _mm_shufflehi_epi16(W, _MM_SHUFFLE(2, 3, 0, 1)); \
      W = _mm_shufflelo_epi16(W, _MM_SHUFFLE(2, 3, 0, 1)); \
      W = _mm_or_si128(_mm_slli_epi16(W, 8),               \
                       _mm_srli_epi16(W, 8));              \
      P.u128 = _mm_add_epi32(W, K00_19);                   \
   } while(0)
/*
* One step of the SSE2 SHA-1 message schedule: given W[t-16..t-1] in
* XW0..XW3 (four 32-bit words per vector), compute
*   W[t+i] = rotl1(W[t+i-3] ^ W[t+i-8] ^ W[t+i-14] ^ W[t+i-16])
* for i = 0..3, store W[t..t+3] back into XW0, and write
* W[t..t+3] + K into prep.u128. Lane 3's W[t-3] term is W[t], which is
* not known until lanes 0..2 are rotated; it is folded in afterwards
* via the slli_si128/epi32 trick (rotl1 distributes over xor).
*/
#define prep(prep, XW0, XW1, XW2, XW3, K)                      \
   do {                                                        \
      __m128i r0, r1, r2, r3;                                  \
                                                               \
      /* W[t-4..t-2] (lane 3 zeroed -- patched up below) */    \
      r3 = _mm_srli_si128((XW3), 4);                           \
      r0 = (XW0);                                              \
      /* move high 64 bits of XW0 into the low 64 bits */      \
      r1 = _mm_shuffle_epi32((XW0), _MM_SHUFFLE(1,0,3,2));     \
      /* r1 = W[t-14..t-11] */                                 \
      r1 = _mm_unpacklo_epi64(r1, (XW1));                      \
      r2 = (XW2);                                              \
                                                               \
      r0 = _mm_xor_si128(r1, r0);                              \
      r2 = _mm_xor_si128(r3, r2);                              \
      r0 = _mm_xor_si128(r2, r0);                              \
      /* r0 = unrotated W[t..t+2]; lane 3 still lacks W[t] */  \
                                                               \
      /* save unrotated W[t] in lane 3 of r2 */                \
      r2 = _mm_slli_si128(r0, 12);                             \
      /* rotl1 each lane: (x < 0) is the carried-out top bit */\
      r1 = _mm_cmplt_epi32(r0, _mm_setzero_si128());           \
      r0 = _mm_add_epi32(r0, r0);                              \
      r0 = _mm_sub_epi32(r0, r1);                              \
                                                               \
      /* rotl2(unrotated W[t]) == rotl1(W[t]) for lane 3 */    \
      r3 = _mm_srli_epi32(r2, 30);                             \
      r2 = _mm_slli_epi32(r2, 2);                              \
                                                               \
      r0 = _mm_xor_si128(r0, r3);                              \
      r0 = _mm_xor_si128(r0, r2);                              \
                                                               \
      (XW0) = r0;                                              \
      (prep).u128 = _mm_add_epi32(r0, K);                      \
   } while(0)
115inline void F1(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
117 E += (D ^ (B & (C ^ D))) + msg + rotl<5>(A);
124inline void F2(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
126 E += (B ^ C ^ D) + msg + rotl<5>(A);
133inline void F3(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
135 E += ((B & C) | ((B | C) & D)) + msg + rotl<5>(A);
142inline void F4(uint32_t A, uint32_t& B, uint32_t C, uint32_t D, uint32_t& E, uint32_t msg)
144 E += (B ^ C ^ D) + msg + rotl<5>(A);
/*
* SHA-1 compression: process 'blocks' consecutive 64-byte blocks from
* 'input', updating the 5-word chaining state in 'digest'. SSE2 is
* used to compute four prepared message words (W[t] + K) per step.
*
* NOTE(review): this extract is missing the union P0..P3 declarations,
* the 80 F1..F4 round calls and the loop/function braces present in
* the full source; code lines below are left byte-identical, including
* the fused original line-number prefixes from the extraction.
*/
157void SHA_160::sse2_compress_n(
secure_vector<uint32_t>& digest, const uint8_t input[],
size_t blocks)
159 using namespace SHA1_SSE2_F;
/* Round constants for rounds 0-19, 20-39, 40-59 and 60-79 */
161 const __m128i K00_19 = _mm_set1_epi32(0x5A827999);
162 const __m128i K20_39 = _mm_set1_epi32(0x6ED9EBA1);
163 const __m128i K40_59 = _mm_set1_epi32(0x8F1BBCDC);
164 const __m128i K60_79 = _mm_set1_epi32(0xCA62C1D6);
/* Load chaining state; B..E initializers elided in this extract */
166 uint32_t A = digest[0],
/* View the input byte stream as 128-bit vectors */
172 const __m128i* input_mm =
reinterpret_cast<const __m128i*
>(input);
174 for(
size_t i = 0; i != blocks; ++i)
/* One 64-byte block as four unaligned 128-bit loads */
183 __m128i W0 = _mm_loadu_si128(&input_mm[0]);
186 __m128i W1 = _mm_loadu_si128(&input_mm[1]);
189 __m128i W2 = _mm_loadu_si128(&input_mm[2]);
192 __m128i W3 = _mm_loadu_si128(&input_mm[3]);
/* Read one 32-bit lane out of a prepared P vector */
203#define GET_P_32(P, i) P.u32[i]
/* Message schedule: each prep() advances the W window by four words
   and yields W[t..t+3] + K in P. The K passed matches the round group
   in which those words are consumed -- the schedule runs ahead of the
   rounds, so the constant switches before the multiple-of-20 index. */
209 prep(P0, W0, W1, W2, W3, K00_19);
215 prep(P1, W1, W2, W3, W0, K20_39);
221 prep(P2, W2, W3, W0, W1, K20_39);
227 prep(P3, W3, W0, W1, W2, K20_39);
233 prep(P0, W0, W1, W2, W3, K20_39);
239 prep(P1, W1, W2, W3, W0, K20_39);
245 prep(P2, W2, W3, W0, W1, K40_59);
251 prep(P3, W3, W0, W1, W2, K40_59);
257 prep(P0, W0, W1, W2, W3, K40_59);
263 prep(P1, W1, W2, W3, W0, K40_59);
269 prep(P2, W2, W3, W0, W1, K40_59);
275 prep(P3, W3, W0, W1, W2, K60_79);
281 prep(P0, W0, W1, W2, W3, K60_79);
287 prep(P1, W1, W2, W3, W0, K60_79);
293 prep(P2, W2, W3, W0, W1, K60_79);
299 prep(P3, W3, W0, W1, W2, K60_79);
/* Fold this block's result into the chaining state (Davies-Meyer) */
321 A = (digest[0] += A);
322 B = (digest[1] += B);
323 C = (digest[2] += C);
324 D = (digest[3] += D);
325 E = (digest[4] += E);
/* Advance to the next 64-byte block (four 16-byte vectors) */
327 input_mm += (64 / 16);
#define BOTAN_FUNC_ISA(isa)
std::vector< T, secure_allocator< T > > secure_vector
#define prep(prep, XW0, XW1, XW2, XW3, K)