Botan 2.19.3
Crypto and TLS for C++
argon2_ssse3.cpp
Go to the documentation of this file.
1/**
2* (C) 2022 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/argon2_ssse3.h>
8#include <tmmintrin.h>
9
10namespace Botan {
11
12namespace {
13
14class SIMD_2x64 final
15 {
16 public:
17 SIMD_2x64& operator=(const SIMD_2x64& other) = default;
18 SIMD_2x64(const SIMD_2x64& other) = default;
19
20 SIMD_2x64& operator=(SIMD_2x64&& other) = default;
21 SIMD_2x64(SIMD_2x64&& other) = default;
22
23 SIMD_2x64() // zero initialized
24 {
25 m_simd = _mm_setzero_si128();
26 }
27
28 static SIMD_2x64 load_le(const void* in)
29 {
30 return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
31 }
32
33 void store_le(uint64_t out[2]) const
34 {
35 this->store_le(reinterpret_cast<uint8_t*>(out));
36 }
37
38 void store_le(uint8_t out[]) const
39 {
40 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd);
41 }
42
43 SIMD_2x64 operator+(const SIMD_2x64& other) const
44 {
45 SIMD_2x64 retval(*this);
46 retval += other;
47 return retval;
48 }
49
50 SIMD_2x64 operator^(const SIMD_2x64& other) const
51 {
52 SIMD_2x64 retval(*this);
53 retval ^= other;
54 return retval;
55 }
56
57 void operator+=(const SIMD_2x64& other)
58 {
59 m_simd = _mm_add_epi64(m_simd, other.m_simd);
60 }
61
62 void operator^=(const SIMD_2x64& other)
63 {
64 m_simd = _mm_xor_si128(m_simd, other.m_simd);
65 }
66
67 template<size_t ROT>
68 BOTAN_FUNC_ISA("ssse3")
69 SIMD_2x64 rotr() const
70 {
71 static_assert(ROT > 0 && ROT < 64, "Invalid rotation constant");
72
73 if(ROT == 16)
74 {
75 auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77 }
78 else if(ROT == 24)
79 {
80 auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
81 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
82 }
83 else if(ROT == 32)
84 {
85 auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
86 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
87 }
88 else
89 {
90 return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
91 _mm_slli_epi64(m_simd, static_cast<int>(64-ROT))));
92 }
93 }
94
95 template<size_t ROT>
96 SIMD_2x64 rotl() const
97 {
98 return this->rotr<64-ROT>();
99 }
100
101 // Argon2 specific operation
102 static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y)
103 {
104 const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
105 return SIMD_2x64(_mm_add_epi64(m, m));
106 }
107
108 template<size_t T>
109 BOTAN_FUNC_ISA("ssse3")
110 static SIMD_2x64 alignr(SIMD_2x64 a, SIMD_2x64 b)
111 {
112 static_assert(T > 0 && T < 16, "Invalid alignr constant");
113 return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, T));
114 }
115
116 // Argon2 specific
117 static void twist(
118 SIMD_2x64& B0,
119 SIMD_2x64& B1,
120 SIMD_2x64& C0,
121 SIMD_2x64& C1,
122 SIMD_2x64& D0,
123 SIMD_2x64& D1)
124 {
125 SIMD_2x64 T0, T1;
126
127 T0 = SIMD_2x64::alignr<8>(B1, B0);
128 T1 = SIMD_2x64::alignr<8>(B0, B1);
129 B0 = T0;
130 B1 = T1;
131
132 T0 = C0;
133 C0 = C1;
134 C1 = T0;
135
136 T0 = SIMD_2x64::alignr<8>(D0, D1);
137 T1 = SIMD_2x64::alignr<8>(D1, D0);
138 D0 = T0;
139 D1 = T1;
140 }
141
142 // Argon2 specific
143 static void untwist(
144 SIMD_2x64& B0,
145 SIMD_2x64& B1,
146 SIMD_2x64& C0,
147 SIMD_2x64& C1,
148 SIMD_2x64& D0,
149 SIMD_2x64& D1)
150 {
151 SIMD_2x64 T0, T1;
152
153 T0 = SIMD_2x64::alignr<8>(B0, B1);
154 T1 = SIMD_2x64::alignr<8>(B1, B0);
155 B0 = T0;
156 B1 = T1;
157
158 T0 = C0;
159 C0 = C1;
160 C1 = T0;
161
162 T0 = SIMD_2x64::alignr<8>(D1, D0);
163 T1 = SIMD_2x64::alignr<8>(D0, D1);
164 D0 = T0;
165 D1 = T1;
166 }
167
168 explicit SIMD_2x64(__m128i x) : m_simd(x) {}
169 private:
170 __m128i m_simd;
171 };
172
173BOTAN_FORCE_INLINE void blamka_G(
174 SIMD_2x64& A0,
175 SIMD_2x64& A1,
176 SIMD_2x64& B0,
177 SIMD_2x64& B1,
178 SIMD_2x64& C0,
179 SIMD_2x64& C1,
180 SIMD_2x64& D0,
181 SIMD_2x64& D1)
182 {
183 A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
184 A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
185 D0 ^= A0;
186 D1 ^= A1;
187 D0 = D0.rotr<32>();
188 D1 = D1.rotr<32>();
189
190 C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
191 C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
192 B0 ^= C0;
193 B1 ^= C1;
194 B0 = B0.rotr<24>();
195 B1 = B1.rotr<24>();
196
197 A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
198 A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
199 D0 ^= A0;
200 D1 ^= A1;
201 D0 = D0.rotr<16>();
202 D1 = D1.rotr<16>();
203
204 C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
205 C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
206 B0 ^= C0;
207 B1 ^= C1;
208 B0 = B0.rotr<63>();
209 B1 = B1.rotr<63>();
210 }
211
212BOTAN_FORCE_INLINE void blamka_R(
213 SIMD_2x64& A0,
214 SIMD_2x64& A1,
215 SIMD_2x64& B0,
216 SIMD_2x64& B1,
217 SIMD_2x64& C0,
218 SIMD_2x64& C1,
219 SIMD_2x64& D0,
220 SIMD_2x64& D1)
221 {
222 blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
223
224 SIMD_2x64::twist(B0, B1, C0, C1, D0, D1);
225 blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
226 SIMD_2x64::untwist(B0, B1, C0, C1, D0, D1);
227 }
228
229}
230
231void blamka_ssse3(uint64_t T[128])
232 {
233 for(size_t i = 0; i != 8; ++i)
234 {
235 SIMD_2x64 Tv[8];
236 for(size_t j = 0; j != 8; ++j)
237 Tv[j] = SIMD_2x64::load_le(&T[2*(8*i+j)]);
238
239 blamka_R(Tv[0], Tv[1], Tv[2], Tv[3],
240 Tv[4], Tv[5], Tv[6], Tv[7]);
241
242 for(size_t j = 0; j != 8; ++j)
243 Tv[j].store_le(&T[2*(8*i+j)]);
244 }
245
246 for(size_t i = 0; i != 8; ++i)
247 {
248 SIMD_2x64 Tv[8];
249 for(size_t j = 0; j != 8; ++j)
250 Tv[j] = SIMD_2x64::load_le(&T[2*(i+8*j)]);
251
252 blamka_R(Tv[0], Tv[1], Tv[2], Tv[3],
253 Tv[4], Tv[5], Tv[6], Tv[7]);
254
255 for(size_t j = 0; j != 8; ++j)
256 Tv[j].store_le(&T[2*(i+8*j)]);
257 }
258 }
259
260}
int(* final)(unsigned char *, CTX *)
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:77
#define BOTAN_FORCE_INLINE
Definition compiler.h:205
fe T
Definition ge.cpp:37
constexpr T rotl(T input)
Definition rotate.h:23
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:125
void blamka_ssse3(uint64_t T[128])
T load_le(const uint8_t in[], size_t off)
Definition loadstor.h:123
OID operator+(const OID &oid, uint32_t new_comp)
Definition asn1_oid.cpp:122
std::vector< uint8_t, Alloc > & operator^=(std::vector< uint8_t, Alloc > &out, const std::vector< uint8_t, Alloc2 > &in)
Definition mem_ops.h:353
std::vector< T, Alloc > & operator+=(std::vector< T, Alloc > &out, const std::vector< T, Alloc2 > &in)
Definition secmem.h:79
void store_le(uint16_t in, uint8_t out[2])
Definition loadstor.h:454
constexpr T rotr(T input)
Definition rotate.h:35