7#include <botan/internal/argon2_ssse3.h>
// NOTE(review): extraction fragment. These are the defaulted copy/move special
// members of SIMD_2x64 (Rule of Five). The type wraps a single trivially
// copyable __m128i, so the compiler-generated versions are correct.
// The leading integers ("17", "18", ...) are original-source line indices
// left behind by the extraction tool, not code.
17 SIMD_2x64& operator=(
const SIMD_2x64& other) =
default;
18 SIMD_2x64(
const SIMD_2x64& other) =
default;
20 SIMD_2x64& operator=(SIMD_2x64&& other) =
default;
21 SIMD_2x64(SIMD_2x64&& other) =
default;
// Default-constructor body: zero-initialize the vector register.
// (The `SIMD_2x64()` signature line is not visible in this extract — TODO confirm.)
25 m_simd = _mm_setzero_si128();
28 static SIMD_2x64
load_le(
const void* in)
30 return SIMD_2x64(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in)));
// NOTE(review): two store_le overload bodies; their signatures (presumably
// `void store_le(uint64_t out[2]) const` and `void store_le(uint8_t out[]) const`
// from the parameter types used below) are not visible in this extract — confirm.
// First overload: delegate the word-array form to the byte-array form.
35 this->
store_le(
reinterpret_cast<uint8_t*
>(out));
// Second overload: unaligned 16-byte store of the register to the output buffer.
40 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out), m_simd);
// operator+: copy *this into a temporary; the accumulate-and-return tail
// (presumably `retval += other; return retval;`) is missing from this extract.
43 SIMD_2x64
operator+(
const SIMD_2x64& other)
const
45 SIMD_2x64 retval(*
this);
// operator^: same copy-then-combine shape as operator+; tail lines
// (presumably `retval ^= other; return retval;`) are likewise missing.
50 SIMD_2x64
operator^(
const SIMD_2x64& other)
const
52 SIMD_2x64 retval(*
this);
// operator+= body: lane-wise 64-bit addition.
59 m_simd = _mm_add_epi64(m_simd, other.m_simd);
// operator^= body: bitwise XOR of the full 128-bit register.
64 m_simd = _mm_xor_si128(m_simd, other.m_simd);
// rotr: right-rotate each 64-bit lane by the compile-time constant ROT.
// NOTE(review): the `template<size_t ROT>` header and the branch keywords
// (likely `if constexpr(ROT == 16/24/32) ... else ...`) selecting between the
// shuffle tables below are missing from this extract — confirm against upstream.
69 SIMD_2x64
rotr()
const
71 static_assert(ROT > 0 && ROT < 64,
"Invalid rotation constant");
// ROT == 16 (a whole-byte rotation): PSHUFB table rotating each 8-byte lane
// right by 2 bytes — cheaper than two shifts plus an OR.
75 auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
// ROT == 24: per-lane rotation right by 3 bytes via byte shuffle.
80 auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
81 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
// ROT == 32: per-lane rotation right by 4 bytes via byte shuffle.
85 auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
86 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
// Generic case: classic (x >> ROT) | (x << (64-ROT)); SSE2 has no 64-bit rotate.
90 return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd,
static_cast<int>(ROT)),
91 _mm_slli_epi64(m_simd,
static_cast<int>(64-ROT))));
96 SIMD_2x64
rotl()
const
98 return this->rotr<64-ROT>();
102 static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y)
104 const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
105 return SIMD_2x64(_mm_add_epi64(m, m));
110 static SIMD_2x64 alignr(SIMD_2x64 a, SIMD_2x64 b)
112 static_assert(
T > 0 &&
T < 16,
"Invalid alignr constant");
113 return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd,
T));
// NOTE(review): fragments of two static helpers (presumably `twist` and
// `untwist`) that diagonalize/un-diagonalize the BlaMka state. Only the
// alignr<8> lines survive in this extract; the signatures, the T0/T1
// declarations, the C0/C1 swap, and the write-backs (B0 = T0; etc.) are
// missing — confirm against upstream before relying on this listing.
// twist: rotate the B row halves left across the two registers...
127 T0 = SIMD_2x64::alignr<8>(B1, B0);
128 T1 = SIMD_2x64::alignr<8>(B0, B1);
// ...and the D row halves in the opposite direction.
136 T0 = SIMD_2x64::alignr<8>(D0, D1);
137 T1 = SIMD_2x64::alignr<8>(D1, D0);
// untwist: exact mirror of twist — note the swapped argument order, which
// undoes the rotation above.
153 T0 = SIMD_2x64::alignr<8>(B0, B1);
154 T1 = SIMD_2x64::alignr<8>(B1, B0);
162 T0 = SIMD_2x64::alignr<8>(D1, D0);
163 T1 = SIMD_2x64::alignr<8>(D0, D1);
168 explicit SIMD_2x64(__m128i x) : m_simd(x) {}
// NOTE(review): fragments of the BlaMka G function body (signature missing
// from this extract). Each visible line is the permutation step
// a := a + b + 2 * lo32(a) * lo32(b), applied to two vector pairs at once.
// The interleaved `D ^= A; D = D.rotr<..>();` / `B ^= C; B = B.rotr<..>();`
// steps between these groups are not visible here — confirm against upstream.
183 A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
184 A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
190 C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
191 C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
// Second half-round: same column mixing repeated (with different rotation
// constants in the omitted xor/rotate steps).
197 A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
198 A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
204 C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
205 C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
// NOTE(review): body of one full BlaMka round (its signature — presumably
// eight SIMD_2x64& parameters, as used here — is missing from this extract).
// Apply G to the state columns...
222 blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
// ...rearrange so the diagonals become columns, mix again, then restore
// the original layout.
224 SIMD_2x64::twist(B0, B1, C0, C1, D0, D1);
225 blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
226 SIMD_2x64::untwist(B0, B1, C0, C1, D0, D1);
// NOTE(review): fragments of blamka_ssse3(uint64_t T[128]) — the Argon2
// compression permutation over T viewed as an 8x8 matrix of 2x64-bit vectors.
// The `SIMD_2x64 Tv[8];` declarations and the store-back loop bodies
// (presumably `Tv[j].store_le(&T[...]);`) are missing from this extract.
// Pass 1: apply a BlaMka round to each ROW (index 2*(8*i+j) walks a row).
233 for(
size_t i = 0; i != 8; ++i)
236 for(
size_t j = 0; j != 8; ++j)
237 Tv[j] = SIMD_2x64::load_le(&
T[2*(8*i+j)]);
239 blamka_R(Tv[0], Tv[1], Tv[2], Tv[3],
240 Tv[4], Tv[5], Tv[6], Tv[7]);
// Store the row back (loop body not visible in this extract).
242 for(
size_t j = 0; j != 8; ++j)
// Pass 2: apply a BlaMka round to each COLUMN (index 2*(i+8*j) walks a column).
246 for(
size_t i = 0; i != 8; ++i)
249 for(
size_t j = 0; j != 8; ++j)
250 Tv[j] = SIMD_2x64::load_le(&
T[2*(i+8*j)]);
252 blamka_R(Tv[0], Tv[1], Tv[2], Tv[3],
253 Tv[4], Tv[5], Tv[6], Tv[7]);
// Store the column back (loop body not visible in this extract).
255 for(
size_t j = 0; j != 8; ++j)
// --- NOTE(review): extraction residue below — a cross-reference index of
// related declarations emitted by the documentation tool, not part of this
// translation unit. Preserved verbatim as comments:
//   int(* final)(unsigned char *, CTX *)
//   #define BOTAN_FUNC_ISA(isa)
//   #define BOTAN_FORCE_INLINE
//   constexpr T rotl(T input)
//   OctetString operator^(const OctetString &k1, const OctetString &k2)
//   void blamka_ssse3(uint64_t T[128])
//   T load_le(const uint8_t in[], size_t off)
//   OID operator+(const OID &oid, uint32_t new_comp)
//   std::vector< uint8_t, Alloc > & operator^=(std::vector< uint8_t, Alloc > &out, const std::vector< uint8_t, Alloc2 > &in)
//   std::vector< T, Alloc > & operator+=(std::vector< T, Alloc > &out, const std::vector< T, Alloc2 > &in)
//   void store_le(uint16_t in, uint8_t out[2])
//   constexpr T rotr(T input)