7#include <botan/argon2.h>
8#include <botan/loadstor.h>
10#include <botan/mem_ops.h>
11#include <botan/rotate.h>
12#include <botan/exceptn.h>
14#if defined(BOTAN_HAS_THREAD_UTILS)
15 #include <botan/internal/thread_pool.h>
18#if defined(BOTAN_HAS_ARGON2_SSSE3)
19 #include <botan/internal/argon2_ssse3.h>
20 #include <botan/cpuid.h>
// Number of slices per pass: process_blocks iterates `slice` from 0 up to
// SYNC_POINTS, and the per-lane segment length is computed as lanes / SYNC_POINTS.
27const size_t SYNC_POINTS = 4;
// Feeds the Argon2 parameters and caller inputs into the supplied hash object.
//
// NOTE(review): this excerpt is missing original lines (the embedded source
// numbers jump, e.g. 36 -> 40). The opening brace, the declarations of
// `output_len` and `v`, the password/salt data updates and whatever finally
// writes into H0 are not visible here - confirm against the full file.
//
// Visible parameters:
//   H0          64-byte buffer (presumably receives the digest; the write is not visible)
//   blake2b     hash object that receives every update below
//   password, salt, key, ad and their *_len counterparts: caller inputs
//   y, p, M, t  the caller (argon2) passes mode, threads, M, t in these positions
//
// Every size_t value is narrowed with static_cast<uint32_t> before being
// hashed, so a value above 2^32 - 1 would be silently truncated.
29void argon2_H0(uint8_t H0[64],
30 HashFunction& blake2b,
32 const char* password,
size_t password_len,
33 const uint8_t salt[],
size_t salt_len,
34 const uint8_t key[],
size_t key_len,
35 const uint8_t ad[],
size_t ad_len,
36 size_t y,
size_t p,
size_t M,
size_t t)
// Fixed-width header, hashed in this order: p, output_len, M, t, v, y.
40 blake2b.update_le(
static_cast<uint32_t
>(p));
41 blake2b.update_le(
static_cast<uint32_t
>(output_len));
42 blake2b.update_le(
static_cast<uint32_t
>(M));
43 blake2b.update_le(
static_cast<uint32_t
>(t));
44 blake2b.update_le(
static_cast<uint32_t
>(v));
45 blake2b.update_le(
static_cast<uint32_t
>(y));
// Password length prefix (the update with the password bytes is not visible here).
47 blake2b.update_le(
static_cast<uint32_t
>(password_len));
// Salt length prefix (the update with the salt bytes is not visible here).
50 blake2b.update_le(
static_cast<uint32_t
>(
salt_len));
// Key: length prefix followed by the key bytes.
53 blake2b.update_le(
static_cast<uint32_t
>(key_len));
54 blake2b.update(key, key_len);
// Associated data: length prefix followed by the data bytes.
56 blake2b.update_le(
static_cast<uint32_t
>(ad_len));
57 blake2b.update(ad, ad_len);
// Derives `output_len` bytes of output from the final block of every lane in B.
//
// NOTE(review): several original lines are missing from this excerpt (braces,
// the inner loop body, the creation of `blake2b` / `blake2b_f`, and the
// conditions that choose between the hashing paths below). Comments marked
// "presumably" describe what those gaps most likely contain - confirm against
// the full file.
//
//   output, output_len  destination buffer and requested length
//   B                   working memory, indexed in units of 128 uint64_t words per block
//   memory              total number of blocks
//   threads             number of lanes
62void extract_key(uint8_t output[],
size_t output_len,
63 const secure_vector<uint64_t>& B,
64 size_t memory,
size_t threads)
// Blocks per lane (note: `lanes` is a per-lane length, not a lane count).
66 const size_t lanes = memory / threads;
// Accumulator with one slot per word of a block.
68 uint64_t sum[128] = { 0 };
70 for(
size_t lane = 0; lane != threads; ++lane)
// [start, end) spans the 128 words of the last block of this lane.
72 const size_t start = 128*(lane * lanes + lanes - 1);
73 const size_t end = 128*(lane * lanes + lanes);
// Loop body not visible; presumably folds B[j] into sum - TODO confirm.
75 for(
size_t j = start; j != end; ++j)
// First visible hashing path: hash output_len and the 128 accumulated words,
// then finalize straight into `output` (presumably the short-output case).
84 blake2b->update_le(
static_cast<uint32_t
>(output_len));
85 for(
size_t i = 0; i != 128; ++i)
86 blake2b->update_le(sum[i]);
87 blake2b->final(output);
// Second visible path: same input, but finalized into a 64-byte temporary T
// that is re-finalized while more than 64 bytes remain to be produced.
91 secure_vector<uint8_t>
T(64);
94 blake2b->update_le(
static_cast<uint32_t
>(output_len));
95 for(
size_t i = 0; i != 128; ++i)
96 blake2b->update_le(sum[i]);
97 blake2b->final(&
T[0]);
// Loop body mostly not visible; it decreases output_len somewhere, otherwise
// the loop could not terminate - TODO confirm the step size.
99 while(output_len > 64)
108 blake2b->final(&
T[0]);
115 blake2b->final(output);
// Tail handled through a separate hash object `blake2b_f` (its construction
// is not visible in this excerpt).
120 blake2b_f->update(
T);
121 blake2b_f->final(output);
// Fills the first two blocks (j = 0 and j = 1) of every lane in B from H0.
//
// NOTE(review): the remaining parameters (`memory`, `threads` - the caller
// passes them after H0) and several statements/braces are on lines missing
// from this excerpt.
//
//   B        working memory, 128 uint64_t words per block
//   blake2b  hash object used for all updates below
//   H0       64-byte seed
126void init_blocks(secure_vector<uint64_t>& B,
127 HashFunction& blake2b,
128 const uint8_t H0[64],
// i is the lane index.
134 for(
size_t i = 0; i != threads; ++i)
// Index (in blocks) of the first block of lane i.
136 const size_t B_off = i * (memory / threads);
140 for(
size_t j = 0; j != 2; ++j)
142 uint8_t
T[64] = { 0 };
// Hash input: the constant 1024 (the byte size of one 128 x 8-byte block),
// then H0, then the block index j and the lane index i as 32-bit values.
144 blake2b.update_le(
static_cast<uint32_t
>(1024));
145 blake2b.update(H0, 64);
146 blake2b.update_le(
static_cast<uint32_t
>(j));
147 blake2b.update_le(
static_cast<uint32_t
>(i));
// 30 rounds: each copies 32 / 8 = 4 words from T into the block at word
// offset 4*k, then feeds all 64 bytes of T back into the hash (the final()
// call that refreshes T is not visible here).
150 for(
size_t k = 0; k != 30; ++k)
152 load_le(&B[128*(B_off+j)+4*k],
T, 32 / 8);
153 blake2b.update(
T, 64);
// Last chunk: 64 / 8 = 8 words at word offset 120, so 30*4 + 8 = 128 words in total.
157 load_le(&B[128*(B_off+j)+4*30],
T, 64 / 8);
// NOTE(review): these four statements are body fragments of a function whose
// header is not visible in this excerpt - presumably blamka_G(A, B, C, D), the
// helper called by blamka(). The statements between them (original lines
// 165-166, 168-169, 171-172, 174+) are missing as well.
//
// Each statement computes x += y + 2 * lo32(x) * lo32(y): both operands are
// truncated to 32 bits and the product is carried out in 64 bits because of
// the leading static_cast<uint64_t>(2).
164 A += B + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(A)) *
static_cast<uint32_t
>(B);
167 C += D + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(C)) *
static_cast<uint32_t
>(D);
// Second half: the same two updates repeated.
170 A += B + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(A)) *
static_cast<uint32_t
>(B);
173 C += D + (
static_cast<uint64_t
>(2) *
static_cast<uint32_t
>(C)) *
static_cast<uint32_t
>(D);
// Applies blamka_G across the 128-word block T in two passes.
//
// NOTE(review): braces and the body of the SSSE3 branch are on lines missing
// from this excerpt.
177void blamka(uint64_t
T[128])
// Optional SSSE3 path, selected at run time. The branch body is not visible;
// presumably it calls blamka_ssse3(T) and returns - TODO confirm.
179#if defined(BOTAN_HAS_ARGON2_SSSE3)
180 if(CPUID::has_ssse3())
// Pass 1: eight groups of 16 consecutive words (i = 0, 16, ..., 112).
184 for(
size_t i = 0; i != 128; i += 16)
// Words at stride 4 within the group ...
186 blamka_G(
T[i+ 0],
T[i+ 4],
T[i+ 8],
T[i+ 12]);
187 blamka_G(
T[i+ 1],
T[i+ 5],
T[i+ 9],
T[i+ 13]);
188 blamka_G(
T[i+ 2],
T[i+ 6],
T[i+ 10],
T[i+ 14]);
189 blamka_G(
T[i+ 3],
T[i+ 7],
T[i+ 11],
T[i+ 15]);
// ... then the rotated (diagonal) combinations of the same 16 words.
191 blamka_G(
T[i+ 0],
T[i+ 5],
T[i+ 10],
T[i+ 15]);
192 blamka_G(
T[i+ 1],
T[i+ 6],
T[i+ 11],
T[i+ 12]);
193 blamka_G(
T[i+ 2],
T[i+ 7],
T[i+ 8],
T[i+ 13]);
194 blamka_G(
T[i+ 3],
T[i+ 4],
T[i+ 9],
T[i+ 14]);
// Pass 2: i = 0, 2, ..., 14. Each iteration touches the word pairs at
// i, i+16, i+32, ..., i+112, i.e. one pair from each 16-word group of pass 1.
197 for(
size_t i = 0; i != 128 / 8; i += 2)
199 blamka_G(
T[i+ 0],
T[i+ 32],
T[i+ 64],
T[i+ 96]);
200 blamka_G(
T[i+ 1],
T[i+ 33],
T[i+ 65],
T[i+ 97]);
201 blamka_G(
T[i+ 16],
T[i+ 48],
T[i+ 80],
T[i+112]);
202 blamka_G(
T[i+ 17],
T[i+ 49],
T[i+ 81],
T[i+113]);
// Rotated combinations of the same 16 words.
204 blamka_G(
T[i+ 0],
T[i+ 33],
T[i+ 80],
T[i+113]);
205 blamka_G(
T[i+ 1],
T[i+ 48],
T[i+ 81],
T[i+ 96]);
206 blamka_G(
T[i+ 16],
T[i+ 49],
T[i+ 64],
T[i+ 97]);
207 blamka_G(
T[i+ 17],
T[i+ 32],
T[i+ 65],
T[i+112]);
// Produces a batch of 128 address words for the data-independent addressing
// mode; process_block reads them as addresses[index % 128].
//
// NOTE(review): nearly the whole body is missing from this excerpt (original
// lines 214-224, 226-230, 232+). Only the two loop headers are visible, so
// the description of what happens inside them is an assumption.
//
//   T       128-word scratch buffer supplied by the caller
//   B       128-word output; process_block passes its `addresses` array here
//   n, lane, slice, memory, time, mode
//           forwarded unchanged by process_block
//   cnt     process_block passes its address_counter, which starts at 1 and
//           is incremented before each regeneration
211void gen_2i_addresses(uint64_t
T[128], uint64_t B[128],
212 size_t n,
size_t lane,
size_t slice,
size_t memory,
213 size_t time,
size_t mode,
size_t cnt)
// Two rounds; body not visible (presumably one blamka application per round - TODO confirm).
225 for(
size_t r = 0; r != 2; ++r)
// Per-word loop over the 128-word block; body not visible.
231 for(
size_t i = 0; i != 128; ++i)
// Maps a 64-bit pseudo-random word to the index of the reference block.
//
// NOTE(review): the rest of the parameter list and several statements are on
// lines missing from this excerpt. The call site in process_block passes
// (random, lanes, segments, threads, n, slice, lane, index), which matches
// the names used below.
//
// Returns ref_lane*lanes + (s + m - (p+1)) % lanes, narrowed to uint32_t.
236uint32_t index_alpha(uint64_t random,
// The upper 32 bits of `random` pick the reference lane.
245 size_t ref_lane =
static_cast<uint32_t
>(random >> 32) % threads;
// First slice of the first pass; branch body not visible (presumably forces
// ref_lane = lane - TODO confirm).
247 if(n == 0 && slice == 0)
// m and s are combined as (s + m - (p+1)) % lanes in the return statement;
// s is the first block index of the slice after the current one, modulo 4.
250 size_t m = 3*segments;
251 size_t s = ((slice+1) % 4)*segments;
// Adjustments to m; the enclosing conditions and branch bodies are not visible.
260 if(slice == 0 || lane == ref_lane)
264 if(index == 0 || lane == ref_lane)
// The lower 32 bits of `random` become p. Original lines 268-270, which
// presumably transform p before it is used, are not visible.
267 uint64_t p =
static_cast<uint32_t
>(random);
271 return static_cast<uint32_t
>(ref_lane*lanes + (s + m - (p+1)) % lanes);
// Computes all blocks of one segment: the blocks of lane `lane` that fall in
// slice `slice`, during pass `n`.
//
// NOTE(review): braces, the declarations of `index` and `T`, the bodies of
// the two guard `if`s and the statement between the two final loops
// (original line 314, presumably blamka(T)) are missing from this excerpt.
//
//   B         working memory, 128 uint64_t words per block
//   n         pass number
//   slice     slice number within the pass
//   lane      lane being processed
//   lanes     blocks per lane (the caller computes memory / threads)
//   segments  blocks per segment (the caller computes lanes / SYNC_POINTS)
//   threads   number of lanes
//   mode      0, 1 or 2 (validated in argon2)
//   memory, time  forwarded to gen_2i_addresses
274void process_block(secure_vector<uint64_t>& B,
275 size_t n,
size_t slice,
size_t lane,
276 size_t lanes,
size_t segments,
size_t threads, uint8_t mode,
277 size_t memory,
size_t time)
// Very first segment of a lane; body not visible (presumably starts `index`
// past the two blocks written by init_blocks - TODO confirm).
281 if(n == 0 && slice == 0)
// Data-independent addressing is used for mode 1 always, and for mode 2 only
// during the first half of the slices of the first pass.
284 const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2);
286 uint64_t addresses[128];
287 size_t address_counter = 1;
// Initial address batch (presumably guarded by `if(use_2i)` on a missing line).
291 gen_2i_addresses(
T, addresses, n, lane, slice, memory, time, mode, address_counter);
294 while(index < segments)
// Absolute index (in blocks) of the block being written.
296 const size_t offset = lane*lanes + slice*segments + index;
// NOTE(review): offset - 1 wraps around when offset == 0; the guard below
// presumably replaces `prev` for the first block of a lane - confirm.
298 size_t prev = offset - 1;
299 if(index == 0 && slice == 0)
// A batch holds 128 addresses, so regenerate every 128 indices.
302 if(use_2i && index > 0 && index % 128 == 0)
304 address_counter += 1;
305 gen_2i_addresses(
T, addresses, n, lane, slice, memory, time, mode, address_counter);
// Pseudo-random word: either from the precomputed batch or from the first
// word of the previous block. This is the only access here that uses the
// bounds-checked .at(); the others use operator[].
308 const uint64_t random = use_2i ? addresses[index % 128] : B.at(128*prev);
309 const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
// T = previous block XOR reference block.
311 for(
size_t i = 0; i != 128; ++i)
312 T[i] = B[128*prev+i] ^ B[128*new_offset+i];
// XOR T together with the previous and reference blocks into the target block.
316 for(
size_t i = 0; i != 128; ++i)
317 B[128*offset + i] ^=
T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
// Runs `t` passes over the working memory, one slice at a time, calling
// process_block once per lane for every (pass, slice) pair.
//
// NOTE(review): the remaining parameters (the caller passes t, memory,
// threads, mode after B), braces, the #else/#endif lines, the acquisition of
// `thread_pool` and the body of the future loop are missing from this excerpt.
323void process_blocks(secure_vector<uint64_t>& B,
// Blocks per lane and blocks per segment.
329 const size_t lanes = memory / threads;
330 const size_t segments = lanes / SYNC_POINTS;
// Contents of this conditional section are not visible (presumably obtains
// the thread pool used below - TODO confirm).
332#if defined(BOTAN_HAS_THREAD_UTILS)
336 for(
size_t n = 0; n != t; ++n)
338 for(
size_t slice = 0; slice != SYNC_POINTS; ++slice)
// Threaded build: submit one task per lane and collect the futures.
340#if defined(BOTAN_HAS_THREAD_UTILS)
343 std::vector<std::future<void>> fut_results;
344 fut_results.reserve(threads);
346 for(
size_t lane = 0; lane != threads; ++lane)
348 fut_results.push_back(thread_pool.run(
350 std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
// Loop body not visible; presumably waits on each future so that every lane
// finishes this slice before the next slice starts - TODO confirm.
353 for(
auto& fut : fut_results)
// Non-threaded build: process the lanes one after another.
360 for(
size_t lane = 0; lane != threads; ++lane)
362 process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
// Entry point: validates the parameters, then runs argon2_H0, init_blocks,
// process_blocks and extract_key in that order to fill `output`.
//
// NOTE(review): braces, the creation of `blake2`, the allocation of `B` and
// part of the argon2_H0 argument list (original lines 388-390) are missing
// from this excerpt.
//
//   output, output_len  destination buffer and requested length
//   password, salt, key, ad and their *_len counterparts: inputs hashed by argon2_H0
//   mode     must be 0, 1 or 2 (presumably Argon2d / Argon2i / Argon2id - TODO confirm)
//   threads  number of lanes, 1..128
//   M        memory parameter, between 8*threads and 8192*1024
//   t        number of passes
//
// Argument checking is delegated to BOTAN_ARG_CHECK, whose definition is not
// part of this excerpt.
370void argon2(uint8_t output[],
size_t output_len,
371 const char* password,
size_t password_len,
372 const uint8_t salt[],
size_t salt_len,
373 const uint8_t key[],
size_t key_len,
374 const uint8_t ad[],
size_t ad_len,
375 uint8_t mode,
size_t threads,
size_t M,
size_t t)
377 BOTAN_ARG_CHECK(mode == 0 || mode == 1 || mode == 2,
"Unknown Argon2 mode parameter");
379 BOTAN_ARG_CHECK(threads >= 1 && threads <= 128,
"Invalid Argon2 threads parameter");
// At least 8 blocks per lane; the upper bound is 8192*1024 blocks.
380 BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024,
"Invalid Argon2 M parameter");
385 uint8_t H0[64] = { 0 };
386 argon2_H0(H0, *blake2, output_len,
387 password, password_len,
391 mode, threads, M, t);
// Round M down to a multiple of SYNC_POINTS*threads so every lane divides
// evenly into SYNC_POINTS segments.
393 const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
397 init_blocks(B, *blake2, H0, memory, threads);
398 process_blocks(B, t, memory, threads, mode);
401 extract_key(output, output_len, B, memory, threads);
#define BOTAN_ASSERT_NOMSG(expr)
#define BOTAN_ARG_CHECK(expr, msg)
static std::unique_ptr< HashFunction > create_or_throw(const std::string &algo_spec, const std::string &provider="")
static Thread_Pool & global_instance()
#define BOTAN_FORCE_INLINE
void blamka_ssse3(uint64_t T[128])
void argon2(uint8_t output[], size_t output_len, const char *password, size_t password_len, const uint8_t salt[], size_t salt_len, const uint8_t key[], size_t key_len, const uint8_t ad[], size_t ad_len, uint8_t mode, size_t threads, size_t M, size_t t)
T load_le(const uint8_t in[], size_t off)
void copy_mem(T *out, const T *in, size_t n)
std::vector< T, secure_allocator< T > > secure_vector
void clear_mem(T *ptr, size_t n)
const uint8_t * cast_char_ptr_to_uint8(const char *s)