Botan 2.19.3
Crypto and TLS for C++
shacal2_simd.cpp
Go to the documentation of this file.
1/*
2* SHACAL-2 using SIMD
3* (C) 2017 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/shacal2.h>
9#include <botan/internal/simd_32.h>
10
11namespace Botan {
12
13namespace {
14
15inline
16void SHACAL2_Fwd(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D,
17 const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H,
18 uint32_t RK)
19 {
20 H += E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK);
21 D += H;
22 H += A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
23 }
24
25inline
26void SHACAL2_Rev(const SIMD_4x32& A, const SIMD_4x32& B, const SIMD_4x32& C, SIMD_4x32& D,
27 const SIMD_4x32& E, const SIMD_4x32& F, const SIMD_4x32& G, SIMD_4x32& H,
28 uint32_t RK)
29 {
30 H -= A.rho<2,13,22>() + ((A & B) | ((A | B) & C));
31 D -= H;
32 H -= E.rho<6,11,25>() + ((E & F) ^ (~E & G)) + SIMD_4x32::splat(RK);
33 }
34
35}
36
37void SHACAL2::simd_encrypt_4(const uint8_t in[], uint8_t out[]) const
38 {
39 SIMD_4x32 A = SIMD_4x32::load_be(in);
40 SIMD_4x32 E = SIMD_4x32::load_be(in+16);
41 SIMD_4x32 B = SIMD_4x32::load_be(in+32);
42 SIMD_4x32 F = SIMD_4x32::load_be(in+48);
43
44 SIMD_4x32 C = SIMD_4x32::load_be(in+64);
45 SIMD_4x32 G = SIMD_4x32::load_be(in+80);
46 SIMD_4x32 D = SIMD_4x32::load_be(in+96);
47 SIMD_4x32 H = SIMD_4x32::load_be(in+112);
48
49 SIMD_4x32::transpose(A, B, C, D);
50 SIMD_4x32::transpose(E, F, G, H);
51
52 for(size_t r = 0; r != 64; r += 8)
53 {
54 SHACAL2_Fwd(A, B, C, D, E, F, G, H, m_RK[r+0]);
55 SHACAL2_Fwd(H, A, B, C, D, E, F, G, m_RK[r+1]);
56 SHACAL2_Fwd(G, H, A, B, C, D, E, F, m_RK[r+2]);
57 SHACAL2_Fwd(F, G, H, A, B, C, D, E, m_RK[r+3]);
58 SHACAL2_Fwd(E, F, G, H, A, B, C, D, m_RK[r+4]);
59 SHACAL2_Fwd(D, E, F, G, H, A, B, C, m_RK[r+5]);
60 SHACAL2_Fwd(C, D, E, F, G, H, A, B, m_RK[r+6]);
61 SHACAL2_Fwd(B, C, D, E, F, G, H, A, m_RK[r+7]);
62 }
63
64 SIMD_4x32::transpose(A, B, C, D);
65 SIMD_4x32::transpose(E, F, G, H);
66
67 A.store_be(out);
68 E.store_be(out+16);
69 B.store_be(out+32);
70 F.store_be(out+48);
71
72 C.store_be(out+64);
73 G.store_be(out+80);
74 D.store_be(out+96);
75 H.store_be(out+112);
76 }
77
78void SHACAL2::simd_decrypt_4(const uint8_t in[], uint8_t out[]) const
79 {
80 SIMD_4x32 A = SIMD_4x32::load_be(in);
81 SIMD_4x32 E = SIMD_4x32::load_be(in+16);
82 SIMD_4x32 B = SIMD_4x32::load_be(in+32);
83 SIMD_4x32 F = SIMD_4x32::load_be(in+48);
84
85 SIMD_4x32 C = SIMD_4x32::load_be(in+64);
86 SIMD_4x32 G = SIMD_4x32::load_be(in+80);
87 SIMD_4x32 D = SIMD_4x32::load_be(in+96);
88 SIMD_4x32 H = SIMD_4x32::load_be(in+112);
89
90 SIMD_4x32::transpose(A, B, C, D);
91 SIMD_4x32::transpose(E, F, G, H);
92
93 for(size_t r = 0; r != 64; r += 8)
94 {
95 SHACAL2_Rev(B, C, D, E, F, G, H, A, m_RK[63-r]);
96 SHACAL2_Rev(C, D, E, F, G, H, A, B, m_RK[62-r]);
97 SHACAL2_Rev(D, E, F, G, H, A, B, C, m_RK[61-r]);
98 SHACAL2_Rev(E, F, G, H, A, B, C, D, m_RK[60-r]);
99 SHACAL2_Rev(F, G, H, A, B, C, D, E, m_RK[59-r]);
100 SHACAL2_Rev(G, H, A, B, C, D, E, F, m_RK[58-r]);
101 SHACAL2_Rev(H, A, B, C, D, E, F, G, m_RK[57-r]);
102 SHACAL2_Rev(A, B, C, D, E, F, G, H, m_RK[56-r]);
103 }
104
105 SIMD_4x32::transpose(A, B, C, D);
106 SIMD_4x32::transpose(E, F, G, H);
107
108 A.store_be(out);
109 E.store_be(out+16);
110 B.store_be(out+32);
111 F.store_be(out+48);
112
113 C.store_be(out+64);
114 G.store_be(out+80);
115 D.store_be(out+96);
116 H.store_be(out+112);
117 }
118
119}
static SIMD_4x32 splat(uint32_t B)
Definition simd_32.h:131
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
Definition simd_32.h:564
static SIMD_4x32 load_be(const void *in)
Definition simd_32.h:177