Botan 2.19.3
Crypto and TLS for C++11
zfec_sse2.cpp
Go to the documentation of this file.
1/*
2* (C) 2009,2010,2021 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/zfec.h>
8#include <botan/internal/simd_32.h>
9#include <immintrin.h>
10
11namespace Botan {
12
13namespace {
14
15inline SIMD_4x32 rshift_1_u8(const SIMD_4x32& v)
16 {
17 return SIMD_4x32(_mm_add_epi8(v.raw(), v.raw()));
18 }
19
20inline SIMD_4x32 high_bit_set_u8(const SIMD_4x32& v)
21 {
22 return SIMD_4x32(_mm_cmpgt_epi8(_mm_setzero_si128(), v.raw()));
23 }
24
25}
26
27BOTAN_FUNC_ISA("sse2")
28size_t ZFEC::addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size)
29 {
30 const SIMD_4x32 polynomial = SIMD_4x32::splat_u8(0x1D);
31
32 const size_t orig_size = size;
33
34 // unrolled out to cache line size
35 while(size >= 64)
36 {
37 SIMD_4x32 x_1 = SIMD_4x32::load_le(x);
38 SIMD_4x32 x_2 = SIMD_4x32::load_le(x + 16);
39 SIMD_4x32 x_3 = SIMD_4x32::load_le(x + 32);
40 SIMD_4x32 x_4 = SIMD_4x32::load_le(x + 48);
41
42 SIMD_4x32 z_1 = SIMD_4x32::load_le(z);
43 SIMD_4x32 z_2 = SIMD_4x32::load_le(z + 16);
44 SIMD_4x32 z_3 = SIMD_4x32::load_le(z + 32);
45 SIMD_4x32 z_4 = SIMD_4x32::load_le(z + 48);
46
47 if(y & 0x01)
48 {
49 z_1 ^= x_1;
50 z_2 ^= x_2;
51 z_3 ^= x_3;
52 z_4 ^= x_4;
53 }
54
55 for(size_t j = 1; j != 8; ++j)
56 {
57 /*
58 * Each byte of each mask is either 0 or the polynomial 0x1D,
59 * depending on if the high bit of x_i is set or not.
60 */
61
62 const SIMD_4x32 mask_1(high_bit_set_u8(x_1));
63 const SIMD_4x32 mask_2(high_bit_set_u8(x_2));
64 const SIMD_4x32 mask_3(high_bit_set_u8(x_3));
65 const SIMD_4x32 mask_4(high_bit_set_u8(x_4));
66
67 // x <<= 1
68 x_1 = rshift_1_u8(x_1);
69 x_2 = rshift_1_u8(x_2);
70 x_3 = rshift_1_u8(x_3);
71 x_4 = rshift_1_u8(x_4);
72
73 x_1 ^= mask_1 & polynomial;
74 x_2 ^= mask_2 & polynomial;
75 x_3 ^= mask_3 & polynomial;
76 x_4 ^= mask_4 & polynomial;
77
78 if((y >> j) & 1)
79 {
80 z_1 ^= x_1;
81 z_2 ^= x_2;
82 z_3 ^= x_3;
83 z_4 ^= x_4;
84 }
85 }
86
87 z_1.store_le(z);
88 z_2.store_le(z + 16);
89 z_3.store_le(z + 32);
90 z_4.store_le(z + 48);
91
92 x += 64;
93 z += 64;
94 size -= 64;
95 }
96
97 return orig_size - size;
98 }
99
100}
static SIMD_4x32 splat_u8(uint8_t B)
Definition simd_32.h:145
static SIMD_4x32 load_le(const void *in)
Definition simd_32.h:160
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:77