#if (CRYPTOPP_SSSE3_AVAILABLE)
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if (CRYPTOPP_SSE41_AVAILABLE)
# include <smmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
#endif

#if defined(__AVX512F__)
# define CRYPTOPP_AVX512_ROTATE 1
# include <immintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <stdint.h>
# include <arm_acle.h>
#endif

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "ppc_simd.h"
#endif

// Squash MS LNK4221 and libtool warnings
extern const char SPECK64_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word32;
using CryptoPP::word64;
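
// For reference: each SPECK64 round transforms two 32-bit words (x, y) with a
// 32-bit round key k as
//
//   encrypt:  x = (x >>> 8) + y;  x = x ^ k;  y = (y <<< 3);  y = y ^ x
//   decrypt:  y = y ^ x;  y = (y >>> 3);  x = x ^ k;  x = x - y;  x = (x <<< 8)
//
// (addition and subtraction are modulo 2^32). The SIMD routines below apply
// exactly these steps to four (or twelve) blocks in parallel, one 32-bit lane
// per block half.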
#if (CRYPTOPP_ARM_NEON_AVAILABLE)

template <class T>
inline T UnpackHigh32(const T& a, const T& b)
{
    const uint32x2_t x(vget_high_u32((uint32x4_t)a));
    const uint32x2_t y(vget_high_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}

template <class T>
inline T UnpackLow32(const T& a, const T& b)
{
    const uint32x2_t x(vget_low_u32((uint32x4_t)a));
    const uint32x2_t y(vget_low_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}
template <unsigned int R>
inline uint32x4_t RotateLeft32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, R));
    const uint32x4_t b(vshrq_n_u32(val, 32 - R));
    return vorrq_u32(a, b);
}

template <unsigned int R>
inline uint32x4_t RotateRight32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, 32 - R));
    const uint32x4_t b(vshrq_n_u32(val, R));
    return vorrq_u32(a, b);
}
#if defined(__aarch32__) || defined(__aarch64__)
// Faster than two shifts and an OR.
template <>
inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}

// Faster than two shifts and an OR.
template <>
inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
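
// Note: a rotate by 8 of a 32-bit lane is a pure byte permutation, which is why
// the specializations above use a single table lookup instead of two shifts and
// an OR. For example, a little-endian lane holding 0xDDCCBBAA (memory bytes
// AA BB CC DD) rotated left by 8 becomes 0xCCBBAADD (memory bytes DD AA BB CC),
// exactly the bytes the little-endian RotateLeft32<8> mask { 3,0,1,2, ... } selects.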
#endif // Aarch32 or Aarch64

inline void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // De-interleave the two blocks: x1 gets the odd-indexed 32-bit lanes, y1 the even ones.
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = vaddq_u32(x1, y1);
        x1 = veorq_u32(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = veorq_u32(y1, x1);
    }

    // Re-interleave the lanes back into the two blocks.
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
inline void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // De-interleave the two blocks: x1 gets the odd-indexed 32-bit lanes, y1 the even ones.
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = veorq_u32(x1, rk);
        x1 = vsubq_u32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // Re-interleave the lanes back into the two blocks.
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
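
// The 6-block variants below run three independent x/y pairs through the same
// round sequence, which keeps three dependency chains in flight and helps hide
// the latency of the rotate/add/xor instructions.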
inline void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = vaddq_u32(x1, y1);
        x2 = vaddq_u32(x2, y2);
        x3 = vaddq_u32(x3, y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
    }

    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
inline void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        x1 = vsubq_u32(x1, y1);
        x2 = vsubq_u32(x2, y2);
        x3 = vsubq_u32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE

#if defined(CRYPTOPP_SSE41_AVAILABLE)

template <unsigned int R>
inline __m128i RotateLeft32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
#endif
}

template <unsigned int R>
inline __m128i RotateRight32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
#endif
}

// The byte-shuffle specializations are faster than two shifts and an OR.
template <>
inline __m128i RotateLeft32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}

template <>
inline __m128i RotateRight32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-8);
#else
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
inline void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // De-interleave the two blocks: x1 gets the odd-indexed 32-bit lanes, y1 the even ones.
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = _mm_add_epi32(x1, y1);
        x1 = _mm_xor_si128(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = _mm_xor_si128(y1, x1);
    }

    // Re-interleave the lanes back into the two blocks.
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
inline void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // De-interleave the two blocks: x1 gets the odd-indexed 32-bit lanes, y1 the even ones.
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = _mm_xor_si128(x1, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // Re-interleave the lanes back into the two blocks.
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
inline void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = _mm_add_epi32(x1, y1);
        x2 = _mm_add_epi32(x2, y2);
        x3 = _mm_add_epi32(x3, y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
    }

    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
inline void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x2 = _mm_sub_epi32(x2, y2);
        x3 = _mm_sub_epi32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
#endif // CRYPTOPP_SSE41_AVAILABLE

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

using CryptoPP::uint8x16_p;
using CryptoPP::uint32x4_p;

using CryptoPP::VecAdd;
using CryptoPP::VecSub;
using CryptoPP::VecXor;
using CryptoPP::VecLoad;
using CryptoPP::VecPermute;

template<unsigned int C>
inline uint32x4_p RotateLeft32(const uint32x4_p val)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_rl(val, m);
}

template<unsigned int C>
inline uint32x4_p RotateRight32(const uint32x4_p val)
{
    const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
    return vec_rl(val, m);
}
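
// vec_rl rotates each 32-bit element left by the corresponding element of the
// count vector, so a right rotation by C is expressed as a left rotation by 32-C.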
void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // De-interleave the two blocks into the x and y word vectors.
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x1 = VecAdd(x1, y1);
        x1 = VecXor(x1, rk);

        y1 = RotateLeft32<3>(y1);
        y1 = VecXor(y1, x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // Re-interleave the word vectors back into the two blocks.
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // De-interleave the two blocks into the x and y word vectors.
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, m);
#endif

        y1 = VecXor(y1, x1);
        y1 = RotateRight32<3>(y1);

        x1 = VecXor(x1, rk);
        x1 = VecSub(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // Re-interleave the word vectors back into the two blocks.
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // De-interleave the block pairs into the x and y word vectors.
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = VecAdd(x1, y1);
        x2 = VecAdd(x2, y2);
        x3 = VecAdd(x3, y3);
        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);

        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // Re-interleave the word vectors back into the block pairs.
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // De-interleave the block pairs into the x and y word vectors.
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, m);
#endif

        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);

        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);
        x1 = VecSub(x1, y1);
        x2 = VecSub(x2, y2);
        x3 = VecSub(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // Re-interleave the word vectors back into the block pairs.
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
#endif // CRYPTOPP_ALTIVEC_AVAILABLE

ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#if defined(CRYPTOPP_SSE41_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSE41_AVAILABLE
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

size_t SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ALTIVEC_AVAILABLE

NAMESPACE_END
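
// Illustrative only: callers in the main library are expected to probe the CPU and
// route into the entry points above. A hedged sketch of such a caller (the HasNEON()
// probe and the m_rkeys/m_rounds members are assumptions about the surrounding
// library, not something defined in this file):
//
//   #if (CRYPTOPP_ARM_NEON_AVAILABLE)
//       if (HasNEON())
//           return SPECK64_Enc_AdvancedProcessBlocks_NEON(m_rkeys, (size_t)m_rounds,
//               inBlocks, xorBlocks, outBlocks, length, flags);
//   #endif
//   // ... otherwise fall back to the scalar implementation.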