#if (CRYPTOPP_SSSE3_AVAILABLE)
# include <pmmintrin.h>
# include <tmmintrin.h>
#endif

#if (CRYPTOPP_SSE41_AVAILABLE)
# include <smmintrin.h>
#endif

#if defined(__XOP__)
# include <ammintrin.h>
#endif

#if defined(__AVX512F__) && defined(__AVX512VL__)
# define CRYPTOPP_AVX512_ROTATE 1
# include <immintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include <arm_neon.h>
#endif

#if (CRYPTOPP_ARM_ACLE_AVAILABLE)
# include <arm_acle.h>
#endif

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "ppc_simd.h"
#endif

#ifndef CRYPTOPP_INLINE
# if defined(CRYPTOPP_DEBUG)
#  define CRYPTOPP_INLINE static
# else
#  define CRYPTOPP_INLINE inline
# endif
#endif

// Squash MS LNK4221 and libtool warnings
extern const char SPECK64_SIMD_FNAME[] = __FILE__;
ANONYMOUS_NAMESPACE_BEGIN

using CryptoPP::word32;
using CryptoPP::word64;
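// All of the SIMD paths below implement the same SPECK64 round on 32-bit
// words, with the rotation amounts alpha=8 and beta=3 fixed by the design:
//   encrypt: x = (ROTR32(x,8) + y) ^ k;   y = ROTL32(y,3) ^ x
//   decrypt: y = ROTR32(y ^ x, 3);        x = ROTL32((x ^ k) - y, 8)
// Each 128-bit vector register holds two 64-bit SPECK64 blocks, so the
// 2-vector kernels below process four cipher blocks and the 6-vector
// kernels twelve.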
// *************************** ARM NEON ************************** //

#if (CRYPTOPP_ARM_NEON_AVAILABLE)

template <class T>
CRYPTOPP_INLINE T UnpackHigh32(const T& a, const T& b)
{
    const uint32x2_t x(vget_high_u32((uint32x4_t)a));
    const uint32x2_t y(vget_high_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}
template <class T>
CRYPTOPP_INLINE T UnpackLow32(const T& a, const T& b)
{
    const uint32x2_t x(vget_low_u32((uint32x4_t)a));
    const uint32x2_t y(vget_low_u32((uint32x4_t)b));
    const uint32x2x2_t r = vzip_u32(x, y);
    return (T)vcombine_u32(r.val[0], r.val[1]);
}
template <unsigned int R>
CRYPTOPP_INLINE uint32x4_t RotateLeft32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, R));
    const uint32x4_t b(vshrq_n_u32(val, 32 - R));
    return vorrq_u32(a, b);
}
template <unsigned int R>
CRYPTOPP_INLINE uint32x4_t RotateRight32(const uint32x4_t& val)
{
    const uint32x4_t a(vshlq_n_u32(val, 32 - R));
    const uint32x4_t b(vshrq_n_u32(val, R));
    return vorrq_u32(a, b);
}
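// The generic rotates above cost two shifts and an OR. Rotating a 32-bit
// lane by 8 bits is just a byte permutation, so on Aarch32/Aarch64 the
// specializations below do it with a single table lookup (vqtbl1q_u8).
// Separate shuffle masks are needed for big- and little-endian lane order.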
#if defined(__aarch32__) || defined(__aarch64__)
template <>
CRYPTOPP_INLINE uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
template <>
CRYPTOPP_INLINE uint32x4_t RotateRight32<8>(const uint32x4_t& val)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
    const uint8x16_t mask = vld1q_u8(maskb);
#else
    const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12 };
    const uint8x16_t mask = vld1q_u8(maskb);
#endif

    return vreinterpretq_u32_u8(
        vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
}
#endif // Aarch32 or Aarch64

CRYPTOPP_INLINE
void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = vaddq_u32(x1, y1);
        x1 = veorq_u32(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = veorq_u32(y1, x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
CRYPTOPP_INLINE
void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = veorq_u32(x1, rk);
        x1 = vsubq_u32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
}
CRYPTOPP_INLINE
void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = vaddq_u32(x1, y1);
        x2 = vaddq_u32(x2, y2);
        x3 = vaddq_u32(x3, y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
CRYPTOPP_INLINE
void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
    uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_t x1 = vuzpq_u32(block0, block1).val[1];
    uint32x4_t y1 = vuzpq_u32(block0, block1).val[0];
    uint32x4_t x2 = vuzpq_u32(block2, block3).val[1];
    uint32x4_t y2 = vuzpq_u32(block2, block3).val[0];
    uint32x4_t x3 = vuzpq_u32(block4, block5).val[1];
    uint32x4_t y3 = vuzpq_u32(block4, block5).val[0];

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const uint32x4_t rk = vdupq_n_u32(subkeys[i]);

        y1 = veorq_u32(y1, x1);
        y2 = veorq_u32(y2, x2);
        y3 = veorq_u32(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = veorq_u32(x1, rk);
        x2 = veorq_u32(x2, rk);
        x3 = veorq_u32(x3, rk);
        x1 = vsubq_u32(x1, y1);
        x2 = vsubq_u32(x2, y2);
        x3 = vsubq_u32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = UnpackLow32(y1, x1);
    block1 = UnpackHigh32(y1, x1);
    block2 = UnpackLow32(y2, x2);
    block3 = UnpackHigh32(y2, x2);
    block4 = UnpackLow32(y3, x3);
    block5 = UnpackHigh32(y3, x3);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE

// ***************************** IA-32 ***************************** //

#if defined(CRYPTOPP_SSE41_AVAILABLE)

template <unsigned int R>
CRYPTOPP_INLINE __m128i RotateLeft32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
#endif
}
template <unsigned int R>
CRYPTOPP_INLINE __m128i RotateRight32(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-R);
#else
    return _mm_or_si128(
        _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
#endif
}
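// As with the NEON code, an 8-bit rotate of 32-bit lanes is a byte
// permutation, so the specializations below use one _mm_shuffle_epi8
// (SSSE3) instead of the shift/shift/OR sequence. When XOP is available,
// _mm_roti_epi32 performs the rotate directly in a single instruction.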
template <>
__m128i RotateLeft32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 8);
#else
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}
template <>
__m128i RotateRight32<8>(const __m128i& val)
{
#if defined(__XOP__)
    return _mm_roti_epi32(val, 32-8);
#else
    const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
    return _mm_shuffle_epi8(val, mask);
#endif
}
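// Block layout note: each __m128i holds two 64-bit SPECK64 blocks as
// (y,x) word pairs, with y in the even lanes and x in the odd lanes.
// _mm_shuffle_ps below gathers the x words of both vectors into x1 and
// the y words into y1; _mm_unpacklo/_mm_unpackhi_epi32 restore the
// interleaved layout after the rounds.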
CRYPTOPP_INLINE
void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x1 = _mm_add_epi32(x1, y1);
        x1 = _mm_xor_si128(x1, rk);
        y1 = RotateLeft32<3>(y1);
        y1 = _mm_xor_si128(y1, x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
CRYPTOPP_INLINE
void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y1 = RotateRight32<3>(y1);
        x1 = _mm_xor_si128(x1, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
}
CRYPTOPP_INLINE
void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = _mm_add_epi32(x1, y1);
        x2 = _mm_add_epi32(x2, y2);
        x3 = _mm_add_epi32(x3, y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
CRYPTOPP_INLINE
void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
    __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
    const word32 *subkeys, unsigned int rounds)
{
    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    const __m128 t0 = _mm_castsi128_ps(block0);
    const __m128 t1 = _mm_castsi128_ps(block1);
    __m128i x1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(3,1,3,1)));
    __m128i y1 = _mm_castps_si128(_mm_shuffle_ps(t0, t1, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t2 = _mm_castsi128_ps(block2);
    const __m128 t3 = _mm_castsi128_ps(block3);
    __m128i x2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(3,1,3,1)));
    __m128i y2 = _mm_castps_si128(_mm_shuffle_ps(t2, t3, _MM_SHUFFLE(2,0,2,0)));

    const __m128 t4 = _mm_castsi128_ps(block4);
    const __m128 t5 = _mm_castsi128_ps(block5);
    __m128i x3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(3,1,3,1)));
    __m128i y3 = _mm_castps_si128(_mm_shuffle_ps(t4, t5, _MM_SHUFFLE(2,0,2,0)));

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
        const __m128i rk = _mm_set1_epi32(subkeys[i]);

        y1 = _mm_xor_si128(y1, x1);
        y2 = _mm_xor_si128(y2, x2);
        y3 = _mm_xor_si128(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);
        x1 = _mm_xor_si128(x1, rk);
        x2 = _mm_xor_si128(x2, rk);
        x3 = _mm_xor_si128(x3, rk);
        x1 = _mm_sub_epi32(x1, y1);
        x2 = _mm_sub_epi32(x2, y2);
        x3 = _mm_sub_epi32(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = _mm_unpacklo_epi32(y1, x1);
    block1 = _mm_unpackhi_epi32(y1, x1);
    block2 = _mm_unpacklo_epi32(y2, x2);
    block3 = _mm_unpackhi_epi32(y2, x2);
    block4 = _mm_unpacklo_epi32(y3, x3);
    block5 = _mm_unpackhi_epi32(y3, x3);
}
#endif // CRYPTOPP_SSE41_AVAILABLE

// *************************** Altivec *************************** //

#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)

using CryptoPP::uint8x16_p;
using CryptoPP::uint32x4_p;

using CryptoPP::VecAdd;
using CryptoPP::VecSub;
using CryptoPP::VecXor;
using CryptoPP::VecLoad;
using CryptoPP::VecPermute;

// Rotate left by bit count
template<unsigned int C>
CRYPTOPP_INLINE uint32x4_p RotateLeft32(const uint32x4_p val)
{
    const uint32x4_p m = {C, C, C, C};
    return vec_rl(val, m);
}

// Rotate right by bit count
template<unsigned int C>
CRYPTOPP_INLINE uint32x4_p RotateRight32(const uint32x4_p val)
{
    const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
    return vec_rl(val, m);
}
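// vec_rl rotates each 32-bit element left by the corresponding count in m,
// so RotateRight32 above is simply a left rotate by 32-C. No byte-shuffle
// specialization is needed here because the rotate is already a single
// vector instruction.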
void SPECK64_Enc_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x1 = VecAdd(x1, y1);
        x1 = VecXor(x1, rk);

        y1 = RotateLeft32<3>(y1);
        y1 = VecXor(y1, x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Dec_Block(uint32x4_p &block0, uint32x4_p &block1,
    const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        y1 = VecXor(y1, x1);
        y1 = RotateRight32<3>(y1);

        x1 = VecXor(x1, rk);
        x1 = VecSub(x1, y1);
        x1 = RotateLeft32<8>(x1);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
}
void SPECK64_Enc_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i=0; i < static_cast<int>(rounds); ++i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        x1 = RotateRight32<8>(x1);
        x2 = RotateRight32<8>(x2);
        x3 = RotateRight32<8>(x3);
        x1 = VecAdd(x1, y1);
        x2 = VecAdd(x2, y2);
        x3 = VecAdd(x3, y3);
        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);

        y1 = RotateLeft32<3>(y1);
        y2 = RotateLeft32<3>(y2);
        y3 = RotateLeft32<3>(y3);
        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
void SPECK64_Dec_6_Blocks(uint32x4_p &block0, uint32x4_p &block1,
    uint32x4_p &block2, uint32x4_p &block3, uint32x4_p &block4,
    uint32x4_p &block5, const word32 *subkeys, unsigned int rounds)
{
#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m1 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
    const uint8x16_p m2 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
#else
    const uint8x16_p m1 = {3,2,1,0, 11,10,9,8, 19,18,17,16, 27,26,25,24};
    const uint8x16_p m2 = {7,6,5,4, 15,14,13,12, 23,22,21,20, 31,30,29,28};
#endif

    // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
    uint32x4_p x1 = VecPermute(block0, block1, m1);
    uint32x4_p y1 = VecPermute(block0, block1, m2);
    uint32x4_p x2 = VecPermute(block2, block3, m1);
    uint32x4_p y2 = VecPermute(block2, block3, m2);
    uint32x4_p x3 = VecPermute(block4, block5, m1);
    uint32x4_p y3 = VecPermute(block4, block5, m2);

    for (int i = static_cast<int>(rounds-1); i >= 0; --i)
    {
#if CRYPTOPP_POWER7_AVAILABLE
        const uint32x4_p rk = vec_splats(subkeys[i]);
#else
        // subkeys has extra elements so memory backs the last subkey
        const uint8x16_p m = {0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3};
        uint32x4_p rk = VecLoad(subkeys+i);
        rk = VecPermute(rk, rk, m);
#endif

        y1 = VecXor(y1, x1);
        y2 = VecXor(y2, x2);
        y3 = VecXor(y3, x3);
        y1 = RotateRight32<3>(y1);
        y2 = RotateRight32<3>(y2);
        y3 = RotateRight32<3>(y3);

        x1 = VecXor(x1, rk);
        x2 = VecXor(x2, rk);
        x3 = VecXor(x3, rk);
        x1 = VecSub(x1, y1);
        x2 = VecSub(x2, y2);
        x3 = VecSub(x3, y3);
        x1 = RotateLeft32<8>(x1);
        x2 = RotateLeft32<8>(x2);
        x3 = RotateLeft32<8>(x3);
    }

#if (CRYPTOPP_BIG_ENDIAN)
    const uint8x16_p m3 = {19,18,17,16, 3,2,1,0, 23,22,21,20, 7,6,5,4};
    const uint8x16_p m4 = {27,26,25,24, 11,10,9,8, 31,30,29,28, 15,14,13,12};
#else
    const uint8x16_p m3 = {3,2,1,0, 19,18,17,16, 7,6,5,4, 23,22,21,20};
    const uint8x16_p m4 = {11,10,9,8, 27,26,25,24, 15,14,13,12, 31,30,29,28};
#endif

    // [A1 A3 B1 B3][A2 A4 B2 B4] => [A1 A2 A3 A4][B1 B2 B3 B4]
    block0 = VecPermute(x1, y1, m3);
    block1 = VecPermute(x1, y1, m4);
    block2 = VecPermute(x2, y2, m3);
    block3 = VecPermute(x2, y2, m4);
    block4 = VecPermute(x3, y3, m3);
    block5 = VecPermute(x3, y3, m4);
}
#endif // CRYPTOPP_ALTIVEC_AVAILABLE

ANONYMOUS_NAMESPACE_END

NAMESPACE_BEGIN(CryptoPP)
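// Exported entry points used by the SPECK implementation (speck.cpp). Each
// forwards the 2-vector and 6-vector kernels defined above to the
// AdvancedProcessBlocks64_6x2 templates (adv_simd.h), which take care of
// loading blocks, applying xorBlocks, and handling tail lengths.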
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#if defined(CRYPTOPP_SSE41_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSE41_AVAILABLE
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
size_t SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
size_t SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    return AdvancedProcessBlocks64_6x2_ALTIVEC(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
        subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ALTIVEC_AVAILABLE

NAMESPACE_END