19 #if (CRYPTOPP_SSSE3_AVAILABLE) 21 # include <pmmintrin.h> 22 # include <tmmintrin.h> 26 # include <ammintrin.h> 27 # if defined(__GNUC__) 28 # include <x86intrin.h> 32 #if defined(__AVX512F__) 33 # define CRYPTOPP_AVX512_ROTATE 1 34 # include <immintrin.h> 37 #if (CRYPTOPP_ARM_NEON_HEADER) 39 # include <arm_neon.h> 42 #if (CRYPTOPP_ARM_ACLE_HEADER) 44 # include <arm_acle.h> 65 #undef CRYPTOPP_POWER8_AVAILABLE 66 #if defined(CRYPTOPP_POWER8_AVAILABLE) 72 extern const char LEA_SIMD_FNAME[] = __FILE__;
74 ANONYMOUS_NAMESPACE_BEGIN
76 using CryptoPP::word32;
80 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 82 inline uint32x4_t Xor(
const uint32x4_t& a,
const uint32x4_t& b)
84 return veorq_u32(a, b);
87 inline uint32x4_t Add(
const uint32x4_t& a,
const uint32x4_t& b)
89 return vaddq_u32(a, b);
92 inline uint32x4_t Sub(
const uint32x4_t& a,
const uint32x4_t& b)
94 return vsubq_u32(a, b);
97 template <
unsigned int R>
98 inline uint32x4_t RotateLeft(
const uint32x4_t& val)
100 const uint32x4_t a(vshlq_n_u32(val, R));
101 const uint32x4_t b(vshrq_n_u32(val, 32 - R));
102 return vorrq_u32(a, b);
105 template <
unsigned int R>
106 inline uint32x4_t RotateRight(
const uint32x4_t& val)
108 const uint32x4_t a(vshlq_n_u32(val, 32 - R));
109 const uint32x4_t b(vshrq_n_u32(val, R));
110 return vorrq_u32(a, b);
113 #if defined(__aarch32__) || defined(__aarch64__) 115 inline uint32x4_t RotateLeft<8>(
const uint32x4_t& val)
117 #if (CRYPTOPP_BIG_ENDIAN) 118 const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
119 const uint8x16_t mask = vld1q_u8(maskb);
121 const uint8_t maskb[16] = { 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 };
122 const uint8x16_t mask = vld1q_u8(maskb);
125 return vreinterpretq_u32_u8(
126 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
130 inline uint32x4_t RotateRight<8>(
const uint32x4_t& val)
132 #if (CRYPTOPP_BIG_ENDIAN) 133 const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
134 const uint8x16_t mask = vld1q_u8(maskb);
136 const uint8_t maskb[16] = { 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,14,12 };
137 const uint8x16_t mask = vld1q_u8(maskb);
140 return vreinterpretq_u32_u8(
141 vqtbl1q_u8(vreinterpretq_u8_u32(val), mask));
145 uint32x4_t UnpackLow32(uint32x4_t a, uint32x4_t b)
147 uint32x2_t a1 = vget_low_u32(a);
148 uint32x2_t b1 = vget_low_u32(b);
149 uint32x2x2_t result = vzip_u32(a1, b1);
150 return vcombine_u32(result.val[0], result.val[1]);
153 uint32x4_t UnpackHigh32(uint32x4_t a, uint32x4_t b)
155 uint32x2_t a1 = vget_high_u32(a);
156 uint32x2_t b1 = vget_high_u32(b);
157 uint32x2x2_t result = vzip_u32(a1, b1);
158 return vcombine_u32(result.val[0], result.val[1]);
161 uint32x4_t UnpackLow64(uint32x4_t a, uint32x4_t b)
163 uint64x1_t a1 = vget_low_u64((uint64x2_t)a);
164 uint64x1_t b1 = vget_low_u64((uint64x2_t)b);
165 return (uint32x4_t)vcombine_u64(a1, b1);
168 uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b)
170 uint64x1_t a1 = vget_high_u64((uint64x2_t)a);
171 uint64x1_t b1 = vget_high_u64((uint64x2_t)b);
172 return (uint32x4_t)vcombine_u64(a1, b1);
175 template <
unsigned int IDX>
176 inline uint32x4_t LoadKey(
const word32 rkey[])
178 return vdupq_n_u32(rkey[IDX]);
181 template <
unsigned int IDX>
182 inline uint32x4_t UnpackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
186 return vmovq_n_u32(0);
190 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
192 const uint32x4_t r1 = UnpackLow32(a, b);
193 const uint32x4_t r2 = UnpackLow32(c, d);
194 return UnpackLow64(r1, r2);
198 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
200 const uint32x4_t r1 = UnpackLow32(a, b);
201 const uint32x4_t r2 = UnpackLow32(c, d);
202 return UnpackHigh64(r1, r2);
206 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
208 const uint32x4_t r1 = UnpackHigh32(a, b);
209 const uint32x4_t r2 = UnpackHigh32(c, d);
210 return UnpackLow64(r1, r2);
214 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
216 const uint32x4_t r1 = UnpackHigh32(a, b);
217 const uint32x4_t r2 = UnpackHigh32(c, d);
218 return UnpackHigh64(r1, r2);
221 template <
unsigned int IDX>
222 inline uint32x4_t UnpackNEON(
const uint32x4_t& v)
226 return vmovq_n_u32(0);
230 inline uint32x4_t UnpackNEON<0>(
const uint32x4_t& v)
233 return vdupq_n_u32(vgetq_lane_u32(v, 0));
237 inline uint32x4_t UnpackNEON<1>(
const uint32x4_t& v)
240 return vdupq_n_u32(vgetq_lane_u32(v, 1));
244 inline uint32x4_t UnpackNEON<2>(
const uint32x4_t& v)
247 return vdupq_n_u32(vgetq_lane_u32(v, 2));
251 inline uint32x4_t UnpackNEON<3>(
const uint32x4_t& v)
254 return vdupq_n_u32(vgetq_lane_u32(v, 3));
257 template <
unsigned int IDX>
258 inline uint32x4_t RepackNEON(
const uint32x4_t& a,
const uint32x4_t& b,
const uint32x4_t& c,
const uint32x4_t& d)
260 return UnpackNEON<IDX>(a, b, c, d);
263 template <
unsigned int IDX>
264 inline uint32x4_t RepackNEON(
const uint32x4_t& v)
266 return UnpackNEON<IDX>(v);
269 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 273 #if (CRYPTOPP_SSSE3_AVAILABLE) 275 inline __m128i Xor(
const __m128i& a,
const __m128i& b)
277 return _mm_xor_si128(a, b);
280 inline __m128i Add(
const __m128i& a,
const __m128i& b)
282 return _mm_add_epi32(a, b);
285 inline __m128i Sub(
const __m128i& a,
const __m128i& b)
287 return _mm_sub_epi32(a, b);
// Rotates each 32-bit lane of val left by the compile-time amount R.
// Two alternative bodies are visible: a single _mm_roti_epi32 call, and
// a pair of shifts (left by R, right by 32-R).
// NOTE(review): the preprocessor lines selecting between the two bodies,
// and the call that combines the two shifts, are not visible in this
// excerpt - confirm against the full file.
290 template <
unsigned int R>
291 inline __m128i RotateLeft(
const __m128i& val)
294 return _mm_roti_epi32(val, R);
297 _mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
// Rotates each 32-bit lane of val right by the compile-time amount R.
// Two alternative bodies are visible: _mm_roti_epi32 with a count of
// 32-R, and a pair of shifts (left by 32-R, right by R).
// NOTE(review): the preprocessor lines selecting between the two bodies,
// and the call that combines the two shifts, are not visible in this
// excerpt - confirm against the full file.
301 template <
unsigned int R>
302 inline __m128i RotateRight(
const __m128i& val)
305 return _mm_roti_epi32(val, 32-R);
308 _mm_slli_epi32(val, 32-R), _mm_srli_epi32(val, R));
// Left rotate by exactly 8 bits. Two alternative bodies are visible: a
// single _mm_roti_epi32 call, and a byte shuffle.
// _mm_set_epi8 lists bytes from the highest to the lowest, so reading
// the mask from the right each 4-byte group selects source bytes
// (3,0,1,2): every byte moves up one position within its 32-bit word.
// NOTE(review): the preprocessor lines selecting between the two bodies
// are not visible in this excerpt - confirm against the full file.
314 inline __m128i RotateLeft<8>(
const __m128i& val)
317 return _mm_roti_epi32(val, 8);
319 const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
320 return _mm_shuffle_epi8(val, mask);
// Right rotate by exactly 8 bits. Two alternative bodies are visible:
// _mm_roti_epi32 with a count of 32-8, and a byte shuffle.
// _mm_set_epi8 lists bytes from the highest to the lowest, so reading
// the mask from the right each 4-byte group selects source bytes
// (1,2,3,0): every byte moves down one position within its 32-bit word.
// NOTE(review): the preprocessor lines selecting between the two bodies
// are not visible in this excerpt - confirm against the full file.
326 inline __m128i RotateRight<8>(
const __m128i& val)
329 return _mm_roti_epi32(val, 32-8);
331 const __m128i mask = _mm_set_epi8(12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1);
332 return _mm_shuffle_epi8(val, mask);
336 template <
unsigned int IDX>
337 inline __m128i LoadKey(
const word32 rkey[])
339 float rk; std::memcpy(&rk, rkey+IDX,
sizeof(rk));
340 return _mm_castps_si128(_mm_load_ps1(&rk));
343 template <
unsigned int IDX>
344 inline __m128i UnpackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
347 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
348 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
350 return _mm_setzero_si128();
354 inline __m128i UnpackXMM<0>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
357 const __m128i r1 = _mm_unpacklo_epi32(a, b);
358 const __m128i r2 = _mm_unpacklo_epi32(c, d);
359 return _mm_unpacklo_epi64(r1, r2);
363 inline __m128i UnpackXMM<1>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
366 const __m128i r1 = _mm_unpacklo_epi32(a, b);
367 const __m128i r2 = _mm_unpacklo_epi32(c, d);
368 return _mm_unpackhi_epi64(r1, r2);
372 inline __m128i UnpackXMM<2>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
375 const __m128i r1 = _mm_unpackhi_epi32(a, b);
376 const __m128i r2 = _mm_unpackhi_epi32(c, d);
377 return _mm_unpacklo_epi64(r1, r2);
381 inline __m128i UnpackXMM<3>(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
384 const __m128i r1 = _mm_unpackhi_epi32(a, b);
385 const __m128i r2 = _mm_unpackhi_epi32(c, d);
386 return _mm_unpackhi_epi64(r1, r2);
389 template <
unsigned int IDX>
390 inline __m128i UnpackXMM(
const __m128i& v)
394 return _mm_setzero_si128();
398 inline __m128i UnpackXMM<0>(
const __m128i& v)
401 return _mm_shuffle_epi8(v, _mm_set_epi8(3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0));
405 inline __m128i UnpackXMM<1>(
const __m128i& v)
408 return _mm_shuffle_epi8(v, _mm_set_epi8(7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4));
412 inline __m128i UnpackXMM<2>(
const __m128i& v)
415 return _mm_shuffle_epi8(v, _mm_set_epi8(11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8));
419 inline __m128i UnpackXMM<3>(
const __m128i& v)
422 return _mm_shuffle_epi8(v, _mm_set_epi8(15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12));
425 template <
unsigned int IDX>
426 inline __m128i RepackXMM(
const __m128i& a,
const __m128i& b,
const __m128i& c,
const __m128i& d)
428 return UnpackXMM<IDX>(a, b, c, d);
431 template <
unsigned int IDX>
432 inline __m128i RepackXMM(
const __m128i& v)
434 return UnpackXMM<IDX>(v);
// POWER8 counterparts of the rotate / key-load / unpack / repack helpers
// (the next line closes the SSSE3 section and opens this one).
// NOTE(review): most function signatures in this section are not visible
// in this excerpt, so only fragments of each helper can be read here.
// NOTE(review): CRYPTOPP_POWER8_AVAILABLE is #undef'd near the top of the
// file, so this section looks compiled out - confirm that is intended.
437 #endif // CRYPTOPP_SSSE3_AVAILABLE 441 #if (CRYPTOPP_POWER8_AVAILABLE) 462 template <
unsigned int R>
// Rotate via vec_rl with a per-lane count vector m (its definition is
// not visible here).
466 return vec_rl(val, m);
469 template <
unsigned int R>
// The count vector holds 32-R in every lane and is passed to vec_rl
// (which, by its name, rotates left), giving a right rotate by R.
472 const uint32x4_p m = {32-R, 32-R, 32-R, 32-R};
473 return vec_rl(val, m);
// Broadcasts rkey[IDX] to every lane.
476 template <
unsigned int IDX>
477 inline uint32x4_p LoadKey(
const word32 rkey[])
479 return vec_splats(rkey[IDX]);
// Generic four-input unpack: the inputs are marked unused.
482 template <
unsigned int IDX>
486 CRYPTOPP_UNUSED(a); CRYPTOPP_UNUSED(b);
487 CRYPTOPP_UNUSED(c); CRYPTOPP_UNUSED(d);
// Single-input unpack: the four byte masks below each repeat one 4-byte
// group (bytes 0-3, 4-7, 8-11, 12-15) across all sixteen positions.
// NOTE(review): presumably used as permute masks that splat one 32-bit
// word; the consuming calls are not visible - confirm.
524 template <
unsigned int IDX>
536 const uint8x16_p m = {3,2,1,0, 3,2,1,0, 3,2,1,0, 3,2,1,0};
544 const uint8x16_p m = {7,6,5,4, 7,6,5,4, 7,6,5,4, 7,6,5,4};
552 const uint8x16_p m = {11,10,9,8, 11,10,9,8, 11,10,9,8, 11,10,9,8};
560 const uint8x16_p m = {15,14,13,12, 15,14,13,12, 15,14,13,12, 15,14,13,12};
// Repack helpers forward to UnpackSIMD<IDX> with the same arguments.
564 template <
unsigned int IDX>
567 return UnpackSIMD<IDX>(a, b, c, d);
570 template <
unsigned int IDX>
573 return UnpackSIMD<IDX>(v);
// LEA_Encryption (begins on the next line, after the directives that
// close the POWER8 section and open the shared NEON/SSSE3 section).
//
// Applies the encryption schedule to the four state vectors temp[0..3],
// in place. Every statement has the same shape:
//   temp[i] = Rotate(Add(Xor(temp[i-1], key), Xor(temp[i], key+1)))
// with the rotation cycling RotateRight<3>, RotateRight<5>, RotateLeft<9>
// and LoadKey<N> broadcasting subkeys[N] to all lanes. Each group of
// three statements consumes six consecutive subkeys, and statements must
// run in the written order because each one reads words updated earlier.
//
// NOTE(review): W (the vector type) is declared by a template header that
// is not visible in this excerpt.
// NOTE(review): 'rounds' is not referenced by any visible line; the
// embedded numbering skips lines just before the subkeys[144..] and
// subkeys[168..] groups, presumably guards on 'rounds' - confirm against
// the full file.
576 #endif // CRYPTOPP_POWER8_AVAILABLE 580 #if (CRYPTOPP_ARM_NEON_AVAILABLE || CRYPTOPP_SSSE3_AVAILABLE) 583 inline void LEA_Encryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// subkeys[0..23]
585 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<4>(subkeys)), Xor(temp[3], LoadKey<5>(subkeys))));
586 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<2>(subkeys)), Xor(temp[2], LoadKey<3>(subkeys))));
587 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<0>(subkeys)), Xor(temp[1], LoadKey<1>(subkeys))));
588 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<10>(subkeys)), Xor(temp[0], LoadKey<11>(subkeys))));
589 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<8>(subkeys)), Xor(temp[3], LoadKey<9>(subkeys))));
590 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<6>(subkeys)), Xor(temp[2], LoadKey<7>(subkeys))));
591 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<16>(subkeys)), Xor(temp[1], LoadKey<17>(subkeys))));
592 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<14>(subkeys)), Xor(temp[0], LoadKey<15>(subkeys))));
593 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<12>(subkeys)), Xor(temp[3], LoadKey<13>(subkeys))));
594 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<22>(subkeys)), Xor(temp[2], LoadKey<23>(subkeys))));
595 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<20>(subkeys)), Xor(temp[1], LoadKey<21>(subkeys))));
596 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<18>(subkeys)), Xor(temp[0], LoadKey<19>(subkeys))));
// subkeys[24..47]
598 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<28>(subkeys)), Xor(temp[3], LoadKey<29>(subkeys))));
599 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<26>(subkeys)), Xor(temp[2], LoadKey<27>(subkeys))));
600 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<24>(subkeys)), Xor(temp[1], LoadKey<25>(subkeys))));
601 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<34>(subkeys)), Xor(temp[0], LoadKey<35>(subkeys))));
602 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<32>(subkeys)), Xor(temp[3], LoadKey<33>(subkeys))));
603 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<30>(subkeys)), Xor(temp[2], LoadKey<31>(subkeys))));
604 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<40>(subkeys)), Xor(temp[1], LoadKey<41>(subkeys))));
605 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<38>(subkeys)), Xor(temp[0], LoadKey<39>(subkeys))));
606 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<36>(subkeys)), Xor(temp[3], LoadKey<37>(subkeys))));
607 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<46>(subkeys)), Xor(temp[2], LoadKey<47>(subkeys))));
608 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<44>(subkeys)), Xor(temp[1], LoadKey<45>(subkeys))));
609 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<42>(subkeys)), Xor(temp[0], LoadKey<43>(subkeys))));
// subkeys[48..71]
611 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<52>(subkeys)), Xor(temp[3], LoadKey<53>(subkeys))));
612 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<50>(subkeys)), Xor(temp[2], LoadKey<51>(subkeys))));
613 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<48>(subkeys)), Xor(temp[1], LoadKey<49>(subkeys))));
614 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<58>(subkeys)), Xor(temp[0], LoadKey<59>(subkeys))));
615 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<56>(subkeys)), Xor(temp[3], LoadKey<57>(subkeys))));
616 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<54>(subkeys)), Xor(temp[2], LoadKey<55>(subkeys))));
617 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<64>(subkeys)), Xor(temp[1], LoadKey<65>(subkeys))));
618 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<62>(subkeys)), Xor(temp[0], LoadKey<63>(subkeys))));
619 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<60>(subkeys)), Xor(temp[3], LoadKey<61>(subkeys))));
620 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<70>(subkeys)), Xor(temp[2], LoadKey<71>(subkeys))));
621 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<68>(subkeys)), Xor(temp[1], LoadKey<69>(subkeys))));
622 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<66>(subkeys)), Xor(temp[0], LoadKey<67>(subkeys))));
// subkeys[72..95]
624 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<76>(subkeys)), Xor(temp[3], LoadKey<77>(subkeys))));
625 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<74>(subkeys)), Xor(temp[2], LoadKey<75>(subkeys))));
626 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<72>(subkeys)), Xor(temp[1], LoadKey<73>(subkeys))));
627 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<82>(subkeys)), Xor(temp[0], LoadKey<83>(subkeys))));
628 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<80>(subkeys)), Xor(temp[3], LoadKey<81>(subkeys))));
629 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<78>(subkeys)), Xor(temp[2], LoadKey<79>(subkeys))));
630 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<88>(subkeys)), Xor(temp[1], LoadKey<89>(subkeys))));
631 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<86>(subkeys)), Xor(temp[0], LoadKey<87>(subkeys))));
632 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<84>(subkeys)), Xor(temp[3], LoadKey<85>(subkeys))));
633 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<94>(subkeys)), Xor(temp[2], LoadKey<95>(subkeys))));
634 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<92>(subkeys)), Xor(temp[1], LoadKey<93>(subkeys))));
635 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<90>(subkeys)), Xor(temp[0], LoadKey<91>(subkeys))));
// subkeys[96..119]
637 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<100>(subkeys)), Xor(temp[3], LoadKey<101>(subkeys))));
638 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<98>(subkeys)), Xor(temp[2], LoadKey<99>(subkeys))));
639 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<96>(subkeys)), Xor(temp[1], LoadKey<97>(subkeys))));
640 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<106>(subkeys)), Xor(temp[0], LoadKey<107>(subkeys))));
641 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<104>(subkeys)), Xor(temp[3], LoadKey<105>(subkeys))));
642 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<102>(subkeys)), Xor(temp[2], LoadKey<103>(subkeys))));
643 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<112>(subkeys)), Xor(temp[1], LoadKey<113>(subkeys))));
644 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<110>(subkeys)), Xor(temp[0], LoadKey<111>(subkeys))));
645 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<108>(subkeys)), Xor(temp[3], LoadKey<109>(subkeys))));
646 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<118>(subkeys)), Xor(temp[2], LoadKey<119>(subkeys))));
647 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<116>(subkeys)), Xor(temp[1], LoadKey<117>(subkeys))));
648 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<114>(subkeys)), Xor(temp[0], LoadKey<115>(subkeys))));
// subkeys[120..143]
650 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<124>(subkeys)), Xor(temp[3], LoadKey<125>(subkeys))));
651 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<122>(subkeys)), Xor(temp[2], LoadKey<123>(subkeys))));
652 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<120>(subkeys)), Xor(temp[1], LoadKey<121>(subkeys))));
653 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<130>(subkeys)), Xor(temp[0], LoadKey<131>(subkeys))));
654 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<128>(subkeys)), Xor(temp[3], LoadKey<129>(subkeys))));
655 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<126>(subkeys)), Xor(temp[2], LoadKey<127>(subkeys))));
656 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<136>(subkeys)), Xor(temp[1], LoadKey<137>(subkeys))));
657 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<134>(subkeys)), Xor(temp[0], LoadKey<135>(subkeys))));
658 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<132>(subkeys)), Xor(temp[3], LoadKey<133>(subkeys))));
659 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<142>(subkeys)), Xor(temp[2], LoadKey<143>(subkeys))));
660 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<140>(subkeys)), Xor(temp[1], LoadKey<141>(subkeys))));
661 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<138>(subkeys)), Xor(temp[0], LoadKey<139>(subkeys))));
// subkeys[144..167]
// NOTE(review): presumably only executed for larger 'rounds' values; the
// controlling lines are not visible here - confirm.
665 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<148>(subkeys)), Xor(temp[3], LoadKey<149>(subkeys))));
666 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<146>(subkeys)), Xor(temp[2], LoadKey<147>(subkeys))));
667 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<144>(subkeys)), Xor(temp[1], LoadKey<145>(subkeys))));
668 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<154>(subkeys)), Xor(temp[0], LoadKey<155>(subkeys))));
669 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<152>(subkeys)), Xor(temp[3], LoadKey<153>(subkeys))));
670 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<150>(subkeys)), Xor(temp[2], LoadKey<151>(subkeys))));
671 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<160>(subkeys)), Xor(temp[1], LoadKey<161>(subkeys))));
672 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<158>(subkeys)), Xor(temp[0], LoadKey<159>(subkeys))));
673 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<156>(subkeys)), Xor(temp[3], LoadKey<157>(subkeys))));
674 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<166>(subkeys)), Xor(temp[2], LoadKey<167>(subkeys))));
675 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<164>(subkeys)), Xor(temp[1], LoadKey<165>(subkeys))));
676 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<162>(subkeys)), Xor(temp[0], LoadKey<163>(subkeys))));
// subkeys[168..191]
// NOTE(review): presumably only executed for the largest 'rounds' value;
// the controlling lines are not visible here - confirm.
681 temp[3] = RotateRight<3>(Add(Xor(temp[2], LoadKey<172>(subkeys)), Xor(temp[3], LoadKey<173>(subkeys))));
682 temp[2] = RotateRight<5>(Add(Xor(temp[1], LoadKey<170>(subkeys)), Xor(temp[2], LoadKey<171>(subkeys))));
683 temp[1] = RotateLeft<9>(Add(Xor(temp[0], LoadKey<168>(subkeys)), Xor(temp[1], LoadKey<169>(subkeys))));
684 temp[0] = RotateRight<3>(Add(Xor(temp[3], LoadKey<178>(subkeys)), Xor(temp[0], LoadKey<179>(subkeys))));
685 temp[3] = RotateRight<5>(Add(Xor(temp[2], LoadKey<176>(subkeys)), Xor(temp[3], LoadKey<177>(subkeys))));
686 temp[2] = RotateLeft<9>(Add(Xor(temp[1], LoadKey<174>(subkeys)), Xor(temp[2], LoadKey<175>(subkeys))));
687 temp[1] = RotateRight<3>(Add(Xor(temp[0], LoadKey<184>(subkeys)), Xor(temp[1], LoadKey<185>(subkeys))));
688 temp[0] = RotateRight<5>(Add(Xor(temp[3], LoadKey<182>(subkeys)), Xor(temp[0], LoadKey<183>(subkeys))));
689 temp[3] = RotateLeft<9>(Add(Xor(temp[2], LoadKey<180>(subkeys)), Xor(temp[3], LoadKey<181>(subkeys))));
690 temp[2] = RotateRight<3>(Add(Xor(temp[1], LoadKey<190>(subkeys)), Xor(temp[2], LoadKey<191>(subkeys))));
691 temp[1] = RotateRight<5>(Add(Xor(temp[0], LoadKey<188>(subkeys)), Xor(temp[1], LoadKey<189>(subkeys))));
692 temp[0] = RotateLeft<9>(Add(Xor(temp[3], LoadKey<186>(subkeys)), Xor(temp[0], LoadKey<187>(subkeys))));
// Applies the decryption schedule to the four state vectors temp[0..3],
// in place. The statements mirror LEA_Encryption in reverse order, from
// subkeys[191] down to subkeys[0]. Every statement has the same shape:
//   temp[i] = Xor(Sub(RotateBack(temp[i]), Xor(temp[i-1], key)), key+1)
// where the rotation cycles RotateRight<9>, RotateLeft<5>, RotateLeft<3>
// (the inverses of the encryption rotations) and LoadKey<N> broadcasts
// subkeys[N] to all lanes. Statements must run in the written order
// because each one reads words restored by the statements before it.
//
// NOTE(review): W (the vector type) is declared by a template header that
// is not visible in this excerpt.
// NOTE(review): 'rounds' is not referenced by any visible line; the
// embedded numbering skips lines around the subkeys[191..168] and
// subkeys[167..144] groups, presumably guards on 'rounds' - confirm
// against the full file.
699 inline void LEA_Decryption(W temp[4],
const word32 *subkeys,
unsigned int rounds)
// subkeys[191..168]
703 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<186>(subkeys))), LoadKey<187>(subkeys));
704 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<188>(subkeys))), LoadKey<189>(subkeys));
705 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<190>(subkeys))), LoadKey<191>(subkeys));
706 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<180>(subkeys))), LoadKey<181>(subkeys));
707 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<182>(subkeys))), LoadKey<183>(subkeys));
708 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<184>(subkeys))), LoadKey<185>(subkeys));
709 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<174>(subkeys))), LoadKey<175>(subkeys));
710 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<176>(subkeys))), LoadKey<177>(subkeys));
711 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<178>(subkeys))), LoadKey<179>(subkeys));
712 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<168>(subkeys))), LoadKey<169>(subkeys));
713 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<170>(subkeys))), LoadKey<171>(subkeys));
714 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<172>(subkeys))), LoadKey<173>(subkeys));
// subkeys[167..144]
719 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<162>(subkeys))), LoadKey<163>(subkeys));
720 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<164>(subkeys))), LoadKey<165>(subkeys));
721 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<166>(subkeys))), LoadKey<167>(subkeys));
722 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<156>(subkeys))), LoadKey<157>(subkeys));
723 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<158>(subkeys))), LoadKey<159>(subkeys));
724 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<160>(subkeys))), LoadKey<161>(subkeys));
725 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<150>(subkeys))), LoadKey<151>(subkeys));
726 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<152>(subkeys))), LoadKey<153>(subkeys));
727 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<154>(subkeys))), LoadKey<155>(subkeys));
728 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<144>(subkeys))), LoadKey<145>(subkeys));
729 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<146>(subkeys))), LoadKey<147>(subkeys));
730 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<148>(subkeys))), LoadKey<149>(subkeys));
// subkeys[143..120]
733 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<138>(subkeys))), LoadKey<139>(subkeys));
734 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<140>(subkeys))), LoadKey<141>(subkeys));
735 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<142>(subkeys))), LoadKey<143>(subkeys));
736 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<132>(subkeys))), LoadKey<133>(subkeys));
737 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<134>(subkeys))), LoadKey<135>(subkeys));
738 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<136>(subkeys))), LoadKey<137>(subkeys));
739 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<126>(subkeys))), LoadKey<127>(subkeys));
740 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<128>(subkeys))), LoadKey<129>(subkeys));
741 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<130>(subkeys))), LoadKey<131>(subkeys));
742 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<120>(subkeys))), LoadKey<121>(subkeys));
743 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<122>(subkeys))), LoadKey<123>(subkeys));
744 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<124>(subkeys))), LoadKey<125>(subkeys));
// subkeys[119..96]
746 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<114>(subkeys))), LoadKey<115>(subkeys));
747 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<116>(subkeys))), LoadKey<117>(subkeys));
748 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<118>(subkeys))), LoadKey<119>(subkeys));
749 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<108>(subkeys))), LoadKey<109>(subkeys));
750 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<110>(subkeys))), LoadKey<111>(subkeys));
751 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<112>(subkeys))), LoadKey<113>(subkeys));
752 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<102>(subkeys))), LoadKey<103>(subkeys));
753 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<104>(subkeys))), LoadKey<105>(subkeys));
754 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<106>(subkeys))), LoadKey<107>(subkeys));
755 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<96>(subkeys))), LoadKey<97>(subkeys));
756 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<98>(subkeys))), LoadKey<99>(subkeys));
757 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<100>(subkeys))), LoadKey<101>(subkeys));
// subkeys[95..72]
759 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<90>(subkeys))), LoadKey<91>(subkeys));
760 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<92>(subkeys))), LoadKey<93>(subkeys));
761 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<94>(subkeys))), LoadKey<95>(subkeys));
762 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<84>(subkeys))), LoadKey<85>(subkeys));
763 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<86>(subkeys))), LoadKey<87>(subkeys));
764 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<88>(subkeys))), LoadKey<89>(subkeys));
765 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<78>(subkeys))), LoadKey<79>(subkeys));
766 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<80>(subkeys))), LoadKey<81>(subkeys));
767 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<82>(subkeys))), LoadKey<83>(subkeys));
768 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<72>(subkeys))), LoadKey<73>(subkeys));
769 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<74>(subkeys))), LoadKey<75>(subkeys));
770 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<76>(subkeys))), LoadKey<77>(subkeys));
// subkeys[71..48]
772 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<66>(subkeys))), LoadKey<67>(subkeys));
773 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<68>(subkeys))), LoadKey<69>(subkeys));
774 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<70>(subkeys))), LoadKey<71>(subkeys));
775 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<60>(subkeys))), LoadKey<61>(subkeys));
776 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<62>(subkeys))), LoadKey<63>(subkeys));
777 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<64>(subkeys))), LoadKey<65>(subkeys));
778 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<54>(subkeys))), LoadKey<55>(subkeys));
779 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<56>(subkeys))), LoadKey<57>(subkeys));
780 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<58>(subkeys))), LoadKey<59>(subkeys));
781 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<48>(subkeys))), LoadKey<49>(subkeys));
782 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<50>(subkeys))), LoadKey<51>(subkeys));
783 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<52>(subkeys))), LoadKey<53>(subkeys));
// subkeys[47..24]
785 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<42>(subkeys))), LoadKey<43>(subkeys));
786 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<44>(subkeys))), LoadKey<45>(subkeys));
787 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<46>(subkeys))), LoadKey<47>(subkeys));
788 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<36>(subkeys))), LoadKey<37>(subkeys));
789 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<38>(subkeys))), LoadKey<39>(subkeys));
790 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<40>(subkeys))), LoadKey<41>(subkeys));
791 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<30>(subkeys))), LoadKey<31>(subkeys));
792 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<32>(subkeys))), LoadKey<33>(subkeys));
793 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<34>(subkeys))), LoadKey<35>(subkeys));
794 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<24>(subkeys))), LoadKey<25>(subkeys));
795 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<26>(subkeys))), LoadKey<27>(subkeys));
796 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<28>(subkeys))), LoadKey<29>(subkeys));
// subkeys[23..0]
798 temp[0] = Xor(Sub(RotateRight<9>(temp[0]), Xor(temp[3], LoadKey<18>(subkeys))), LoadKey<19>(subkeys));
799 temp[1] = Xor(Sub(RotateLeft<5>(temp[1]), Xor(temp[0], LoadKey<20>(subkeys))), LoadKey<21>(subkeys));
800 temp[2] = Xor(Sub(RotateLeft<3>(temp[2]), Xor(temp[1], LoadKey<22>(subkeys))), LoadKey<23>(subkeys));
801 temp[3] = Xor(Sub(RotateRight<9>(temp[3]), Xor(temp[2], LoadKey<12>(subkeys))), LoadKey<13>(subkeys));
802 temp[0] = Xor(Sub(RotateLeft<5>(temp[0]), Xor(temp[3], LoadKey<14>(subkeys))), LoadKey<15>(subkeys));
803 temp[1] = Xor(Sub(RotateLeft<3>(temp[1]), Xor(temp[0], LoadKey<16>(subkeys))), LoadKey<17>(subkeys));
804 temp[2] = Xor(Sub(RotateRight<9>(temp[2]), Xor(temp[1], LoadKey<6>(subkeys))), LoadKey<7>(subkeys));
805 temp[3] = Xor(Sub(RotateLeft<5>(temp[3]), Xor(temp[2], LoadKey<8>(subkeys))), LoadKey<9>(subkeys));
806 temp[0] = Xor(Sub(RotateLeft<3>(temp[0]), Xor(temp[3], LoadKey<10>(subkeys))), LoadKey<11>(subkeys));
807 temp[1] = Xor(Sub(RotateRight<9>(temp[1]), Xor(temp[0], LoadKey<0>(subkeys))), LoadKey<1>(subkeys));
808 temp[2] = Xor(Sub(RotateLeft<5>(temp[2]), Xor(temp[1], LoadKey<2>(subkeys))), LoadKey<3>(subkeys));
809 temp[3] = Xor(Sub(RotateLeft<3>(temp[3]), Xor(temp[2], LoadKey<4>(subkeys))), LoadKey<5>(subkeys));
812 #endif // LEA Encryption and Decryption 816 #if (CRYPTOPP_ARM_NEON_AVAILABLE) 818 inline void LEA_Enc_Block(uint32x4_t &block0,
819 const word32 *subkeys,
unsigned int rounds)
822 temp[0] = UnpackNEON<0>(block0);
823 temp[1] = UnpackNEON<1>(block0);
824 temp[2] = UnpackNEON<2>(block0);
825 temp[3] = UnpackNEON<3>(block0);
827 LEA_Encryption(temp, subkeys, rounds);
829 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
832 inline void LEA_Dec_Block(uint32x4_t &block0,
833 const word32 *subkeys,
unsigned int rounds)
836 temp[0] = UnpackNEON<0>(block0);
837 temp[1] = UnpackNEON<1>(block0);
838 temp[2] = UnpackNEON<2>(block0);
839 temp[3] = UnpackNEON<3>(block0);
841 LEA_Decryption(temp, subkeys, rounds);
843 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
846 inline void LEA_Enc_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
847 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
850 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
851 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
852 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
853 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
855 LEA_Encryption(temp, subkeys, rounds);
857 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
858 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
859 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
860 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
863 inline void LEA_Dec_4_Blocks(uint32x4_t &block0, uint32x4_t &block1,
864 uint32x4_t &block2, uint32x4_t &block3,
const word32 *subkeys,
unsigned int rounds)
867 temp[0] = UnpackNEON<0>(block0, block1, block2, block3);
868 temp[1] = UnpackNEON<1>(block0, block1, block2, block3);
869 temp[2] = UnpackNEON<2>(block0, block1, block2, block3);
870 temp[3] = UnpackNEON<3>(block0, block1, block2, block3);
872 LEA_Decryption(temp, subkeys, rounds);
874 block0 = RepackNEON<0>(temp[0], temp[1], temp[2], temp[3]);
875 block1 = RepackNEON<1>(temp[0], temp[1], temp[2], temp[3]);
876 block2 = RepackNEON<2>(temp[0], temp[1], temp[2], temp[3]);
877 block3 = RepackNEON<3>(temp[0], temp[1], temp[2], temp[3]);
880 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 884 #if (CRYPTOPP_SSSE3_AVAILABLE) 886 inline void LEA_Enc_Block(__m128i &block0,
887 const word32 *subkeys,
unsigned int rounds)
890 temp[0] = UnpackXMM<0>(block0);
891 temp[1] = UnpackXMM<1>(block0);
892 temp[2] = UnpackXMM<2>(block0);
893 temp[3] = UnpackXMM<3>(block0);
895 LEA_Encryption(temp, subkeys, rounds);
897 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
900 inline void LEA_Dec_Block(__m128i &block0,
901 const word32 *subkeys,
unsigned int rounds)
904 temp[0] = UnpackXMM<0>(block0);
905 temp[1] = UnpackXMM<1>(block0);
906 temp[2] = UnpackXMM<2>(block0);
907 temp[3] = UnpackXMM<3>(block0);
909 LEA_Decryption(temp, subkeys, rounds);
911 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
914 inline void LEA_Enc_4_Blocks(__m128i &block0, __m128i &block1,
915 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
918 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
919 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
920 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
921 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
923 LEA_Encryption(temp, subkeys, rounds);
925 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
926 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
927 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
928 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
931 inline void LEA_Dec_4_Blocks(__m128i &block0, __m128i &block1,
932 __m128i &block2, __m128i &block3,
const word32 *subkeys,
unsigned int rounds)
935 temp[0] = UnpackXMM<0>(block0, block1, block2, block3);
936 temp[1] = UnpackXMM<1>(block0, block1, block2, block3);
937 temp[2] = UnpackXMM<2>(block0, block1, block2, block3);
938 temp[3] = UnpackXMM<3>(block0, block1, block2, block3);
940 LEA_Decryption(temp, subkeys, rounds);
942 block0 = RepackXMM<0>(temp[0], temp[1], temp[2], temp[3]);
943 block1 = RepackXMM<1>(temp[0], temp[1], temp[2], temp[3]);
944 block2 = RepackXMM<2>(temp[0], temp[1], temp[2], temp[3]);
945 block3 = RepackXMM<3>(temp[0], temp[1], temp[2], temp[3]);
948 #endif // CRYPTOPP_SSSE3_AVAILABLE 952 #if (CRYPTOPP_POWER8_AVAILABLE) 955 const word32 *subkeys,
unsigned int rounds)
// POWER8 single-register encryption helper: block0 is split into
// temp[0..3] with UnpackSIMD<0..3>, LEA_Encryption transforms them using
// subkeys/rounds, and RepackSIMD<0> of the result is stored into block0.
// NOTE(review): the line naming this function and the declaration of temp
// are not visible in this listing -- presumably LEA_Enc_Block taking a
// uint32x4_p reference; confirm against the full file.
// NOTE(review): the file head shows "#undef CRYPTOPP_POWER8_AVAILABLE", so
// this POWER8 section looks compiled out -- confirm.
958 temp[0] = UnpackSIMD<0>(block0);
959 temp[1] = UnpackSIMD<1>(block0);
960 temp[2] = UnpackSIMD<2>(block0);
961 temp[3] = UnpackSIMD<3>(block0);
963 LEA_Encryption(temp, subkeys, rounds);
965 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
969 const word32 *subkeys,
unsigned int rounds)
// POWER8 single-register decryption helper: block0 is split into
// temp[0..3] with UnpackSIMD<0..3>, LEA_Decryption transforms them using
// subkeys/rounds, and RepackSIMD<0> of the result is stored into block0.
// NOTE(review): the line naming this function and the declaration of temp
// are not visible in this listing -- presumably LEA_Dec_Block taking a
// uint32x4_p reference; confirm against the full file.
972 temp[0] = UnpackSIMD<0>(block0);
973 temp[1] = UnpackSIMD<1>(block0);
974 temp[2] = UnpackSIMD<2>(block0);
975 temp[3] = UnpackSIMD<3>(block0);
977 LEA_Decryption(temp, subkeys, rounds);
979 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-register encryption helper body: UnpackSIMD<i> is applied to
// block0..block3 together to fill temp[i], LEA_Encryption transforms
// temp[0..3], and RepackSIMD<0..3> writes the results into block0..block3.
// NOTE(review): the signature and the declaration of temp are missing from
// this listing -- presumably LEA_Enc_4_Blocks with four uint32x4_p
// references plus subkeys/rounds; confirm against the full file.
986 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
987 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
988 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
989 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
991 LEA_Encryption(temp, subkeys, rounds);
// Each repack reads only temp[], so the four stores do not depend on
// one another.
993 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
994 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
995 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
996 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
// POWER8 four-register decryption helper body: UnpackSIMD<i> is applied to
// block0..block3 together to fill temp[i], LEA_Decryption transforms
// temp[0..3], and RepackSIMD<0..3> writes the results into block0..block3.
// NOTE(review): the signature and the declaration of temp are missing from
// this listing -- presumably LEA_Dec_4_Blocks with four uint32x4_p
// references plus subkeys/rounds; confirm against the full file.
1003 temp[0] = UnpackSIMD<0>(block0, block1, block2, block3);
1004 temp[1] = UnpackSIMD<1>(block0, block1, block2, block3);
1005 temp[2] = UnpackSIMD<2>(block0, block1, block2, block3);
1006 temp[3] = UnpackSIMD<3>(block0, block1, block2, block3);
1008 LEA_Decryption(temp, subkeys, rounds);
// Each repack reads only temp[], so the four stores do not depend on
// one another.
1010 block0 = RepackSIMD<0>(temp[0], temp[1], temp[2], temp[3]);
1011 block1 = RepackSIMD<1>(temp[0], temp[1], temp[2], temp[3]);
1012 block2 = RepackSIMD<2>(temp[0], temp[1], temp[2], temp[3]);
1013 block3 = RepackSIMD<3>(temp[0], temp[1], temp[2], temp[3]);
1016 #endif // CRYPTOPP_POWER8_AVAILABLE 1018 ANONYMOUS_NAMESPACE_END
// LEA_Enc_AdvancedProcessBlocks_SSSE3: SSSE3 encryption entry point, placed
// after ANONYMOUS_NAMESPACE_END. It hands subKeys, rounds, inBlocks,
// xorBlocks, outBlocks, length and flags through unchanged to a callee and
// is declared to return size_t.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably AdvancedProcessBlocks128_4x1_SSE(LEA_Enc_Block,
// LEA_Enc_4_Blocks, ...); confirm against the full file.
// NOTE(review): this guard tests defined(CRYPTOPP_SSSE3_AVAILABLE) while the
// helper section tests the macro's value with #if (...); the two disagree
// if the macro is ever defined as 0.
1024 #if defined(CRYPTOPP_SSSE3_AVAILABLE) 1025 size_t LEA_Enc_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1026 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1029 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Dec_AdvancedProcessBlocks_SSSE3: SSSE3 decryption entry point. It
// hands subKeys, rounds, inBlocks, xorBlocks, outBlocks, length and flags
// through unchanged to a callee and is declared to return size_t.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably AdvancedProcessBlocks128_4x1_SSE(LEA_Dec_Block,
// LEA_Dec_4_Blocks, ...); confirm against the full file.
1032 size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(
const word32* subKeys,
size_t rounds,
1033 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1036 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// Closes the SSSE3 entry-point section and opens the NEON one.
// LEA_Enc_AdvancedProcessBlocks_NEON: NEON encryption entry point. It hands
// subKeys, rounds, inBlocks, xorBlocks, outBlocks, length and flags through
// unchanged to a callee and is declared to return size_t.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably AdvancedProcessBlocks128_4x1_NEON(LEA_Enc_Block,
// LEA_Enc_4_Blocks, ...); confirm against the full file.
// NOTE(review): this guard tests defined(CRYPTOPP_ARM_NEON_AVAILABLE) while
// the helper section tests the macro's value with #if (...); the two
// disagree if the macro is ever defined as 0.
1038 #endif // CRYPTOPP_SSSE3_AVAILABLE 1040 #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) 1041 size_t LEA_Enc_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1042 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1045 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Dec_AdvancedProcessBlocks_NEON: NEON decryption entry point. It hands
// subKeys, rounds, inBlocks, xorBlocks, outBlocks, length and flags through
// unchanged to a callee and is declared to return size_t.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably AdvancedProcessBlocks128_4x1_NEON(LEA_Dec_Block,
// LEA_Dec_4_Blocks, ...); confirm against the full file.
1048 size_t LEA_Dec_AdvancedProcessBlocks_NEON(
const word32* subKeys,
size_t rounds,
1049 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1052 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// Closes the NEON entry-point section and opens the POWER8 one.
// LEA_Enc_AdvancedProcessBlocks_POWER8: POWER8 encryption entry point. It
// hands subKeys, rounds, inBlocks, xorBlocks, outBlocks, length and flags
// through unchanged to a callee and is declared to return size_t.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Enc_Block,
// LEA_Enc_4_Blocks, ...); confirm against the full file.
// NOTE(review): the file head shows "#undef CRYPTOPP_POWER8_AVAILABLE", so
// this section looks compiled out -- confirm.
1054 #endif // CRYPTOPP_ARM_NEON_AVAILABLE 1056 #if defined(CRYPTOPP_POWER8_AVAILABLE) 1057 size_t LEA_Enc_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1058 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1061 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
// LEA_Dec_AdvancedProcessBlocks_POWER8: POWER8 decryption entry point. It
// hands subKeys, rounds, inBlocks, xorBlocks, outBlocks, length and flags
// through unchanged to a callee and is declared to return size_t.
// NOTE(review): the line naming the callee is missing from this listing --
// presumably AdvancedProcessBlocks128_4x1_ALTIVEC(LEA_Dec_Block,
// LEA_Dec_4_Blocks, ...); confirm against the full file.
1064 size_t LEA_Dec_AdvancedProcessBlocks_POWER8(
const word32* subKeys,
size_t rounds,
1065 const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks,
size_t length, word32 flags)
1068 subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
1070 #endif // CRYPTOPP_POWER8_AVAILABLE Utility functions for the Crypto++ library.
Classes for the LEA block cipher.
T1 VecSub(const T1 vec1, const T2 vec2)
Subtract two vectors.
Library configuration file.
T1 VecAdd(const T1 vec1, const T2 vec2)
Add two vectors.
T1 VecPermute(const T1 vec, const T2 mask)
Permutes a vector.
__vector unsigned int uint32x4_p
Vector of 32-bit elements.
Support functions for PowerPC and vector operations.
Template for AdvancedProcessBlocks and SIMD processing.
size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
T1 VecXor(const T1 vec1, const T2 vec2)
XOR two vectors.
size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4, const W *subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
AdvancedProcessBlocks for 1 and 4 blocks.
__vector unsigned long long uint64x2_p
Vector of 64-bit elements.
Crypto++ library namespace.
__vector unsigned char uint8x16_p
Vector of 8-bit elements.