11 #include <botan/sha1_sse2.h>    12 #include <botan/rotate.h>    13 #include <emmintrin.h>    17 namespace SHA1_SSE2_F {
    26 #define prep00_15(P, W)                                      \    28       W = _mm_shufflehi_epi16(W, _MM_SHUFFLE(2, 3, 0, 1));   \    29       W = _mm_shufflelo_epi16(W, _MM_SHUFFLE(2, 3, 0, 1));   \    30       W = _mm_or_si128(_mm_slli_epi16(W, 8),                 \    31                        _mm_srli_epi16(W, 8));                \    32       P.u128 = _mm_add_epi32(W, K00_19);                     \    80 #define prep(prep, XW0, XW1, XW2, XW3, K)                               \    82       __m128i r0, r1, r2, r3;                                           \    85       r3 = _mm_srli_si128((XW3), 4);                                    \    88       r1 = _mm_shuffle_epi32((XW0), _MM_SHUFFLE(1,0,3,2));              \    90       r1 = _mm_unpacklo_epi64(r1, (XW1));                               \    93       r0 = _mm_xor_si128(r1, r0);                                       \    94       r2 = _mm_xor_si128(r3, r2);                                       \    95       r0 = _mm_xor_si128(r2, r0);                                       \    98       r2 = _mm_slli_si128(r0, 12);                                      \    99       r1 = _mm_cmplt_epi32(r0, _mm_setzero_si128());                    \   100       r0 = _mm_add_epi32(r0, r0);                  \   101       r0 = _mm_sub_epi32(r0, r1);              \   103       r3 = _mm_srli_epi32(r2, 30);                                      \   104       r2 = _mm_slli_epi32(r2, 2);                                       \   106       r0 = _mm_xor_si128(r0, r3);                                       \   107       r0 = _mm_xor_si128(r0, r2);                \   110       (prep).u128 = _mm_add_epi32(r0, K);                               \   118    E += (D ^ (B & (C ^ D))) + msg + 
rotate_left(A, 5);
   136    E += ((B & C) | ((B | C) & D)) + msg + 
rotate_left(A, 5);
   // SSE2 compression loop: iterates `blocks` times over `input_bytes`, which is
   // viewed as an array of 128-bit vectors. Each iteration loads four vectors,
   // runs the `prep` message-schedule macro sixteen times, adds the working
   // variables A..E into digest[0..4], and advances the input pointer.
   // NOTE(review): only part of the body is visible here (the round calls that
   // consume P0..P3 are not shown) - confirm details against the full file.
   156 void SHA_160_SSE2::compress_n(
const byte input_bytes[], 
size_t blocks)
   158    using namespace SHA1_SSE2_F;
   // Four constants, each replicated into all four 32-bit lanes by
   // _mm_set1_epi32. The names indicate the round range each one serves
   // (0-19, 20-39, 40-59, 60-79). The last two literals exceed INT_MAX and are
   // converted to the intrinsic's int parameter; only the bit pattern matters.
   160    const __m128i K00_19 = _mm_set1_epi32(0x5A827999);
   161    const __m128i K20_39 = _mm_set1_epi32(0x6ED9EBA1);
   162    const __m128i K40_59 = _mm_set1_epi32(0x8F1BBCDC);
   163    const __m128i K60_79 = _mm_set1_epi32(0xCA62C1D6);
   // Reinterpret the byte buffer as 128-bit vectors. No alignment is assumed:
   // every read below goes through the unaligned-load intrinsic.
   171    const __m128i* input = 
reinterpret_cast<const __m128i*
>(input_bytes);
   173    for(
size_t i = 0; i != blocks; ++i)
   // Fetch the current block as four 16-byte words (64 bytes in total).
   182       __m128i W0 = _mm_loadu_si128(&input[0]);
   185       __m128i W1 = _mm_loadu_si128(&input[1]);
   188       __m128i W2 = _mm_loadu_si128(&input[2]);
   191       __m128i W3 = _mm_loadu_si128(&input[3]);
   // GET_P_32 reads one 32-bit lane of a prep result through its u32 member.
   // Each prep call below rotates the W0..W3 argument order by one position
   // and writes (schedule words + K) into P0..P3 in turn. Constant usage is
   // K00_19 once, then five calls each with K20_39, K40_59 and K60_79.
   202 #define GET_P_32(P, i) P.u32[i]   208       prep(P0, W0, W1, W2, W3, K00_19);
   214       prep(P1, W1, W2, W3, W0, K20_39);
   220       prep(P2, W2, W3, W0, W1, K20_39);
   226       prep(P3, W3, W0, W1, W2, K20_39);
   232       prep(P0, W0, W1, W2, W3, K20_39);
   238       prep(P1, W1, W2, W3, W0, K20_39);
   244       prep(P2, W2, W3, W0, W1, K40_59);
   250       prep(P3, W3, W0, W1, W2, K40_59);
   256       prep(P0, W0, W1, W2, W3, K40_59);
   262       prep(P1, W1, W2, W3, W0, K40_59);
   268       prep(P2, W2, W3, W0, W1, K40_59);
   274       prep(P3, W3, W0, W1, W2, K60_79);
   280       prep(P0, W0, W1, W2, W3, K60_79);
   286       prep(P1, W1, W2, W3, W0, K60_79);
   292       prep(P2, W2, W3, W0, W1, K60_79);
   298       prep(P3, W3, W0, W1, W2, K60_79);
   // Fold the working variables into the running digest, and reseed A..E from
   // the updated digest words so the next iteration starts from the new state.
   320       A = (digest[0] += A);
   321       B = (digest[1] += B);
   322       C = (digest[2] += C);
   323       D = (digest[3] += D);
   324       E = (digest[4] += E);
   // Step to the next block. `input` is a __m128i pointer, so the stride is
   // counted in 16-byte units (assumes hash_block_size() is in bytes - confirm).
   326       input += (hash_block_size() / 16);
 
T rotate_left(T input, size_t rot)
void F2(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)
void F3(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)
#define prep(prep, XW0, XW1, XW2, XW3, K)
void F4(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)
void F1(u32bit A, u32bit &B, u32bit C, u32bit D, u32bit &E, u32bit msg, u32bit rot)