From 9813b147580d9f42e2484d27ecaa3ab244841ee8 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Sun, 14 May 2017 19:52:19 +1200 Subject: Add experimental AVX base32 algo, modularise base32 out --- .gitignore | 1 + Makefile | 6 ++-- onion_base32.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ onion_base32.h | 6 ++++ sand-leek.c | 29 +++++----------- 5 files changed, 122 insertions(+), 22 deletions(-) create mode 100644 onion_base32.c create mode 100644 onion_base32.h diff --git a/.gitignore b/.gitignore index 93438ac..64123bc 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ sand-leek +*.o diff --git a/Makefile b/Makefile index aa3b243..2998c81 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,12 @@ -CFLAGS += -Wall -Wextra -O2 -finline-functions +CFLAGS += -Wall -Wextra -O2 LDFLAGS += -lssl -lcrypto -lpthread all: sand-leek +sand-leek: sand-leek.o onion_base32.o + clean: - rm -vf sand-leek + rm -vf sand-leek *.o .PHONY: all clean diff --git a/onion_base32.c b/onion_base32.c new file mode 100644 index 0000000..d70facf --- /dev/null +++ b/onion_base32.c @@ -0,0 +1,102 @@ +#include +#include + +static const char base32_lookup[] = "abcdefghijklmnopqrstuvwxyz234567"; + +int +check_base32(char *subject) { + size_t offset = 0; + + if ((offset = strspn(subject, base32_lookup)) != strlen(subject)) { + return offset; + } + return 0; +} + +/* Simple and reliable base32 algorithm - "old trusty" + * Note: This is not a general base32 algorithm; it outputs only the + * first 16 base32 symbols of the input buffer, using only the first + * 20 bytes of that buffer. + */ +void +onion_base32(char output[16], unsigned char sum[20]) { + size_t c = 0; + int i = 0; + + for (i = 0; i < 10; i+=5) { + output[c++] = base32_lookup[sum[i] >> 3]; + output[c++] = base32_lookup[((sum[i] & 0x07) << 2) | (sum[i+1] >> 6)]; + output[c++] = base32_lookup[(sum[i+1] >> 1) & 0x1F]; + output[c++] = base32_lookup[((sum[i+1] & 1) << 4) | (sum[i+2] >> 4)]; + output[c++] = base32_lookup[((sum[i+2] & 0x0F) << 1) | ((sum[i+3] & 0x80) >> 7)]; + output[c++] = base32_lookup[(sum[i+3] >> 2) & 0x1F]; + output[c++] = base32_lookup[((sum[i+3] & 0x03) << 3) | (sum[i+4] >> 5)]; + output[c++] = base32_lookup[sum[i+4] & 0x1F]; + } +} + +#ifdef AVX_ONION_BASE32 +#include + +/* A slightly-parallel base32 algorithm using AVX + * Note: This is not a general base32 algorithm; it outputs only the + * first 16 base32 symbols of the input buffer, using only the first + * 20 bytes of that buffer. + * + * Somewhat inspired by http://www.alfredklomp.com/programming/sse-base64/ + */ +void +onion_base32_avx(char output[16], unsigned char sum[20]) { + __m128i res; + __m128i ssum; + __m128i masklow5; + __m128i lmask, l; + __m128i nmask, n; + + ssum = _mm_loadu_si128((__m128i*)sum); + + /* FIXME seems a little hacky */ + masklow5 = _mm_set1_epi32(0x1F000000); + masklow5 = _mm_slli_epi64(masklow5, 32); + + ssum = _mm_shuffle_epi8(ssum, + _mm_setr_epi8(9,9,9,9,8,7,6,5,4,4,4,4,3,2,1,0 )); + + /* remember how I said "slightly parallel" ? */ + res = _mm_srli_epi64(ssum, 3) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 6) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 9) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 12) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 15) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 18) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 21) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + res |= _mm_srli_epi64(ssum, 24) & masklow5; + masklow5 = _mm_srli_epi64(masklow5, 8); + + + res = _mm_shuffle_epi8(res, + _mm_setr_epi8(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)); + + lmask = _mm_cmplt_epi8(res, _mm_set1_epi8(26)); + nmask = _mm_andnot_si128(lmask, _mm_cmplt_epi8(res, _mm_set1_epi8(32))); + + l = lmask & _mm_add_epi8(res, _mm_set1_epi8('a')); + n = nmask & _mm_add_epi8(res, _mm_set1_epi8('2' - 26)); + + _mm_storeu_si128((__m128i*)output, l|n); +} +#endif /* ifdef AVX_ONION_BASE32 */ diff --git a/onion_base32.h b/onion_base32.h new file mode 100644 index 0000000..8f88ebb --- /dev/null +++ b/onion_base32.h @@ -0,0 +1,6 @@ +int check_base32(char *); +void onion_base32(char [16], unsigned char (*)); + +#ifdef AVX_ONION_BASE32 +void onion_base32_avx(char [16], unsigned char (*)); +#endif diff --git a/sand-leek.c b/sand-leek.c index 285dda4..561574e 100644 --- a/sand-leek.c +++ b/sand-leek.c @@ -13,34 +13,18 @@ #include #include +#include "onion_base32.h" + #define EXPONENT_SIZE_BYTES 4 #define EXPONENT_MIN 0x1FFFFFFF #define EXPONENT_MAX 0xFFFFFFFF #define RSA_KEY_BITS 1024 -static const char base32_lookup[] = "abcdefghijklmnopqrstuvwxyz234567"; static char *search; static size_t search_len; sem_t working; -void -onion_sha(char output[16], unsigned char sum[20]) { - size_t c = 0; - int i = 0; - - for (i = 0; i < 10; i+=5) { - output[c++] = base32_lookup[sum[i] >> 3]; - output[c++] = base32_lookup[((sum[i] & 0x07) << 2) | (sum[i+1] >> 6)]; - output[c++] = base32_lookup[(sum[i+1] >> 1) & 0x1F]; - output[c++] = base32_lookup[((sum[i+1] & 1) << 4) | (sum[i+2] >> 4)]; - output[c++] = base32_lookup[((sum[i+2] & 0x0F) << 1) | ((sum[i+3] & 0x80) >> 7)]; - output[c++] = base32_lookup[(sum[i+3] >> 2) & 0x1F]; - output[c++] = base32_lookup[((sum[i+3] & 0x03) << 3) | (sum[i+4] >> 5)]; - output[c++] = base32_lookup[sum[i+4] & 0x1F]; - } -} - /* re-calculate the decryption key `d` for the given key * the product of e and d must be congruent to 1, and since we are messing * with e to generate our keys, we must re-calculate d */ @@ -168,7 +152,12 @@ work(void *arg) { SHA1_Update(&working_sha_c, &e_big_endian, EXPONENT_SIZE_BYTES); SHA1_Final((unsigned char*)&sha, &working_sha_c); - onion_sha(onion, sha); +#ifdef AVX_ONION_BASE32 + onion_base32_avx(onion, sha); +#else + onion_base32(onion, sha); +#endif + onion[16] = '\0'; if (hashes++ >= 1000) { @@ -288,7 +277,7 @@ main(int argc, char **argv) { search_len = strlen(search); - if ((offset = strspn(search, base32_lookup)) != search_len) { + if ((offset = check_base32(search))) { fprintf(stderr, "Error: search contains non-base-32 character(s): %c\n" "I cannot search for something that will never occur\n", -- cgit v1.1