diff options
| author | David Phillips <david@sighup.nz> | 2017-05-14 19:52:19 +1200 | 
|---|---|---|
| committer | David Phillips <david@sighup.nz> | 2017-05-14 19:52:19 +1200 | 
| commit | 9813b147580d9f42e2484d27ecaa3ab244841ee8 (patch) | |
| tree | 56a94fcd0f12aa878e0af29fc9600270dd85f759 | |
| parent | 70652fa53c364d1bf4e7a5eeac7e9e1a5a69a89e (diff) | |
| download | sand-leek-9813b147580d9f42e2484d27ecaa3ab244841ee8.tar.xz | |
Add experimental AVX base32 algo, modularise base32 out
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | onion_base32.c | 102 | ||||
| -rw-r--r-- | onion_base32.h | 6 | ||||
| -rw-r--r-- | sand-leek.c | 29 | 
5 files changed, 122 insertions, 22 deletions
| @@ -1 +1,2 @@  sand-leek +*.o @@ -1,10 +1,12 @@ -CFLAGS += -Wall -Wextra -O2 -finline-functions +CFLAGS += -Wall -Wextra -O2  LDFLAGS += -lssl -lcrypto -lpthread  all: sand-leek +sand-leek: sand-leek.o onion_base32.o +  clean: -	rm -vf sand-leek +	rm -vf sand-leek *.o  .PHONY: all clean diff --git a/onion_base32.c b/onion_base32.c new file mode 100644 index 0000000..d70facf --- /dev/null +++ b/onion_base32.c @@ -0,0 +1,102 @@ +#include <stdlib.h> +#include <string.h> + +static const char base32_lookup[] = "abcdefghijklmnopqrstuvwxyz234567"; + +int +check_base32(char *subject) { +	size_t offset = 0; + +	if ((offset = strspn(subject, base32_lookup)) != strlen(subject)) { +		return offset; +	} +	return 0; +} + +/* Simple and reliable base32 algorithm - "old trusty" + * Note: This is not a general base32 algorithm; it outputs only the + * first 16 base32 symbols of the input buffer, using only the first + * 20 bytes of that buffer. + */ +void +onion_base32(char output[16], unsigned char sum[20]) { +	size_t c = 0; +	int i = 0; + +	for (i = 0; i < 10; i+=5) { +		output[c++] = base32_lookup[sum[i] >> 3]; +		output[c++] = base32_lookup[((sum[i] & 0x07) << 2) | (sum[i+1] >> 6)]; +		output[c++] = base32_lookup[(sum[i+1] >> 1) & 0x1F]; +		output[c++] = base32_lookup[((sum[i+1] & 1) << 4) | (sum[i+2] >> 4)]; +		output[c++] = base32_lookup[((sum[i+2] & 0x0F) << 1) | ((sum[i+3] & 0x80) >> 7)]; +		output[c++] = base32_lookup[(sum[i+3] >> 2) & 0x1F]; +		output[c++] = base32_lookup[((sum[i+3] & 0x03) << 3) | (sum[i+4] >> 5)]; +		output[c++] = base32_lookup[sum[i+4] & 0x1F]; +	} +} + +#ifdef AVX_ONION_BASE32 +#include <immintrin.h> + +/* A slightly-parallel base32 algorithm using AVX + * Note: This is not a general base32 algorithm; it outputs only the + * first 16 base32 symbols of the input buffer, using only the first + * 20 bytes of that buffer. + * + * Somewhat inspired by http://www.alfredklomp.com/programming/sse-base64/ + */ +void +onion_base32_avx(char output[16], unsigned char sum[20]) { +	__m128i res; +	__m128i ssum; +	__m128i masklow5; +	__m128i lmask, l; +	__m128i nmask, n; + +	ssum = _mm_loadu_si128((__m128i*)sum); + +	/* FIXME seems a little hacky */ +	masklow5 = _mm_set1_epi32(0x1F000000); +	masklow5 = _mm_slli_epi64(masklow5, 32); + +	ssum = _mm_shuffle_epi8(ssum, +		_mm_setr_epi8(9,9,9,9,8,7,6,5,4,4,4,4,3,2,1,0 )); + +	/* remember how I said "slightly parallel" ? */ +	res = _mm_srli_epi64(ssum, 3) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 6) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 9) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 12) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 15) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 18) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 21) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + +	res |= _mm_srli_epi64(ssum, 24) & masklow5; +	masklow5 = _mm_srli_epi64(masklow5, 8); + + +	res = _mm_shuffle_epi8(res, +		_mm_setr_epi8(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)); + +	lmask = _mm_cmplt_epi8(res, _mm_set1_epi8(26)); +	nmask = _mm_andnot_si128(lmask, _mm_cmplt_epi8(res, _mm_set1_epi8(32))); + +	l = lmask & _mm_add_epi8(res, _mm_set1_epi8('a')); +	n = nmask & _mm_add_epi8(res, _mm_set1_epi8('2' - 26)); + +	_mm_storeu_si128((__m128i*)output, l|n); +} +#endif /* ifdef AVX_ONION_BASE32 */ diff --git a/onion_base32.h b/onion_base32.h new file mode 100644 index 0000000..8f88ebb --- /dev/null +++ b/onion_base32.h @@ -0,0 +1,6 @@ +int check_base32(char *); +void onion_base32(char [16], unsigned char (*)); + +#ifdef AVX_ONION_BASE32 +void onion_base32_avx(char [16], unsigned char (*)); +#endif diff --git a/sand-leek.c b/sand-leek.c index 285dda4..561574e 100644 --- a/sand-leek.c +++ b/sand-leek.c @@ -13,34 +13,18 @@  #include <openssl/pem.h>  #include <openssl/err.h> +#include "onion_base32.h" +  #define EXPONENT_SIZE_BYTES   4  #define EXPONENT_MIN          0x1FFFFFFF  #define EXPONENT_MAX          0xFFFFFFFF  #define RSA_KEY_BITS          1024 -static const char base32_lookup[] = "abcdefghijklmnopqrstuvwxyz234567";  static char *search;  static size_t search_len;  sem_t working; -void -onion_sha(char output[16], unsigned char sum[20]) { -	size_t c = 0; -	int i = 0; - -	for (i = 0; i < 10; i+=5) { -		output[c++] = base32_lookup[sum[i] >> 3]; -		output[c++] = base32_lookup[((sum[i] & 0x07) << 2) | (sum[i+1] >> 6)]; -		output[c++] = base32_lookup[(sum[i+1] >> 1) & 0x1F]; -		output[c++] = base32_lookup[((sum[i+1] & 1) << 4) | (sum[i+2] >> 4)]; -		output[c++] = base32_lookup[((sum[i+2] & 0x0F) << 1) | ((sum[i+3] & 0x80) >> 7)]; -		output[c++] = base32_lookup[(sum[i+3] >> 2) & 0x1F]; -		output[c++] = base32_lookup[((sum[i+3] & 0x03) << 3) | (sum[i+4] >> 5)]; -		output[c++] = base32_lookup[sum[i+4] & 0x1F]; -	} -} -  /* re-calculate the decryption key `d` for the given key   * the product of e and d must be congruent to 1, and since we are messing   * with e to generate our keys, we must re-calculate d */ @@ -168,7 +152,12 @@ work(void *arg) {  			SHA1_Update(&working_sha_c, &e_big_endian, EXPONENT_SIZE_BYTES);  			SHA1_Final((unsigned char*)&sha, &working_sha_c); -			onion_sha(onion, sha); +#ifdef AVX_ONION_BASE32 +			onion_base32_avx(onion, sha); +#else +			onion_base32(onion, sha); +#endif +  			onion[16] = '\0';  			if (hashes++ >= 1000) { @@ -288,7 +277,7 @@ main(int argc, char **argv) {  	search_len = strlen(search); -	if ((offset = strspn(search, base32_lookup)) != search_len) { +	if ((offset = check_base32(search))) {  		fprintf(stderr,  			"Error: search contains non-base-32 character(s): %c\n"  			"I cannot search for something that will never occur\n", | 
