aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2017-05-14 19:52:19 +1200
committerDavid Phillips <david@sighup.nz>2017-05-14 19:52:19 +1200
commit9813b147580d9f42e2484d27ecaa3ab244841ee8 (patch)
tree56a94fcd0f12aa878e0af29fc9600270dd85f759
parent70652fa53c364d1bf4e7a5eeac7e9e1a5a69a89e (diff)
downloadsand-leek-9813b147580d9f42e2484d27ecaa3ab244841ee8.tar.xz
Add experimental AVX base32 algo, modularise base32 out
-rw-r--r--.gitignore1
-rw-r--r--Makefile6
-rw-r--r--onion_base32.c102
-rw-r--r--onion_base32.h6
-rw-r--r--sand-leek.c29
5 files changed, 122 insertions, 22 deletions
diff --git a/.gitignore b/.gitignore
index 93438ac..64123bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
sand-leek
+*.o
diff --git a/Makefile b/Makefile
index aa3b243..2998c81 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,12 @@
-CFLAGS += -Wall -Wextra -O2 -finline-functions
+CFLAGS += -Wall -Wextra -O2
LDFLAGS += -lssl -lcrypto -lpthread
all: sand-leek
+sand-leek: sand-leek.o onion_base32.o
+
clean:
- rm -vf sand-leek
+ rm -vf sand-leek *.o
.PHONY: all clean
diff --git a/onion_base32.c b/onion_base32.c
new file mode 100644
index 0000000..d70facf
--- /dev/null
+++ b/onion_base32.c
@@ -0,0 +1,102 @@
+#include <stdlib.h>
+#include <string.h>
+
+static const char base32_lookup[] = "abcdefghijklmnopqrstuvwxyz234567";
+
+int
+check_base32(char *subject) {
+ size_t offset = 0;
+
+ if ((offset = strspn(subject, base32_lookup)) != strlen(subject)) {
+ return offset;
+ }
+ return 0;
+}
+
+/* Simple and reliable base32 algorithm - "old trusty"
+ * Note: This is not a general base32 algorithm; it outputs only the
+ * first 16 base32 symbols of the input buffer, using only the first
+ * 20 bytes of that buffer.
+ */
+void
+onion_base32(char output[16], unsigned char sum[20]) {
+ size_t c = 0;
+ int i = 0;
+
+ for (i = 0; i < 10; i+=5) {
+ output[c++] = base32_lookup[sum[i] >> 3];
+ output[c++] = base32_lookup[((sum[i] & 0x07) << 2) | (sum[i+1] >> 6)];
+ output[c++] = base32_lookup[(sum[i+1] >> 1) & 0x1F];
+ output[c++] = base32_lookup[((sum[i+1] & 1) << 4) | (sum[i+2] >> 4)];
+ output[c++] = base32_lookup[((sum[i+2] & 0x0F) << 1) | ((sum[i+3] & 0x80) >> 7)];
+ output[c++] = base32_lookup[(sum[i+3] >> 2) & 0x1F];
+ output[c++] = base32_lookup[((sum[i+3] & 0x03) << 3) | (sum[i+4] >> 5)];
+ output[c++] = base32_lookup[sum[i+4] & 0x1F];
+ }
+}
+
+#ifdef AVX_ONION_BASE32
+#include <immintrin.h>
+
+/* A slightly-parallel base32 algorithm using AVX
+ * Note: This is not a general base32 algorithm; it outputs only the
+ * first 16 base32 symbols of the input buffer, using only the first
+ * 20 bytes of that buffer.
+ *
+ * Somewhat inspired by http://www.alfredklomp.com/programming/sse-base64/
+ */
+void
+onion_base32_avx(char output[16], unsigned char sum[20]) {
+ __m128i res;
+ __m128i ssum;
+ __m128i masklow5;
+ __m128i lmask, l;
+ __m128i nmask, n;
+
+ ssum = _mm_loadu_si128((__m128i*)sum);
+
+ /* FIXME seems a little hacky */
+ masklow5 = _mm_set1_epi32(0x1F000000);
+ masklow5 = _mm_slli_epi64(masklow5, 32);
+
+ ssum = _mm_shuffle_epi8(ssum,
+ _mm_setr_epi8(9,9,9,9,8,7,6,5,4,4,4,4,3,2,1,0 ));
+
+ /* remember how I said "slightly parallel" ? */
+ res = _mm_srli_epi64(ssum, 3) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 6) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 9) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 12) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 15) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 18) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 21) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+ res |= _mm_srli_epi64(ssum, 24) & masklow5;
+ masklow5 = _mm_srli_epi64(masklow5, 8);
+
+
+ res = _mm_shuffle_epi8(res,
+ _mm_setr_epi8(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0));
+
+ lmask = _mm_cmplt_epi8(res, _mm_set1_epi8(26));
+ nmask = _mm_andnot_si128(lmask, _mm_cmplt_epi8(res, _mm_set1_epi8(32)));
+
+ l = lmask & _mm_add_epi8(res, _mm_set1_epi8('a'));
+ n = nmask & _mm_add_epi8(res, _mm_set1_epi8('2' - 26));
+
+ _mm_storeu_si128((__m128i*)output, l|n);
+}
+#endif /* ifdef AVX_ONION_BASE32 */
diff --git a/onion_base32.h b/onion_base32.h
new file mode 100644
index 0000000..8f88ebb
--- /dev/null
+++ b/onion_base32.h
@@ -0,0 +1,6 @@
+int check_base32(char *);
+void onion_base32(char [16], unsigned char (*));
+
+#ifdef AVX_ONION_BASE32
+void onion_base32_avx(char [16], unsigned char (*));
+#endif
diff --git a/sand-leek.c b/sand-leek.c
index 285dda4..561574e 100644
--- a/sand-leek.c
+++ b/sand-leek.c
@@ -13,34 +13,18 @@
#include <openssl/pem.h>
#include <openssl/err.h>
+#include "onion_base32.h"
+
#define EXPONENT_SIZE_BYTES 4
#define EXPONENT_MIN 0x1FFFFFFF
#define EXPONENT_MAX 0xFFFFFFFF
#define RSA_KEY_BITS 1024
-static const char base32_lookup[] = "abcdefghijklmnopqrstuvwxyz234567";
static char *search;
static size_t search_len;
sem_t working;
-void
-onion_sha(char output[16], unsigned char sum[20]) {
- size_t c = 0;
- int i = 0;
-
- for (i = 0; i < 10; i+=5) {
- output[c++] = base32_lookup[sum[i] >> 3];
- output[c++] = base32_lookup[((sum[i] & 0x07) << 2) | (sum[i+1] >> 6)];
- output[c++] = base32_lookup[(sum[i+1] >> 1) & 0x1F];
- output[c++] = base32_lookup[((sum[i+1] & 1) << 4) | (sum[i+2] >> 4)];
- output[c++] = base32_lookup[((sum[i+2] & 0x0F) << 1) | ((sum[i+3] & 0x80) >> 7)];
- output[c++] = base32_lookup[(sum[i+3] >> 2) & 0x1F];
- output[c++] = base32_lookup[((sum[i+3] & 0x03) << 3) | (sum[i+4] >> 5)];
- output[c++] = base32_lookup[sum[i+4] & 0x1F];
- }
-}
-
/* re-calculate the decryption key `d` for the given key
* the product of e and d must be congruent to 1, and since we are messing
* with e to generate our keys, we must re-calculate d */
@@ -168,7 +152,12 @@ work(void *arg) {
SHA1_Update(&working_sha_c, &e_big_endian, EXPONENT_SIZE_BYTES);
SHA1_Final((unsigned char*)&sha, &working_sha_c);
- onion_sha(onion, sha);
+#ifdef AVX_ONION_BASE32
+ onion_base32_avx(onion, sha);
+#else
+ onion_base32(onion, sha);
+#endif
+
onion[16] = '\0';
if (hashes++ >= 1000) {
@@ -288,7 +277,7 @@ main(int argc, char **argv) {
search_len = strlen(search);
- if ((offset = strspn(search, base32_lookup)) != search_len) {
+ if ((offset = check_base32(search))) {
fprintf(stderr,
"Error: search contains non-base-32 character(s): %c\n"
"I cannot search for something that will never occur\n",