aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2018-04-24 16:57:48 +1200
committerDavid Phillips <david@sighup.nz>2018-04-24 17:11:30 +1200
commit0755368f42c8f233c67528e398352a9de2bb33bf (patch)
treea7773108812640dafaa20ae7ad8ea5e084d9356c
parent2fec05364409258a28a77ede1552f1cdc3eabd24 (diff)
downloadsand-leek-0755368f42c8f233c67528e398352a9de2bb33bf.tar.xz
Improve host-side checks, fix MSB bug, misc tidying
Host-side check now uses from-scratch libssl SHA to confirm GPU work. Bug when most significant bit of the exponent is set turns out to be because of libssl's storage of such exponents requiring 5 bytes. This cannot be dynamically adjusted without adjusting the pre-calculated value which is hardcoded into the CL kernel for performance reasons (INITIAL_DATA_LEN)
-rw-r--r--sand-leek-cl.c126
-rw-r--r--slurp.c1
-rw-r--r--trampoline.c2
3 files changed, 68 insertions, 61 deletions
diff --git a/sand-leek-cl.c b/sand-leek-cl.c
index 42ebba5..de512ec 100644
--- a/sand-leek-cl.c
+++ b/sand-leek-cl.c
@@ -1,7 +1,7 @@
/* FIXME magic */
/* 32768 kernels doing 32767 each, except if it's 0xxxxxxx */
#define HASH_PER_RUN ((32768UL*32767UL) - (1<<24))
-#define EXPONENT_MIN 0x1FFFFFFFUL
+#define EXPONENT_MAX 0x1FFFFFFFUL
#define EXPONENT_SIZE_BYTES 4
#define RSA_KEY_BITS 1024
#include <stdio.h>
@@ -34,39 +34,43 @@ unsigned char bitmasks[] = {
[7] = 0xE0 /* 3 MSB */
};
-int truffle_valid(unsigned char *search_raw, int raw_len, char bitmask, struct sha_data sha, unsigned char e[4]) {
- unsigned char digest[20] = {};
- sha_update(&sha, e, EXPONENT_SIZE_BYTES);
- sha_final((unsigned char*)&digest, &sha);
- fprintf(stderr, "Need %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (first %d bytes plus bitmask %x)\n",
- search_raw[0],
- search_raw[1],
- search_raw[2],
- search_raw[3],
- search_raw[4],
- search_raw[5],
- search_raw[6],
- search_raw[7],
- search_raw[8],
- search_raw[9],
- raw_len,
- bitmask & 0xFF
- );
- fprintf(stderr, "GPU got %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (public exponent %02x %02x %02x %02x)\n",
- digest[0],
- digest[1],
- digest[2],
- digest[3],
- digest[4],
- digest[5],
- digest[6],
- digest[7],
- digest[8],
- digest[9],
- e[0], e[1], e[2], e[3]
- );
- return memcmp(digest, search_raw, raw_len) == 0 &&
- (search_raw[raw_len] & bitmask) == (digest[raw_len] & bitmask);
+int truffle_valid(RSA *rsa_key, const char *search, uint32_t e) {
+ char onion[17] = {0};
+ int der_length;
+ unsigned char *der_data;
+ unsigned char *tmp_data;
+ uint32_t e_big_endian;
+ unsigned char digest[20];
+ SHA_CTX sha;
+
+ der_length = i2d_RSAPublicKey(rsa_key, NULL);
+ if (der_length <= 0) {
+ fprintf(stderr, "i2d failed\n");
+ return 1;
+ }
+ der_data = malloc(der_length);
+ if (!der_data) {
+ fprintf(stderr, "DER data malloc failed\n");
+ return 1;
+ }
+ tmp_data = der_data;
+ if (i2d_RSAPublicKey(rsa_key, &tmp_data) != der_length) {
+ fprintf(stderr, "DER formatting failed\n");
+ return 1;
+ }
+
+ e_big_endian = htobe32(e);
+ SHA1_Init(&sha);
+ SHA1_Update(&sha, der_data, der_length - EXPONENT_SIZE_BYTES);
+ SHA1_Update(&sha, &e_big_endian, 4);
+ SHA1_Final((unsigned char*)&digest, &sha);
+
+ onion_base32(onion, (unsigned char*)&digest);
+ onion[16] = '\0';
+
+ fprintf(stderr, "GPU got %s.onion\n", onion, search);
+
+ return strncmp(onion, search, strlen(search) - 1) == 0;
}
double tv_delta(struct timespec *start, struct timespec *end) {
@@ -83,9 +87,10 @@ void die_help(char *argv0)
int main(int argc, char **argv)
{
- const char *search = NULL;
+ char *search = NULL;
char *preferred_platform = NULL;
char c = '\0';
+ int offset = 0;
while ((c = getopt(argc, argv, "s:p:")) != -1) {
switch (c) {
@@ -102,13 +107,18 @@ int main(int argc, char **argv)
}
}
- if (preferred_platform == NULL || search == NULL) {
+ if (preferred_platform == NULL || search == NULL || strlen(search) == 0) {
die_help(argv[0]);
}
- /* FIXME sanatise the input search for non-base32 chars
- * Also investigate performance benefit from pre-unbase32-ing it
- * like the CPU-bound version does */
+ if ((offset = check_base32(search)) >= 0) {
+ fprintf(stderr,
+ "Error: search contains non-base-32 character(s): %c\n"
+ "I cannot search for something that will never occur\n",
+ search[offset]
+ );
+ return 1;
+ }
unsigned char search_raw[10];
/* padded array of the human-readable search */
@@ -141,14 +151,16 @@ int main(int argc, char **argv)
}
- struct timespec tv_program_start = {};
- struct timespec tv_start = {};
- struct timespec tv_end = {};
+ struct timespec tv_program_start = {0};
+ struct timespec tv_start = {0};
+ struct timespec tv_end = {0};
int bitmask = bitmasks[search_len % 8];
unsigned char *der_data = NULL;
unsigned char *tmp_data = NULL;
int der_length = 0;
- unsigned long e = EXPONENT_MIN;
+ unsigned long e = EXPONENT_MAX;
+ unsigned long key_number = 1;
+ unsigned char byte_e[4] = {0};
fprintf(stderr, "Building CL trampoline... ");
if (tramp_init(preferred_platform)) {
@@ -185,14 +197,14 @@ int main(int argc, char **argv)
}
fprintf(stderr, "Done.\n");
- /* FIXME */cl_int *buffer = malloc(4*65536);
- /* FIXME */unsigned long key_number = 1;
+ /* FIXME */cl_int *buffer = malloc(sizeof(cl_int)*65536);
/* FIXME check for error */
bignum_e = BN_new();
clock_gettime(CLOCK_MONOTONIC, &tv_program_start);
+ int success = 0;
do {
- e = EXPONENT_MIN;
+ e = EXPONENT_MAX;
BN_set_word(bignum_e, e);
if (!RSA_generate_key_ex(rsa_key, RSA_KEY_BITS, bignum_e, NULL)) {
fprintf(stderr, "Failed to generate RSA key\n");
@@ -219,7 +231,7 @@ int main(int argc, char **argv)
free(der_data);
- /* pre-adjust context for modofications that are common to all GPU threads */
+ /* pre-adjust context for modifications that are common to all GPU threads */
sha_c.data_len += 4;
sha_c.len += 4;
@@ -266,8 +278,7 @@ int main(int argc, char **argv)
perror("host data buffer malloc");
return 1;
}
-// fprintf(stderr, "Reading data from device... ");
- if (tramp_copy_data((void*)&buffer, 4*65536)) {
+ if (tramp_copy_data((void*)&buffer, sizeof(cl_int)*65536)) {
fprintf(stderr, "Failed.\n");
return 1;
}
@@ -276,28 +287,26 @@ int main(int argc, char **argv)
/* FIXME */ int count = 0;
/* FIXME BUG: temporarily looping backwards to increase chance of using
* something beginning with bit '1' as our exponent to highligt bug */
- for (i = 65536; i >= 0; i--) {
+ for (i = 0; i < 65536; i++) {
if (buffer[i] != 0) {
count++;
- fprintf(stderr, "%x had %x \n", i, buffer[i]);
- /* FIXME */unsigned char byte_e[4] = {};
/* FIXME */uint16_t smalls = (unsigned int)buffer[i];
/* FIXME */uint16_t biggies = (unsigned int)i;
byte_e[0] = (biggies >> 8) & 0xFF;
byte_e[1] = biggies & 0xFF;
byte_e[2] = (smalls >> 8) & 0xFF;
byte_e[3] = smalls & 0xFF;
+ e = (uint32_t)(byte_e[0] << 24) | (uint32_t)(byte_e[1] << 16) | (uint32_t)(byte_e[2] << 8) | (uint32_t)(byte_e[3]);
- if (truffle_valid(search_raw, raw_len, bitmask, sha_c, byte_e)) {
- e = byte_e[0] << 24 | byte_e[1] << 16 | byte_e[2] << 8 | byte_e[3];
+ if (truffle_valid(rsa_key, search, e)) {
+ success = 1;
} else {
fprintf(stderr, "GPU doesn't agree with CPU: bug or hardware fault?\n");
}
break;
}
}
- } while (e == 0);
- fprintf(stderr, "Done.\n");
+ } while (success == 0);
fprintf(stderr, "Destroying CL trampoline... ");
tramp_destroy();
@@ -306,13 +315,12 @@ int main(int argc, char **argv)
free(buffer);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+ bignum_e = BN_new();
if (BN_set_word(bignum_e, e) != 1) {
fprintf(stderr, "BN_set_word failed\n");
return 1;
}
RSA_set0_key(rsa_key, NULL, bignum_e, NULL);
- /* allocate what was freed by above function call */
- bignum_e = BN_new();
#else
/* much tidier to be honest */
BN_set_word(rsa_key->e, e);
@@ -330,7 +338,7 @@ int main(int argc, char **argv)
return 1;
}
PEM_write_PrivateKey(stdout, evp_key, NULL, NULL, 0, NULL, NULL);
- //EVP_PKEY_free(evp_key);
+ EVP_PKEY_free(evp_key);
} else {
fprintf(stderr, "Key invalid:");
ERR_print_errors_fp(stderr);
diff --git a/slurp.c b/slurp.c
index e98f48f..773fc48 100644
--- a/slurp.c
+++ b/slurp.c
@@ -17,7 +17,6 @@ char *slurp(FILE *f, size_t *size)
while (!feof(f)) {
nread = fread(&buffer[*size], 1, BUFFER_STEP, f);
*size += nread;
- fprintf(stderr, "size is %d\n",*size);
buffer = realloc(buffer, *size);
if (!buffer) {
perror("realloc");
diff --git a/trampoline.c b/trampoline.c
index 24573f1..8dd9040 100644
--- a/trampoline.c
+++ b/trampoline.c
@@ -395,7 +395,7 @@ int tramp_run_kernel()
cl_event event;
cl_int ret = 0;
size_t workgroup_sizes[2];
- workgroup_sizes[0] = 65536;
+ workgroup_sizes[0] = 32768;
workgroup_sizes[1] = 1;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, workgroup_sizes, NULL, 0, NULL, &event);