aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2018-04-24 13:02:19 +1200
committerDavid Phillips <david@sighup.nz>2018-04-24 17:11:46 +1200
commit8546bbc5abf0aafdaa9a2f5926b53d7c738ff44c (patch)
treea1ab8abdbb6976fc26444d7d61ef7b0f5e392425
parent43fc2e02818737e1979c4f21d726a356784d16f2 (diff)
downloadsand-leek-8546bbc5abf0aafdaa9a2f5926b53d7c738ff44c.tar.xz
Further improvements
-rw-r--r--sand-leek-cl.c333
-rw-r--r--slurp.c2
-rw-r--r--trampoline.c2
3 files changed, 170 insertions, 167 deletions
diff --git a/sand-leek-cl.c b/sand-leek-cl.c
index 2a44641..105fa45 100644
--- a/sand-leek-cl.c
+++ b/sand-leek-cl.c
@@ -1,9 +1,16 @@
+/* FIXME magic */
+/* 32768 kernels doing 32767 each, except if it's 0xxxxxxx */
+#define HASH_PER_RUN ((32768UL*32767UL) - (1<<24))
+#define EXPONENT_MIN 0x1FFFFFFFUL
+#define EXPONENT_SIZE_BYTES 4
+#define RSA_KEY_BITS 1024
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <cl.h>
#include <string.h>
+#include <endian.h>
#include <openssl/pem.h>
#include <openssl/rsa.h>
@@ -29,9 +36,9 @@ unsigned char bitmasks[] = {
int truffle_valid(unsigned char *search_raw, int raw_len, char bitmask, struct sha_data sha, unsigned char e[4]) {
unsigned char digest[20] = {};
- sha_update(&sha, e, 4);
- sha_final(&digest, &sha);
- fprintf(stderr, "Need %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (%d)\n",
+ sha_update(&sha, e, EXPONENT_SIZE_BYTES);
+ sha_final((unsigned char*)&digest, &sha);
+ fprintf(stderr, "Need %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (first %d bytes plus bitmask %x)\n",
search_raw[0],
search_raw[1],
search_raw[2],
@@ -42,9 +49,10 @@ int truffle_valid(unsigned char *search_raw, int raw_len, char bitmask, struct s
search_raw[7],
search_raw[8],
search_raw[9],
- raw_len
+ raw_len,
+ bitmask & 0xFF
);
- fprintf(stderr, "GPU got %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+ fprintf(stderr, "GPU got %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x (public exponent %02x %02x %02x %02x)\n",
digest[0],
digest[1],
digest[2],
@@ -54,7 +62,8 @@ int truffle_valid(unsigned char *search_raw, int raw_len, char bitmask, struct s
digest[6],
digest[7],
digest[8],
- digest[9]
+ digest[9],
+ e[0], e[1], e[2], e[3]
);
return memcmp(digest, search_raw, raw_len) == 0 &&
(search_raw[raw_len] & bitmask) == (digest[raw_len] & bitmask);
@@ -66,146 +75,6 @@ double tv_delta(struct timespec *start, struct timespec *end) {
return s_delta + (double)ns_delta/1e9;
}
-/* FIXME make loop internal to run(), rather than rebuilding kernel etc
- * each new key */
-unsigned long run(const char *preferred_platform, unsigned char *search_raw, size_t raw_len, size_t search_len, struct sha_data *sha)
-{
- struct timespec tv_start = {};
- struct timespec tv_end = {};
- int bitmask = bitmasks[search_len % 8];
-
- fprintf(stderr, "Building CL trampoline... ");
- if (tramp_init(preferred_platform)) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- fprintf(stderr, "Done.\n");
-
- fprintf(stderr, "Loading kernel source from file... ");
- if (tramp_load_kernel(CL_SRC_DIR"onion.cl")) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- fprintf(stderr, "Loaded.\n");
-
- fprintf(stderr, "Compiling kernel source... ");
- if (tramp_compile_kernel()) {
- fprintf(stderr, "Failed:\n%s\n", tramp_get_build_log());
- return 1;
- }
- fprintf(stderr, "Compiled.\n");
-
- fprintf(stderr, "Setting kernel arguments... ");
- if (tramp_set_kernel_args(raw_len, bitmask)) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- fprintf(stderr, "Done.\n");
-
- fprintf(stderr, "Transferring search target to device... ");
- if (tramp_copy_search(search_raw)) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- fprintf(stderr, "Done.\n");
-
- /* pre-adjust context for modofications that are common to all GPU threads */
- sha->data_len += 4;
- sha->len += 4;
-
- /* pre-load end-mark bit */
- sha->data[sha->data_len] = 0x80;
-
- sha->len *= 8;
- /* FIXME loop or leave unrolled? */
- sha->data[56] = sha->len >> 56;
- sha->data[57] = sha->len >> 48;
- sha->data[58] = sha->len >> 40;
- sha->data[59] = sha->len >> 32;
- sha->data[60] = sha->len >> 24;
- sha->data[61] = sha->len >> 16;
- sha->data[62] = sha->len >> 8;
- sha->data[63] = sha->len;
-
-
- fprintf(stderr, "Transferring partial SHA work to device (data len is at %d, len is at %d)... ", sha->data_len, sha->len);
- if (tramp_copy_sha(sha)) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- fprintf(stderr, "Done.\n");
-
- /* un-adjust context for modofications that are common to all GPU threads */
- sha->len /= 8;
- sha->data_len -= 4;
- sha->len -= 4;
-
- fprintf(stderr, "Running kernel... ");
- clock_gettime(CLOCK_MONOTONIC, &tv_start);
-
-/* FIXME magic */
-/* 65536 kernels doing 32767 each, except if it's 00xxxxxx */
-#define HASH_PER_RUN ((65536UL*32767UL) - (1<<24))
- if (tramp_run_kernel()) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- clock_gettime(CLOCK_MONOTONIC, &tv_end);
-
- /*FIXME*/double clock_delta = tv_delta(&tv_start, &tv_end);
- fprintf(stderr, "Done in %.2f seconds (%.3f MH/s).\n", clock_delta, (HASH_PER_RUN/clock_delta/1e6));
-
- /* FIXME */cl_int *buffer = malloc(4*65536);
- if (!buffer) {
- perror("host data buffer malloc");
- return 1;
- }
- fprintf(stderr, "Reading data from device... ");
- if (tramp_copy_data((void*)&buffer, 4*65536)) {
- fprintf(stderr, "Failed.\n");
- return 1;
- }
- fprintf(stderr, "Done.\n");
-
- fprintf(stderr, "Analysing batch results. Successful nonces: \n");
-
- /* FIXME */ int i = 0;
- /* FIXME */ int count = 0;
- for (i = 0; i < 65536; i++) {
- if (buffer[i] != 0) {
- count++;
- fprintf(stderr, "%d \n", buffer[i]);
- /* FIXME */unsigned char e[4] = {};
- /* FIXME */unsigned int smalls = (unsigned int)buffer[i];
- /* FIXME */unsigned int biggies = (unsigned int)i;
- e[0] = (biggies >> 8) & 0xFF;
- e[1] = biggies & 0xFF;
- e[2] = (smalls >> 8) & 0xFF;
- e[3] = smalls & 0xFF;
- if (truffle_valid(search_raw, raw_len, bitmask, *sha, e)) {
- fprintf(stderr, "«%x %x %x %x»\n", e[0], e[1], e[2], e[3]);
- /* FIXME */unsigned long eLE = e[0] << 24 | e[1] << 16 | e[2] << 8 | e[3];
- fprintf(stderr, "Got eem: %xul!\n", eLE);
- return eLE;
- } else {
- fprintf(stderr, "GPU doesn't agree with CPU: bug or hardware fault?\n");
- }
- break;
- }
- }
- if (count == 0) {
- fprintf(stderr, "None. ");
- }
- fprintf(stderr, "Done.\n");
-
- fprintf(stderr, "Destroying CL trampoline... ");
- tramp_destroy();
- fprintf(stderr, "Blown to smitherines.\n");
-
- free(buffer);
- return 0;
-}
-
void die_help(char *argv0)
{
fprintf(stderr, "Syntax:\n%s [-p platform] [-s search]\n", argv0);
@@ -214,7 +83,7 @@ void die_help(char *argv0)
int main(int argc, char **argv)
{
- const char *search = 0;
+ const char *search = NULL;
char *preferred_platform = NULL;
char c = '\0';
@@ -233,6 +102,10 @@ int main(int argc, char **argv)
}
}
+ if (preferred_platform == NULL || search == NULL) {
+ die_help(argv[0]);
+ }
+
/* FIXME sanatise the input search for non-base32 chars
* Also investigate performance benefit from pre-unbase32-ing it
* like the CPU-bound version does */
@@ -258,15 +131,6 @@ int main(int argc, char **argv)
fprintf(stderr, "Failed to allocate RSA key\n");
return 1;
}
-
-#define EXPONENT_SIZE_BYTES 4
-#define EXPONENT_MIN 0x1FFFFFFF
-#define RSA_KEY_BITS 1024
-
- unsigned long e = EXPONENT_MIN;
- unsigned char *der_data = NULL;
- unsigned char *tmp_data = NULL;
- int der_length = 0;
struct sha_data sha_c;
BIGNUM *bignum_e = NULL;
@@ -276,10 +140,61 @@ int main(int argc, char **argv)
return 1;
}
- e = EXPONENT_MIN;
- BN_set_word(bignum_e, e);
+ struct timespec tv_program_start = {};
+ struct timespec tv_start = {};
+ struct timespec tv_end = {};
+ int bitmask = bitmasks[search_len % 8];
+ unsigned char *der_data = NULL;
+ unsigned char *tmp_data = NULL;
+ int der_length = 0;
+ unsigned long e = EXPONENT_MIN;
+
+ fprintf(stderr, "Building CL trampoline... ");
+ if (tramp_init(preferred_platform)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Loading kernel source from file... ");
+ if (tramp_load_kernel(CL_SRC_DIR"onion.cl")) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Loaded.\n");
+
+ fprintf(stderr, "Compiling kernel source... ");
+ if (tramp_compile_kernel()) {
+ fprintf(stderr, "Failed:\n%s\n", tramp_get_build_log());
+ return 1;
+ }
+ fprintf(stderr, "Compiled.\n");
+
+ fprintf(stderr, "Setting kernel arguments... ");
+ if (tramp_set_kernel_args(raw_len, bitmask)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Transferring search target to device... ");
+ if (tramp_copy_search(search_raw)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ /*FIXME*/uint32_t eBE = 0;
+ /* FIXME */cl_int *buffer = malloc(4*65536);
+ /* FIXME */unsigned long key_number = 1;
+
+ /* FIXME check for error */
+ bignum_e = BN_new();
+ clock_gettime(CLOCK_MONOTONIC, &tv_program_start);
do {
+ e = EXPONENT_MIN;
+ BN_set_word(bignum_e, e);
if (!RSA_generate_key_ex(rsa_key, RSA_KEY_BITS, bignum_e, NULL)) {
fprintf(stderr, "Failed to generate RSA key\n");
return 1;
@@ -304,11 +219,101 @@ int main(int argc, char **argv)
sha_update(&sha_c, der_data, der_length - EXPONENT_SIZE_BYTES);
free(der_data);
- e = run(preferred_platform, search_raw, raw_len, search_len, &sha_c);
- } while (e == 0);
- BN_set_word(bignum_e, e);
-fprintf(stderr, "exponent is %lx\n", e);
+ /* pre-adjust context for modofications that are common to all GPU threads */
+ sha_c.data_len += 4;
+ sha_c.len += 4;
+
+ /* pre-load end-mark bit */
+ sha_c.data[sha_c.data_len] = 0x80;
+
+ sha_c.len *= 8;
+ /* FIXME loop or leave unrolled? */
+ sha_c.data[56] = sha_c.len >> 56;
+ sha_c.data[57] = sha_c.len >> 48;
+ sha_c.data[58] = sha_c.len >> 40;
+ sha_c.data[59] = sha_c.len >> 32;
+ sha_c.data[60] = sha_c.len >> 24;
+ sha_c.data[61] = sha_c.len >> 16;
+ sha_c.data[62] = sha_c.len >> 8;
+ sha_c.data[63] = sha_c.len;
+
+
+ if (tramp_copy_sha(&sha_c)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+
+ /* un-adjust context for modofications that are common to all GPU threads */
+ sha_c.len /= 8;
+ sha_c.data_len -= 4;
+ sha_c.len -= 4;
+
+ clock_gettime(CLOCK_MONOTONIC, &tv_start);
+
+ if (tramp_run_kernel()) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &tv_end);
+
+ /*FIXME*/double peak_delta = tv_delta(&tv_start, &tv_end);
+ /*FIXME*/double total_delta = tv_delta(&tv_program_start, &tv_end);
+ fprintf(stderr, "Exhausted key attempt %lu in %.2f seconds (peak %.3f MH/s, average %.3f MH/s).\r", key_number, peak_delta, (HASH_PER_RUN/peak_delta/1e6), (key_number*HASH_PER_RUN/total_delta/1e6));
+
+ key_number++;
+
+ if (!buffer) {
+ perror("host data buffer malloc");
+ return 1;
+ }
+// fprintf(stderr, "Reading data from device... ");
+ if (tramp_copy_data((void*)&buffer, 4*65536)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+
+ /* FIXME */ int i = 0;
+ /* FIXME */ int count = 0;
+ /* FIXME BUG: temporarily looping backwards to increase chance of using
+ * something beginning with bit '1' as our exponent to highligt bug */
+ for (i = 65536; i >= 0; i--) {
+ if (buffer[i] != 0) {
+ count++;
+ fprintf(stderr, "%x had %x \n", i, buffer[i]);
+ /* FIXME */unsigned char byte_e[4] = {};
+ /* FIXME */uint16_t smalls = (unsigned int)buffer[i];
+ /* FIXME */uint16_t biggies = (unsigned int)i;
+ byte_e[0] = (biggies >> 8) & 0xFF;
+ byte_e[1] = biggies & 0xFF;
+ byte_e[2] = (smalls >> 8) & 0xFF;
+ byte_e[3] = smalls & 0xFF;
+
+ if (truffle_valid(search_raw, raw_len, bitmask, sha_c, byte_e)) {
+ eBE = byte_e[0] << 24 | byte_e[1] << 16 | byte_e[2] << 8 | byte_e[3];
+ } else {
+ fprintf(stderr, "GPU doesn't agree with CPU: bug or hardware fault?\n");
+ }
+ break;
+ }
+ }
+ } while (eBE == 0);
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Destroying CL trampoline... ");
+ tramp_destroy();
+ fprintf(stderr, "Blown to smitherines.\n");
+
+ free(buffer);
+
+
+
+
+
+// e = run(preferred_platform, search_raw, raw_len, search_len, &sha_c);
+ e = eBE; /* FIXME */
+
+//fprintf(stderr, "exponent is %lx\n", e);
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
if (BN_set_word(bignum_e, e) != 1) {
@@ -335,13 +340,11 @@ fprintf(stderr, "exponent is %lx\n", e);
return 1;
}
PEM_write_PrivateKey(stdout, evp_key, NULL, NULL, 0, NULL, NULL);
- EVP_PKEY_free(evp_key);
- return 1;
+ //EVP_PKEY_free(evp_key);
} else {
fprintf(stderr, "Key invalid:");
ERR_print_errors_fp(stderr);
}
-
return 0;
}
diff --git a/slurp.c b/slurp.c
index ea7e2f7..e98f48f 100644
--- a/slurp.c
+++ b/slurp.c
@@ -17,7 +17,7 @@ char *slurp(FILE *f, size_t *size)
while (!feof(f)) {
nread = fread(&buffer[*size], 1, BUFFER_STEP, f);
*size += nread;
- printf("size is %d\n",*size);
+ fprintf(stderr, "size is %d\n",*size);
buffer = realloc(buffer, *size);
if (!buffer) {
perror("realloc");
diff --git a/trampoline.c b/trampoline.c
index 9064335..24573f1 100644
--- a/trampoline.c
+++ b/trampoline.c
@@ -395,7 +395,7 @@ int tramp_run_kernel()
cl_event event;
cl_int ret = 0;
size_t workgroup_sizes[2];
- workgroup_sizes[0] = 63356;
+ workgroup_sizes[0] = 65536;
workgroup_sizes[1] = 1;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, workgroup_sizes, NULL, 0, NULL, &event);