aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2017-10-02 23:59:34 +1300
committerDavid Phillips <david@sighup.nz>2017-10-03 00:00:31 +1300
commit630135558c432a4cae1e74fdad09007ee17932ad (patch)
treeceab9848d4db4ff9d76f7070b2e87761bd41d0bf
parentefeab5984a62ad6d7a4a7285e2fc7275c38339c5 (diff)
downloadsand-leek-630135558c432a4cae1e74fdad09007ee17932ad.tar.xz
Dump WIP of opencl port
-rw-r--r--Makefile4
-rw-r--r--cl/onion.cl207
-rw-r--r--cl_error.c84
-rw-r--r--cl_error.h3
-rw-r--r--sand-leek-cl.c200
-rw-r--r--slurp.c31
-rw-r--r--slurp.h1
-rw-r--r--trampoline.c97
-rw-r--r--trampoline.h6
9 files changed, 609 insertions, 24 deletions
diff --git a/Makefile b/Makefile
index 8f4b5b8..13ca240 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS += -Wall -Wextra -O2 -I/usr/include/CL
-LDFLAGS += -lssl -lcrypto -lpthread -lOpenCL
+CFLAGS += -Wall -Wextra -O2 -I/usr/include/CL -g
+LDFLAGS += -lssl -lcrypto -lpthread -lOpenCL -g
all: sand-leek sand-leek-cl
diff --git a/cl/onion.cl b/cl/onion.cl
new file mode 100644
index 0000000..da6a654
--- /dev/null
+++ b/cl/onion.cl
@@ -0,0 +1,207 @@
+#define SHA_CHUNK_LEN 64
+#define ROL(x, shamt) ((x << shamt) | ((x >> sizeof(x)*8) - shamt))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+struct sha_data {
+ unsigned int a;
+ unsigned int b;
+ unsigned int c;
+ unsigned int d;
+ unsigned int e;
+ unsigned long len;
+ unsigned long data_len;
+ char data[SHA_CHUNK_LEN];
+};
+
+void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
+ unsigned int w[80] = {0};
+ unsigned int new_a = 0;
+ unsigned int a = sha->a;
+ unsigned int b = sha->b;
+ unsigned int c = sha->c;
+ unsigned int d = sha->d;
+ unsigned int e = sha->e;
+ unsigned int i = 0;
+ unsigned int bo = 0;
+
+ unsigned int k[] = {
+ 0x5A827999,
+ 0x6ED9EBA1,
+ 0x8F1BBCDC,
+ 0xCA62C1D6
+ };
+
+ for (i = 0; i < 80; i++, bo+=4) {
+ w[i] = ((unsigned int)(*buf)[bo]) << 24;
+ w[i] |= (*buf)[bo+1] << 16;
+ w[i] |= (*buf)[bo+2] << 8;
+ w[i] |= (*buf)[bo+3];
+ }
+
+ /* FIXME unroll these operations? */
+ for (i = 16; i < 80; i++) {
+ w[i] = ROL((w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]), 1);
+ }
+
+ for (i = 0; i < 20; i++) {
+ new_a = ROL(a, 5) + ((b&c)|((~b)&d)) + e + w[i] + k[0];
+ e = d;
+ d = c;
+ c = ROL(b, 30);
+ b = a;
+ a = new_a;
+ }
+
+ for (i = 20; i < 40; i++) {
+ new_a = ROL(a, 5) + (b^c^d) + e + w[i] + k[1];
+ e = d;
+ d = c;
+ c = ROL(b, 30);
+ b = a;
+ a = new_a;
+ }
+
+ for (i = 40; i < 60; i++) {
+ new_a = ROL(a, 5) + ((b&c)|(b&d)|(c&d)) + e + w[i] + k[2];
+ e = d;
+ d = c;
+ c = ROL(b, 30);
+ b = a;
+ a = new_a;
+ }
+
+ for (i = 60; i < 80; i++) {
+ new_a = ROL(a, 5) + (b^c^d) + e + w[i] + k[3];
+ e = d;
+ d = c;
+ c = ROL(b, 30);
+ b = a;
+ a = new_a;
+ }
+ sha->a += a;
+ sha->b += b;
+ sha->c += c;
+ sha->d += d;
+ sha->e += e;
+}
+
+void sha_update(struct sha_data *c, void *data, unsigned int size) {
+ unsigned int i = 0;
+ size_t remaining = size;
+ char *bdata = (char*)data;
+
+
+ size_t count = MIN(size, SHA_CHUNK_LEN - c->data_len);
+ for (i = 0; i < count; i++)
+ c->data[c->data_len+i] = ((char*)data)[i];
+// memcpy(&(c->data[c->data_len]), data, count);
+ c->data_len += count;
+ remaining -= count;
+
+
+ while (c->data_len == SHA_CHUNK_LEN) {
+ sha_chunk(&(c->data), c);
+ count = MIN(remaining, SHA_CHUNK_LEN);
+ //memcpy(c->data, &bdata[size-remaining], count);
+ remaining -= count;
+ c->data_len = count;
+ }
+
+ /* representative of all data throughput, inclusive of the buffer in
+ * the context */
+ c->len += size;
+}
+
+void sha_final(unsigned char *digest, struct sha_data *c) {
+ size_t i = 0;
+
+ c->data[c->data_len++] = 0x80;
+
+ /* Transform byte len to bit len */
+ c->len *= 8;
+
+ for (i = c->data_len; i < SHA_CHUNK_LEN; i++)
+ c->data[i] = 0;
+
+ /* still room for the 64-bit message length at the end of this chunk? */
+ if (c->data_len + 8 > SHA_CHUNK_LEN) {
+ sha_chunk(&(c->data), c);
+ for (i = 0; i < SHA_CHUNK_LEN; i++)
+ c->data[i] = 0;
+ }
+
+ /* FIXME loop or leave unrolled? */
+ c->data[56] = c->len >> 56;
+ c->data[57] = c->len >> 48;
+ c->data[58] = c->len >> 40;
+ c->data[59] = c->len >> 32;
+ c->data[60] = c->len >> 24;
+ c->data[61] = c->len >> 16;
+ c->data[62] = c->len >> 8;
+ c->data[63] = c->len;
+
+ sha_chunk(&(c->data), c);
+
+
+ /* FIXME loop or leave unrolled? */
+ digest[ 0] = c->a >> 24;
+ digest[ 1] = c->a >> 16;
+ digest[ 2] = c->a >> 8;
+ digest[ 3] = c->a;
+
+
+ digest[ 4] = c->b >> 24;
+ digest[ 5] = c->b >> 16;
+ digest[ 6] = c->b >> 8;
+ digest[ 7] = c->b;
+
+ digest[ 8] = c->c >> 24;
+ digest[ 9] = c->c >> 16;
+ digest[10] = c->c >> 8;
+ digest[11] = c->c;
+
+ digest[12] = c->d >> 24;
+ digest[13] = c->d >> 16;
+ digest[14] = c->d >> 8;
+ digest[15] = c->d;
+
+ digest[16] = c->e >> 24;
+ digest[17] = c->e >> 16;
+ digest[18] = c->e >> 8;
+ digest[19] = c->e;
+}
+
+__kernel void fractal_gen(
+ __global unsigned char *results,
+ __constant struct sha_data *partial,
+ __constant unsigned char *search,
+ const unsigned int raw_length)
+{
+ unsigned int tx = get_global_id(0);
+ unsigned int ty = get_global_id(1);
+ unsigned int i;
+
+ struct sha_data ctx;
+ ctx.a = partial->a;
+ ctx.b = partial->b;
+ ctx.c = partial->c;
+ ctx.d = partial->d;
+ ctx.e = partial->e;
+ ctx.len = partial->len;
+ ctx.data_len = partial->data_len;
+ for (i = 0; i < SHA_CHUNK_LEN; i++) {
+ ctx.data[i] = partial->data[i];
+ }
+
+ /* FIXME dummy e (big-endian) */
+ char e[4] = {0x1F, 0xFF, 0xFF, 0xFF};
+ char digest[20];
+ for (i = 0; i < 65536; i++) {
+ sha_update(&ctx, &e, 4);
+
+ sha_final(&digest, &ctx);
+ }
+
+// buffer[(size*y)+x] = (i*255)/iterations;
+ return;
+}
diff --git a/cl_error.c b/cl_error.c
new file mode 100644
index 0000000..55b91bd
--- /dev/null
+++ b/cl_error.c
@@ -0,0 +1,84 @@
+#include <cl.h>
+
+static const char *cl_error_strings[] = {
+ "CL_SUCCESS",
+ "CL_DEVICE_NOT_FOUND",
+ "CL_DEVICE_NOT_AVAILABLE",
+ "CL_COMPILER_NOT_AVAILABLE",
+ "CL_MEM_OBJECT_ALLOCATION_FAILURE",
+ "CL_OUT_OF_RESOURCES",
+ "CL_OUT_OF_HOST_MEMORY",
+ "CL_PROFILING_INFO_NOT_AVAILABLE",
+ "CL_MEM_COPY_OVERLAP",
+ "CL_IMAGE_FORMAT_MISMATCH",
+ "CL_IMAGE_FORMAT_NOT_SUPPORTED",
+ "CL_BUILD_PROGRAM_FAILURE",
+ "CL_MAP_FAILURE",
+ "CL_MISALIGNED_SUB_BUFFER_OFFSET",
+ "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST",
+ "CL_COMPILE_PROGRAM_FAILURE",
+ "CL_LINKER_NOT_AVAILABLE",
+ "CL_LINK_PROGRAM_FAILURE",
+ "CL_DEVICE_PARTITION_FAILED",
+ "CL_KERNEL_ARG_INFO_NOT_AVAILABLE",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "(Invalid)",
+ "CL_INVALID_VALUE",
+ "CL_INVALID_DEVICE_TYPE",
+ "CL_INVALID_PLATFORM",
+ "CL_INVALID_DEVICE",
+ "CL_INVALID_CONTEXT",
+ "CL_INVALID_QUEUE_PROPERTIES",
+ "CL_INVALID_COMMAND_QUEUE",
+ "CL_INVALID_HOST_PTR",
+ "CL_INVALID_MEM_OBJECT",
+ "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
+ "CL_INVALID_IMAGE_SIZE",
+ "CL_INVALID_SAMPLER",
+ "CL_INVALID_BINARY",
+ "CL_INVALID_BUILD_OPTIONS",
+ "CL_INVALID_PROGRAM",
+ "CL_INVALID_PROGRAM_EXECUTABLE",
+ "CL_INVALID_KERNEL_NAME",
+ "CL_INVALID_KERNEL_DEFINITION",
+ "CL_INVALID_KERNEL",
+ "CL_INVALID_ARG_INDEX",
+ "CL_INVALID_ARG_VALUE",
+ "CL_INVALID_ARG_SIZE",
+ "CL_INVALID_KERNEL_ARGS",
+ "CL_INVALID_WORK_DIMENSION",
+ "CL_INVALID_WORK_GROUP_SIZE",
+ "CL_INVALID_WORK_ITEM_SIZE",
+ "CL_INVALID_GLOBAL_OFFSET",
+ "CL_INVALID_EVENT_WAIT_LIST",
+ "CL_INVALID_EVENT",
+ "CL_INVALID_OPERATION",
+ "CL_INVALID_GL_OBJECT",
+ "CL_INVALID_BUFFER_SIZE",
+ "CL_INVALID_MIP_LEVEL",
+ "CL_INVALID_GLOBAL_WORK_SIZE",
+ "CL_INVALID_PROPERTY",
+ "CL_INVALID_IMAGE_DESCRIPTOR",
+ "CL_INVALID_COMPILER_OPTIONS",
+ "CL_INVALID_LINKER_OPTIONS",
+ "CL_INVALID_DEVICE_PARTITION_COUNT",
+ "CL_INVALID_PIPE_SIZE",
+ "CL_INVALID_DEVICE_QUEUE",
+};
+
+const char *get_cl_error_string(cl_int error)
+{
+ error = -error;
+ if (error < 0 || error >= sizeof(cl_error_strings) / sizeof(const char *))
+ return "Error value out of bounds";
+
+ return cl_error_strings[error];
+}
diff --git a/cl_error.h b/cl_error.h
new file mode 100644
index 0000000..7f4a95d
--- /dev/null
+++ b/cl_error.h
@@ -0,0 +1,3 @@
+#include <cl.h>
+
+const char *get_cl_error_string(cl_int error);
diff --git a/sand-leek-cl.c b/sand-leek-cl.c
new file mode 100644
index 0000000..a21829d
--- /dev/null
+++ b/sand-leek-cl.c
@@ -0,0 +1,200 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+//#include <openssl/sha.h>
+#include <openssl/rsa.h>
+
+#include "onion_base32.h"
+#include "trampoline.h"
+//#include "sha1.h"
+
+
+/* FIXME make loop internal to run(), rather than rebuilding kernel etc
+ * each new key */
+int run(const char *preferred_platform, unsigned char *search_raw, size_t raw_len, struct sha_data *sha)
+{
+ fprintf(stderr, "Building CL trampoline... ");
+ if (tramp_init(preferred_platform)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Loading kernel source from file... ");
+ if (tramp_load_kernel(CL_SRC_DIR"onion.cl")) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Loaded.\n");
+
+ fprintf(stderr, "Compiling kernel source... ");
+ if (tramp_compile_kernel()) {
+ fprintf(stderr, "Failed:\n%s\n", tramp_get_build_log());
+ return 1;
+ }
+ fprintf(stderr, "Compiled.\n");
+
+ fprintf(stderr, "Setting kernel arguments... ");
+ if (tramp_set_kernel_args(raw_len)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Transferring search target to device... ");
+ if (tramp_copy_search(search_raw)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Transferring partial SHA work to device... ");
+ if (tramp_copy_sha(sha)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+ fprintf(stderr, "Running kernel... ");
+ if (tramp_run_kernel()) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+
+/* char *buffer = malloc(size*size);
+ if (!buffer) {
+ perror("host data buffer malloc");
+ return 1;
+ }
+ fprintf(stderr, "Reading data from device... ");
+ if (tramp_copy_data((void*)&buffer, size*size)) {
+ fprintf(stderr, "Failed.\n");
+ return 1;
+ }
+ fprintf(stderr, "Done.\n");
+*/
+ fprintf(stderr, "Destroying CL trampoline... ");
+ tramp_destroy();
+ fprintf(stderr, "Blown to smitherines.\n");
+
+/* free(buffer);*/
+ return 0;
+}
+
+void die_help(char *argv0)
+{
+ fprintf(stderr, "Syntax:\n%s [-p platform] [-s search]\n", argv0);
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ const char *search = 0;
+ char *preferred_platform = NULL;
+ char c = '\0';
+
+ while ((c = getopt(argc, argv, "s:p:")) != -1) {
+ switch (c) {
+ case 's':
+ search = optarg;
+ break;
+ case 'p':
+ preferred_platform = optarg;
+ break;
+ case '?':
+ die_help(argv[0]);
+ return 1; /* mostly unreachable */
+ break; /* unreachable */
+ }
+ }
+
+ /* FIXME sanatise the input search for non-base32 chars
+ * Also investigate performance benefit from pre-unbase32-ing it
+ * like the CPU-bound version does */
+
+ unsigned char search_raw[10];
+ /* padded array of the human-readable search */
+ char search_pad[16] = {0};
+ strncpy(search_pad, search, sizeof(search_pad));
+
+ /* decode desired base32 */
+ onion_base32_dec(search_raw, search_pad);
+
+ /* hangover code from sand-leek.c */
+ /* bitmasks to be used to compare remainder bits */
+ unsigned char bitmasks[] = {
+ [1] = 0xF8, /* 5 MSB */
+ [2] = 0xC0, /* 2 MSB */
+ [3] = 0xFE, /* 7 MSB */
+ [4] = 0xF0, /* 4 MSB */
+ [5] = 0x80, /* 1 MSB */
+ [6] = 0xFC, /* 6 MSB */
+ [7] = 0xE0 /* 3 MSB */
+ };
+
+ /* number of whole bytes of raw hash to compare:
+ * 10 is the size of the data a full onion address covers
+ * 16 is the size of the base32-encoded onion address */
+ size_t search_len = strlen(search);
+ int raw_len = (search_len*10)/16;
+ int bitmask = bitmasks[search_len % 8];
+ /* end hangover code from sand-leek.c */
+
+ RSA* rsa_key = NULL;
+ rsa_key = RSA_new();
+ if (!rsa_key) {
+ fprintf(stderr, "Failed to allocate RSA key\n");
+ return 1;
+ }
+
+#define EXPONENT_SIZE_BYTES 4
+#define EXPONENT_MIN 0x1FFFFFFF
+#define RSA_KEY_BITS 1024
+
+ unsigned long e = EXPONENT_MIN;
+ unsigned char *der_data = NULL;
+ unsigned char *tmp_data = NULL;
+ int der_length = 0;
+ struct sha_data sha_c;
+ BIGNUM *bignum_e = NULL;
+
+
+ bignum_e = BN_new();
+ if (!bignum_e) {
+ fprintf(stderr, "Failed to allocate bignum for exponent\n");
+ return 1;
+ }
+
+ e = EXPONENT_MIN;
+ BN_set_word(bignum_e, e);
+ if (!RSA_generate_key_ex(rsa_key, RSA_KEY_BITS, bignum_e, NULL)) {
+ fprintf(stderr, "Failed to generate RSA key\n");
+ return 1;
+ }
+ der_length = i2d_RSAPublicKey(rsa_key, NULL);
+ if (der_length <= 0) {
+ fprintf(stderr, "i2d failed\n");
+ return 1;
+ }
+ der_data = malloc(der_length);
+ if (!der_data) {
+ fprintf(stderr, "DER data malloc failed\n");
+ return 1;
+ }
+ tmp_data = der_data;
+ if (i2d_RSAPublicKey(rsa_key, &tmp_data) != der_length) {
+ fprintf(stderr, "DER formatting failed\n");
+ return 1;
+ }
+
+ sha_init(&sha_c);
+ sha_update(&sha_c, der_data, der_length - EXPONENT_SIZE_BYTES);
+ free(der_data);
+
+
+ run(preferred_platform, search_raw, raw_len, &sha_c);
+ return 0;
+}
diff --git a/slurp.c b/slurp.c
new file mode 100644
index 0000000..ea7e2f7
--- /dev/null
+++ b/slurp.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#define BUFFER_STEP 10240
+
+char *slurp(FILE *f, size_t *size)
+{
+ char *buffer = NULL;
+ size_t nread = 0;
+
+ buffer = malloc(BUFFER_STEP);
+ if (!buffer) {
+ perror("malloc");
+ return NULL;
+ }
+
+ while (!feof(f)) {
+ nread = fread(&buffer[*size], 1, BUFFER_STEP, f);
+ *size += nread;
+ printf("size is %d\n",*size);
+ buffer = realloc(buffer, *size);
+ if (!buffer) {
+ perror("realloc");
+ return NULL;
+ }
+ }
+ if (ferror(f)) {
+ perror("slurp/fread");
+ }
+ return buffer;
+}
diff --git a/slurp.h b/slurp.h
new file mode 100644
index 0000000..114014b
--- /dev/null
+++ b/slurp.h
@@ -0,0 +1 @@
+char *slurp(FILE *f, size_t *size);
diff --git a/trampoline.c b/trampoline.c
index 6659223..bed457a 100644
--- a/trampoline.c
+++ b/trampoline.c
@@ -4,6 +4,7 @@
#include "cl_error.h"
#include "slurp.h"
+#include "sha1.h"
static cl_platform_id platform;
static cl_context context;
@@ -11,14 +12,13 @@ static cl_device_id* devices;
static cl_uint device_count;
static unsigned int device_in_use;
static cl_command_queue command_queue;
-static cl_mem device_buffer;
+static cl_mem device_result;
+static cl_mem device_sha;
+static cl_mem device_search;
static cl_kernel kernel;
static cl_program program;
-static unsigned int size;
-static unsigned int iterations;
-
/**
* Wrapper to help with fetching string-based information about an OpenCL
* platform.
@@ -35,7 +35,7 @@ char *get_platform_info(cl_platform_id id, cl_platform_info value_name)
ret = clGetPlatformInfo(id, value_name, 0, NULL, &value_len);
if (ret != CL_SUCCESS) {
- fprintf(stderr, "Failed to get platform info for platform %d: %s\n", id, get_cl_error_string(ret));
+ fprintf(stderr, "Failed to get platform info for platform: %s\n", get_cl_error_string(ret));
return NULL;
}
@@ -47,7 +47,7 @@ char *get_platform_info(cl_platform_id id, cl_platform_info value_name)
ret = clGetPlatformInfo(id, value_name, value_len, value, &value_len);
if (ret != CL_SUCCESS) {
- fprintf(stderr, "Failed to get platform info for platform %d: %s\n", id, get_cl_error_string(ret));
+ fprintf(stderr, "Failed to get platform info for platform: %s\n", get_cl_error_string(ret));
return NULL;
}
@@ -176,7 +176,7 @@ int tramp_init(const char *preferred_platform)
/* FIXME expose to user */
device_in_use = 0;
- command_queue = clCreateCommandQueue(context, devices[device_in_use], 0, &ret);
+ command_queue = clCreateCommandQueueWithProperties(context, devices[device_in_use], NULL, &ret);
if (ret != CL_SUCCESS) {
fprintf(stderr, "Failed to create command queue on context: %s ", get_cl_error_string(ret));
return 1;
@@ -322,34 +322,49 @@ int tramp_compile_kernel()
*
* FIXME investigate using something more flexible?
*/
-int tramp_set_kernel_args(unsigned int s, unsigned int it)
+int tramp_set_kernel_args(unsigned int raw_len)
{
cl_int ret = 0;
- size = s;
- iterations = it;
-
- device_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size*size, NULL, &ret);
+ device_result = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 1024, NULL, &ret);
if (ret != CL_SUCCESS) {
fprintf(stderr, "Failed to create buffer for slave device: %s ", get_cl_error_string(ret));
return 1;
}
- ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), &device_buffer);
+ device_sha = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(struct sha_data), NULL, &ret);
+ if (ret != CL_SUCCESS) {
+ fprintf(stderr, "Failed to create SHA buffer for slave device: %s ", get_cl_error_string(ret));
+ return 1;
+ }
+
+ device_search = clCreateBuffer(context, CL_MEM_READ_WRITE, 10, NULL, &ret);
if (ret != CL_SUCCESS) {
- fprintf(stderr, "Error on buffer argument: %s ", get_cl_error_string(ret));
+ fprintf(stderr, "Failed to create search buffer for slave device: %s ", get_cl_error_string(ret));
return 1;
}
- ret = clSetKernelArg(kernel, 1, sizeof(unsigned int), &size);
+ ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), &device_result);
if (ret != CL_SUCCESS) {
- fprintf(stderr, "Error on size argument: %s ", get_cl_error_string(ret));
+ fprintf(stderr, "Error on result buffer argument: %s ", get_cl_error_string(ret));
return 1;
}
- ret = clSetKernelArg(kernel, 2, sizeof(unsigned long), &iterations);
+ ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), &device_sha);
if (ret != CL_SUCCESS) {
- fprintf(stderr, "Error on iteration argument: %s ", get_cl_error_string(ret));
+ fprintf(stderr, "Error on SHA buffer argument: %s ", get_cl_error_string(ret));
+ return 1;
+ }
+
+ ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), &device_search);
+ if (ret != CL_SUCCESS) {
+ fprintf(stderr, "Error on raw search argument: %s ", get_cl_error_string(ret));
+ return 1;
+ }
+
+ ret = clSetKernelArg(kernel, 3, sizeof(cl_int), &raw_len);
+ if (ret != CL_SUCCESS) {
+ fprintf(stderr, "Error on raw length argument: %s ", get_cl_error_string(ret));
return 1;
}
@@ -368,8 +383,8 @@ int tramp_run_kernel()
cl_event event;
cl_int ret = 0;
size_t workgroup_sizes[2];
- workgroup_sizes[0] = size;
- workgroup_sizes[1] = size;
+ workgroup_sizes[0] = 65536;
+ workgroup_sizes[1] = 1;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, workgroup_sizes, NULL, 0, NULL, &event);
if (ret != CL_SUCCESS) {
@@ -395,7 +410,7 @@ int tramp_copy_data(void **buffer, size_t size)
cl_event event;
cl_int ret = 0;
- ret = clEnqueueReadBuffer(command_queue, device_buffer, CL_TRUE, 0, size, *buffer, 0, NULL, &event);
+ ret = clEnqueueReadBuffer(command_queue, device_result, CL_TRUE, 0, size, *buffer, 0, NULL, &event);
if (ret != CL_SUCCESS) {
fprintf(stderr, "Failed to enqueue read command for data: %s ", get_cl_error_string(ret));
return 1;
@@ -404,3 +419,43 @@ int tramp_copy_data(void **buffer, size_t size)
return 0;
}
+
+/**
+ * Copy an SHA context to the device
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int tramp_copy_sha(struct sha_data *sha)
+{
+ cl_event event;
+ cl_int ret = 0;
+
+ ret = clEnqueueWriteBuffer(command_queue, device_sha, CL_TRUE, 0, sizeof(struct sha_data), sha, 0, NULL, &event);
+ if (ret != CL_SUCCESS) {
+ fprintf(stderr, "Failed to enqueue write command for SHA partial work: %s ", get_cl_error_string(ret));
+ return 1;
+ }
+ clReleaseEvent(event);
+
+ return 0;
+}
+
+/**
+ * Copy a raw search target to the device buffer
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+int tramp_copy_search(unsigned int search_raw[10])
+{
+ cl_event event;
+ cl_int ret = 0;
+
+ ret = clEnqueueWriteBuffer(command_queue, device_search, CL_TRUE, 0, 10, search_raw, 0, NULL, &event);
+ if (ret != CL_SUCCESS) {
+ fprintf(stderr, "Failed to enqueue write command for SHA partial work: %s ", get_cl_error_string(ret));
+ return 1;
+ }
+ clReleaseEvent(event);
+
+ return 0;
+}
diff --git a/trampoline.h b/trampoline.h
index 35624a9..67cbd33 100644
--- a/trampoline.h
+++ b/trampoline.h
@@ -1,8 +1,12 @@
+#include "sha1.h"
+
int tramp_init(const char *preferred_platform);
void tramp_destroy(void);
int tramp_load_kernel(const char *filename);
char *tramp_get_build_log(void);
int tramp_compile_kernel(void);
-int tramp_set_kernel_args(unsigned long size, unsigned long iterations);
+int tramp_set_kernel_args(unsigned int raw_len);
int tramp_run_kernel(void);
int tramp_copy_data(void **buffer, size_t size);
+int tramp_copy_sha(struct sha_data *sha);
+int tramp_copy_search(unsigned char search_raw[10]);