aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2017-11-19 23:12:13 +1300
committerDavid Phillips <david@sighup.nz>2018-04-24 17:11:30 +1200
commit27e9f33ed36c10b1a4e34c0ebc86d87ab2cfb5ff (patch)
treed1d5ea55e9875d83c41b1fbb572a420850102fc3
parent036c470e94c278aeddcd9528f857eb66809ac7d6 (diff)
downloadsand-leek-27e9f33ed36c10b1a4e34c0ebc86d87ab2cfb5ff.tar.xz
Initial dump of SHA CL port
-rw-r--r--Makefile2
-rw-r--r--cl/onion.cl162
-rw-r--r--sand-leek-cl.c201
-rw-r--r--trampoline.c18
-rw-r--r--trampoline.h2
5 files changed, 309 insertions, 76 deletions
diff --git a/Makefile b/Makefile
index 13ca240..7322ac3 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ all: sand-leek sand-leek-cl
sand-leek: sand-leek.o onion_base32.o key_update.o
$(CC) -o $@ $^ $(LDFLAGS)
-sand-leek-cl: sand-leek-cl.o onion_base32.o trampoline.o cl_error.o slurp.o sha1.o
+sand-leek-cl: sand-leek-cl.o onion_base32.o trampoline.o cl_error.o slurp.o sha1.o key_update.o
$(CC) -o $@ $^ $(LDFLAGS)
sand-leek-cl.o: sand-leek-cl.c
diff --git a/cl/onion.cl b/cl/onion.cl
index 035749c..2972617 100644
--- a/cl/onion.cl
+++ b/cl/onion.cl
@@ -10,10 +10,22 @@ struct sha_data {
unsigned int e;
unsigned long len;
unsigned long data_len;
- char data[SHA_CHUNK_LEN];
+ unsigned char data[SHA_CHUNK_LEN];
};
-void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
+void memcpy(void *restrict dest, void *restrict src, int len) {
+ unsigned char *dest_ = (unsigned char*)dest;
+ unsigned char *src_ = (unsigned char*)src;
+ int i = 0;
+ for (i = 0; i < len; i++) {
+ dest_[i] = src_[i];
+ }
+// while (len-- >= 0) {
+// dest_[len] = src_[len];
+// }
+}
+
+void sha_chunk(unsigned char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
unsigned int w[80] = {0};
unsigned int new_a = 0;
unsigned int a = sha->a;
@@ -24,25 +36,27 @@ void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
unsigned int i = 0;
unsigned int bo = 0;
- unsigned int k[] = {
+ const unsigned int k[] = {
0x5A827999,
0x6ED9EBA1,
0x8F1BBCDC,
0xCA62C1D6
};
+ #pragma unroll
for (i = 0; i < 80; i++, bo+=4) {
- w[i] = ((unsigned int)(*buf)[bo]) << 24;
- w[i] |= (*buf)[bo+1] << 16;
- w[i] |= (*buf)[bo+2] << 8;
- w[i] |= (*buf)[bo+3];
+ w[i] = ((*buf)[bo]) << 24;
+ w[i] |= ((*buf)[bo+1]) << 16;
+ w[i] |= ((*buf)[bo+2]) << 8;
+ w[i] |= ((*buf)[bo+3]);
}
- /* FIXME unroll these operations? */
+// #pragma unroll
for (i = 16; i < 80; i++) {
w[i] = ROL((w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]), 1);
}
+// #pragma unroll
for (i = 0; i < 20; i++) {
new_a = ROL(a, 5) + ((b&c)|((~b)&d)) + e + w[i] + k[0];
e = d;
@@ -52,6 +66,7 @@ void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
a = new_a;
}
+// #pragma unroll
for (i = 20; i < 40; i++) {
new_a = ROL(a, 5) + (b^c^d) + e + w[i] + k[1];
e = d;
@@ -61,6 +76,7 @@ void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
a = new_a;
}
+// #pragma unroll
for (i = 40; i < 60; i++) {
new_a = ROL(a, 5) + ((b&c)|(b&d)|(c&d)) + e + w[i] + k[2];
e = d;
@@ -69,7 +85,7 @@ void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
b = a;
a = new_a;
}
-
+// #pragma unroll
for (i = 60; i < 80; i++) {
new_a = ROL(a, 5) + (b^c^d) + e + w[i] + k[3];
e = d;
@@ -88,13 +104,13 @@ void sha_chunk(char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {
void sha_update(struct sha_data *c, void *data, unsigned int size) {
unsigned int i = 0;
size_t remaining = size;
- char *bdata = (char*)data;
+ unsigned char *bdata = (unsigned char*)data;
size_t count = MIN(size, SHA_CHUNK_LEN - c->data_len);
for (i = 0; i < count; i++)
c->data[c->data_len+i] = ((char*)data)[i];
-// memcpy(&(c->data[c->data_len]), data, count);
+ //memcpy(&(c->data[c->data_len]), data, count);
c->data_len += count;
remaining -= count;
@@ -102,7 +118,7 @@ void sha_update(struct sha_data *c, void *data, unsigned int size) {
while (c->data_len == SHA_CHUNK_LEN) {
sha_chunk(&(c->data), c);
count = MIN(remaining, SHA_CHUNK_LEN);
- //memcpy(c->data, &bdata[size-remaining], count);
+ memcpy(c->data, &bdata[size-remaining], count);
remaining -= count;
c->data_len = count;
}
@@ -142,7 +158,6 @@ void sha_final(unsigned char *digest, struct sha_data *c) {
sha_chunk(&(c->data), c);
-
/* FIXME loop or leave unrolled? */
digest[ 0] = c->a >> 24;
digest[ 1] = c->a >> 16;
@@ -157,6 +172,9 @@ void sha_final(unsigned char *digest, struct sha_data *c) {
digest[ 8] = c->c >> 24;
digest[ 9] = c->c >> 16;
+
+
+// second half of hash not needed eh?
digest[10] = c->c >> 8;
digest[11] = c->c;
@@ -172,36 +190,118 @@ void sha_final(unsigned char *digest, struct sha_data *c) {
}
__kernel void fractal_gen(
- __global unsigned char *results,
+ __global unsigned int *results,
__constant struct sha_data *partial,
__constant unsigned char *search,
- const unsigned int raw_length)
+ const unsigned int raw_length,
+ const unsigned int bitmask)
{
unsigned int tx = get_global_id(0);
unsigned int ty = get_global_id(1);
- unsigned int i;
+ unsigned int i,j;
struct sha_data ctx;
- ctx.a = partial->a;
- ctx.b = partial->b;
- ctx.c = partial->c;
- ctx.d = partial->d;
- ctx.e = partial->e;
- ctx.len = partial->len;
- ctx.data_len = partial->data_len;
- for (i = 0; i < SHA_CHUNK_LEN; i++) {
- ctx.data[i] = partial->data[i];
- }
/* FIXME dummy e (big-endian) */
- char e[4] = {0x1F, 0xFF, 0xFF, 0xFF};
- char digest[20];
- for (i = 0; i < 65536; i++) {
- sha_update(&ctx, &e, 4);
+ unsigned char e[4] = {0x1F, 0xFF, 0xFF, 0xFF};
+ unsigned char digest[20];
+ /* first half of e is our worker number rest is determined later */
+ e[0] = ((tx >> 8) & 0xFF);
+
+ /* if MSB is 0, then it doesn't need to be stored in the key, so violates
+ * law of sizeof(e) == 4, messing everything up */
+ if (e[0] == 0) {
+ return;
+ }
+
+ e[1] = tx & 0xFF;
+ results[tx] = 0;
+
+ for (i = 3; i < 65536; i+=2) {
+ e[2] = (i >> 8) & 0xFF;
+ e[3] = i & 0xFF;
+
+ #pragma unroll
+ for (j = 0; j < SHA_CHUNK_LEN; j++) {
+ ctx.data[j] = partial->data[j];
+ }
+ ctx.a = partial->a;
+ ctx.b = partial->b;
+ ctx.c = partial->c;
+ ctx.d = partial->d;
+ ctx.e = partial->e;
+ ctx.len = partial->len;
+ ctx.data_len = partial->data_len;
+
+ sha_update(&ctx, &e, 4);
sha_final(&digest, &ctx);
+
+ int all_clear = 1;
+ for (j = 0; j < raw_length; j++) {
+ if (search[j] != digest[j]) {
+ all_clear = 0;
+ }
+ }
+ if (all_clear == 1 && (digest[j] & bitmask) == (search[j] & bitmask)) {
+ results[tx] = i;
+ }
}
-// buffer[(size*y)+x] = (i*255)/iterations;
return;
}
+
+
+void unused() {
+/*
+#define R2(w, a, b, c, d, e, i) a = ROL(a, 5) + (b^c^d) + e + w[i] + 0x6ED9EBA1; b = ROL(b, 30);
+
+ R2(w, a, b, c, d, e, 20);
+ R2(w, e, a, b, c, d, 21);
+ R2(w, d, e, a, b, c, 22);
+ R2(w, c, d, e, a, b, 23);
+ R2(w, b, c, d, e, a, 24);
+ R2(w, a, b, c, d, e, 25);
+ R2(w, e, a, b, c, d, 26);
+ R2(w, d, e, a, b, c, 27);
+ R2(w, c, d, e, a, b, 28);
+ R2(w, b, c, d, e, a, 29);
+ R2(w, a, b, c, d, e, 30);
+ R2(w, e, a, b, c, d, 31);
+ R2(w, d, e, a, b, c, 32);
+ R2(w, c, d, e, a, b, 33);
+ R2(w, b, c, d, e, a, 34);
+ R2(w, a, b, c, d, e, 35);
+ R2(w, e, a, b, c, d, 36);
+ R2(w, d, e, a, b, c, 37);
+ R2(w, c, d, e, a, b, 38);
+ R2(w, b, c, d, e, a, 39);*/
+
+/*
+#define R3(w, a, b, c, d, e, i) a = ROL(a, 5) + ((b&c)|(b&d)|(c&d)) + e + w[i] + 0x8F1BBCDC; b = ROL(b, 30);
+
+ R3(w, a, b, c, d, e, 40);
+ R3(w, e, a, b, c, d, 41);
+ R3(w, d, e, a, b, c, 42);
+ R3(w, c, d, e, a, b, 43);
+ R3(w, b, c, d, e, a, 44);
+ R3(w, a, b, c, d, e, 45);
+ R3(w, e, a, b, c, d, 46);
+ R3(w, d, e, a, b, c, 47);
+ R3(w, c, d, e, a, b, 48);
+ R3(w, b, c, d, e, a, 49);
+ R3(w, a, b, c, d, e, 50);
+ R3(w, e, a, b, c, d, 51);
+ R3(w, d, e, a, b, c, 52);
+ R3(w, c, d, e, a, b, 53);
+ R3(w, b, c, d, e, a, 54);
+ R3(w, a, b, c, d, e, 55);
+ R3(w, e, a, b, c, d, 56);
+ R3(w, d, e, a, b, c, 57);
+ R3(w, c, d, e, a, b, 58);
+ R3(w, b, c, d, e, a, 59);
+*/
+
+
+
+}
diff --git a/sand-leek-cl.c b/sand-leek-cl.c
index a21829d..67157a0 100644
--- a/sand-leek-cl.c
+++ b/sand-leek-cl.c
@@ -2,19 +2,78 @@
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
+#include <cl.h>
+#include <string.h>
-//#include <openssl/sha.h>
+#include <openssl/pem.h>
#include <openssl/rsa.h>
+#include <openssl/err.h>
+#include "key_update.h"
#include "onion_base32.h"
#include "trampoline.h"
//#include "sha1.h"
+/* hangover code from sand-leek.c */
+/* bitmasks to be used to compare remainder bits */
+unsigned char bitmasks[] = {
+ [0] = 0x00,
+ [1] = 0xF8, /* 5 MSB */
+ [2] = 0xC0, /* 2 MSB */
+ [3] = 0xFE, /* 7 MSB */
+ [4] = 0xF0, /* 4 MSB */
+ [5] = 0x80, /* 1 MSB */
+ [6] = 0xFC, /* 6 MSB */
+ [7] = 0xE0 /* 3 MSB */
+};
+
+int truffle_valid(unsigned char *search_raw, int raw_len, char bitmask, struct sha_data sha, unsigned char e[4]) {
+ unsigned char digest[20] = {};
+ sha_update(&sha, e, 4);
+ sha_final(&digest, &sha);
+ fprintf(stderr, "Need %x%x%x%x%x%x%x%x%x%x (%d)\n",
+ search_raw[0],
+ search_raw[1],
+ search_raw[2],
+ search_raw[3],
+ search_raw[4],
+ search_raw[5],
+ search_raw[6],
+ search_raw[7],
+ search_raw[8],
+ search_raw[9],
+ raw_len
+ );
+ fprintf(stderr, "GPU got %x%x%x%x%x%x%x%x%x%x\n",
+ digest[0],
+ digest[1],
+ digest[2],
+ digest[3],
+ digest[4],
+ digest[5],
+ digest[6],
+ digest[7],
+ digest[8],
+ digest[9]
+ );
+ return memcmp(digest, search_raw, raw_len) == 0 &&
+ (search_raw[raw_len] & bitmask) == (digest[raw_len] & bitmask);
+}
+
+double tv_delta(struct timespec *start, struct timespec *end) {
+ double s_delta = end->tv_sec - start->tv_sec;
+ long ns_delta = end->tv_nsec - start->tv_nsec;
+ return s_delta + (double)ns_delta/1e9;
+}
/* FIXME make loop internal to run(), rather than rebuilding kernel etc
* each new key */
-int run(const char *preferred_platform, unsigned char *search_raw, size_t raw_len, struct sha_data *sha)
+unsigned long run(const char *preferred_platform, unsigned char *search_raw, size_t raw_len, size_t search_len, struct sha_data *sha)
{
+ struct timespec tv_start = {};
+ struct timespec tv_end = {};
+ int bitmask = bitmasks[search_len % 8];
+
fprintf(stderr, "Building CL trampoline... ");
if (tramp_init(preferred_platform)) {
fprintf(stderr, "Failed.\n");
@@ -37,7 +96,7 @@ int run(const char *preferred_platform, unsigned char *search_raw, size_t raw_le
fprintf(stderr, "Compiled.\n");
fprintf(stderr, "Setting kernel arguments... ");
- if (tramp_set_kernel_args(raw_len)) {
+ if (tramp_set_kernel_args(raw_len, bitmask)) {
fprintf(stderr, "Failed.\n");
return 1;
}
@@ -58,29 +117,66 @@ int run(const char *preferred_platform, unsigned char *search_raw, size_t raw_le
fprintf(stderr, "Done.\n");
fprintf(stderr, "Running kernel... ");
+ clock_gettime(CLOCK_MONOTONIC, &tv_start);
+/* FIXME magic */
+/* 65536 kernels doing 32767 each, except if it's 00xxxxxx */
+#define HASH_PER_RUN ((65536UL*32767UL) - (1<<24))
if (tramp_run_kernel()) {
fprintf(stderr, "Failed.\n");
return 1;
}
- fprintf(stderr, "Done.\n");
+ clock_gettime(CLOCK_MONOTONIC, &tv_end);
+ /*FIXME*/double clock_delta = tv_delta(&tv_start, &tv_end);
+ fprintf(stderr, "Done in %.2f seconds (%.3f MH/s).\n", clock_delta, (HASH_PER_RUN/clock_delta/1e6));
-/* char *buffer = malloc(size*size);
+ /* FIXME */cl_int *buffer = malloc(4*65536);
if (!buffer) {
perror("host data buffer malloc");
return 1;
}
fprintf(stderr, "Reading data from device... ");
- if (tramp_copy_data((void*)&buffer, size*size)) {
+ if (tramp_copy_data((void*)&buffer, 4*65536)) {
fprintf(stderr, "Failed.\n");
return 1;
}
fprintf(stderr, "Done.\n");
-*/
+
+ fprintf(stderr, "Analysing batch results. Successful nonces: \n");
+
+ /* FIXME */ int i = 0;
+ /* FIXME */ int count = 0;
+ for (i = 0; i < 65536; i++) {
+ if (buffer[i] != 0) {
+ count++;
+ fprintf(stderr, "%d \n", buffer[i]);
+ /* FIXME */unsigned char e[4] = {};
+ /* FIXME */unsigned int smalls = (unsigned int)buffer[i];
+ /* FIXME */unsigned int biggies = (unsigned int)i;
+ e[0] = (biggies >> 8) & 0xFF;
+ e[1] = biggies & 0xFF;
+ e[2] = (smalls >> 8) & 0xFF;
+ e[3] = smalls & 0xFF;
+ if (truffle_valid(search_raw, raw_len, bitmask, *sha, e)) {
+ fprintf(stderr, "«%x %x %x %x»\n", e[0], e[1], e[2], e[3]);
+ /* FIXME */unsigned long eLE = e[0] << 24 | e[1] << 16 | e[2] << 8 | e[3];
+ fprintf(stderr, "Got eem: %xul!\n", eLE);
+ return eLE;
+ } else {
+ fprintf(stderr, "GPU doesn't agree with CPU: bug or hardware fault?\n");
+ }
+ break;
+ }
+ }
+ if (count == 0) {
+ fprintf(stderr, "None. ");
+ }
+ fprintf(stderr, "Done.\n");
+
fprintf(stderr, "Destroying CL trampoline... ");
tramp_destroy();
fprintf(stderr, "Blown to smitherines.\n");
-/* free(buffer);*/
+ free(buffer);
return 0;
}
@@ -123,24 +219,11 @@ int main(int argc, char **argv)
/* decode desired base32 */
onion_base32_dec(search_raw, search_pad);
- /* hangover code from sand-leek.c */
- /* bitmasks to be used to compare remainder bits */
- unsigned char bitmasks[] = {
- [1] = 0xF8, /* 5 MSB */
- [2] = 0xC0, /* 2 MSB */
- [3] = 0xFE, /* 7 MSB */
- [4] = 0xF0, /* 4 MSB */
- [5] = 0x80, /* 1 MSB */
- [6] = 0xFC, /* 6 MSB */
- [7] = 0xE0 /* 3 MSB */
- };
-
/* number of whole bytes of raw hash to compare:
* 10 is the size of the data a full onion address covers
* 16 is the size of the base32-encoded onion address */
size_t search_len = strlen(search);
int raw_len = (search_len*10)/16;
- int bitmask = bitmasks[search_len % 8];
/* end hangover code from sand-leek.c */
RSA* rsa_key = NULL;
@@ -161,7 +244,6 @@ int main(int argc, char **argv)
struct sha_data sha_c;
BIGNUM *bignum_e = NULL;
-
bignum_e = BN_new();
if (!bignum_e) {
fprintf(stderr, "Failed to allocate bignum for exponent\n");
@@ -170,31 +252,70 @@ int main(int argc, char **argv)
e = EXPONENT_MIN;
BN_set_word(bignum_e, e);
- if (!RSA_generate_key_ex(rsa_key, RSA_KEY_BITS, bignum_e, NULL)) {
- fprintf(stderr, "Failed to generate RSA key\n");
- return 1;
- }
- der_length = i2d_RSAPublicKey(rsa_key, NULL);
- if (der_length <= 0) {
- fprintf(stderr, "i2d failed\n");
+
+ do {
+ if (!RSA_generate_key_ex(rsa_key, RSA_KEY_BITS, bignum_e, NULL)) {
+ fprintf(stderr, "Failed to generate RSA key\n");
+ return 1;
+ }
+ der_length = i2d_RSAPublicKey(rsa_key, NULL);
+ if (der_length <= 0) {
+ fprintf(stderr, "i2d failed\n");
+ return 1;
+ }
+ der_data = malloc(der_length);
+ if (!der_data) {
+ fprintf(stderr, "DER data malloc failed\n");
+ return 1;
+ }
+ tmp_data = der_data;
+ if (i2d_RSAPublicKey(rsa_key, &tmp_data) != der_length) {
+ fprintf(stderr, "DER formatting failed\n");
+ return 1;
+ }
+
+ sha_init(&sha_c);
+ sha_update(&sha_c, der_data, der_length - EXPONENT_SIZE_BYTES);
+ free(der_data);
+
+ e = run(preferred_platform, search_raw, raw_len, search_len, &sha_c);
+ } while (e == 0);
+
+ BN_set_word(bignum_e, e);
+fprintf(stderr, "exponent is %lx\n", e);
+
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+ if (BN_set_word(bignum_e, e) != 1) {
+ fprintf(stderr, "BN_set_word failed\n");
return 1;
}
- der_data = malloc(der_length);
- if (!der_data) {
- fprintf(stderr, "DER data malloc failed\n");
+ RSA_set0_key(rsa_key, NULL, bignum_e, NULL);
+ /* allocate what was freed by above function call */
+ bignum_e = BN_new();
+#else
+ /* much tidier to be honest */
+ BN_set_word(rsa_key->e, e);
+#endif
+ if (key_update_d(rsa_key)) {
+ printf("Error updating d component of RSA key, stop.\n");
return 1;
}
- tmp_data = der_data;
- if (i2d_RSAPublicKey(rsa_key, &tmp_data) != der_length) {
- fprintf(stderr, "DER formatting failed\n");
+
+ if (RSA_check_key(rsa_key) == 1) {
+ fprintf(stderr, "Key valid\n");
+ EVP_PKEY *evp_key = EVP_PKEY_new();
+ if (!EVP_PKEY_assign_RSA(evp_key, rsa_key)) {
+ fprintf(stderr, "EVP_PKEY assignment failed\n");
+ return 1;
+ }
+ PEM_write_PrivateKey(stdout, evp_key, NULL, NULL, 0, NULL, NULL);
+ EVP_PKEY_free(evp_key);
return 1;
+ } else {
+ fprintf(stderr, "Key invalid:");
+ ERR_print_errors_fp(stderr);
}
- sha_init(&sha_c);
- sha_update(&sha_c, der_data, der_length - EXPONENT_SIZE_BYTES);
- free(der_data);
-
- run(preferred_platform, search_raw, raw_len, &sha_c);
return 0;
}
diff --git a/trampoline.c b/trampoline.c
index bfbc508..bebb160 100644
--- a/trampoline.c
+++ b/trampoline.c
@@ -193,9 +193,15 @@ void tramp_destroy()
{
clReleaseKernel(kernel);
clReleaseProgram(program);
+ clFlush(command_queue);
+ clFinish(command_queue);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
+ clReleaseMemObject(device_result);
+ clReleaseMemObject(device_sha);
+ clReleaseMemObject(device_search);
+
if (devices) {
free(devices);
devices = NULL;
@@ -322,11 +328,11 @@ int tramp_compile_kernel()
*
* FIXME investigate using something more flexible?
*/
-int tramp_set_kernel_args(unsigned int raw_len)
+int tramp_set_kernel_args(unsigned int raw_len, unsigned int bitmask)
{
cl_int ret = 0;
- device_result = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 65536, NULL, &ret);
+ device_result = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 65536*4, NULL, &ret);
if (ret != CL_SUCCESS) {
fprintf(stderr, "Failed to create buffer for slave device: %s ", get_cl_error_string(ret));
return 1;
@@ -368,6 +374,12 @@ int tramp_set_kernel_args(unsigned int raw_len)
return 1;
}
+ ret = clSetKernelArg(kernel, 4, sizeof(cl_int), &bitmask);
+ if (ret != CL_SUCCESS) {
+ fprintf(stderr, "Error on bitmask argument: %s ", get_cl_error_string(ret));
+ return 1;
+ }
+
return 0;
}
@@ -383,7 +395,7 @@ int tramp_run_kernel()
cl_event event;
cl_int ret = 0;
size_t workgroup_sizes[2];
- workgroup_sizes[0] = 65536;
+ workgroup_sizes[0] = 63356;
workgroup_sizes[1] = 1;
ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, workgroup_sizes, NULL, 0, NULL, &event);
diff --git a/trampoline.h b/trampoline.h
index 67cbd33..5f4a76e 100644
--- a/trampoline.h
+++ b/trampoline.h
@@ -5,7 +5,7 @@ void tramp_destroy(void);
int tramp_load_kernel(const char *filename);
char *tramp_get_build_log(void);
int tramp_compile_kernel(void);
-int tramp_set_kernel_args(unsigned int raw_len);
+int tramp_set_kernel_args(unsigned int raw_len, unsigned int bitmask);
int tramp_run_kernel(void);
int tramp_copy_data(void **buffer, size_t size);
int tramp_copy_sha(struct sha_data *sha);