diff options
| author | David Phillips <david@sighup.nz> | 2018-04-23 20:06:16 +1200 | 
|---|---|---|
| committer | David Phillips <david@sighup.nz> | 2018-04-24 17:11:30 +1200 | 
| commit | 43fc2e02818737e1979c4f21d726a356784d16f2 (patch) | |
| tree | c15a6ae2eabfc9ea6b5627742b1ffe866a32285b /cl | |
| parent | 27e9f33ed36c10b1a4e34c0ebc86d87ab2cfb5ff (diff) | |
| download | sand-leek-43fc2e02818737e1979c4f21d726a356784d16f2.tar.xz | |
Misc performance improvements in CL kernel
Diffstat (limited to 'cl')
| -rw-r--r-- | cl/onion.cl | 179 | 
1 files changed, 23 insertions, 156 deletions
| diff --git a/cl/onion.cl b/cl/onion.cl index 2972617..4200859 100644 --- a/cl/onion.cl +++ b/cl/onion.cl @@ -1,6 +1,6 @@ +#define INITIAL_DATA_LEN 9  #define SHA_CHUNK_LEN 64  #define ROL(x, shamt) ((x << shamt) | (x >> (sizeof(x)*8 - shamt))) -#define MIN(a, b) ((a) < (b) ? (a) : (b))  struct sha_data {  	unsigned int a; @@ -13,18 +13,6 @@ struct sha_data {  	unsigned char data[SHA_CHUNK_LEN];  }; -void memcpy(void *restrict dest, void *restrict src, int len) { -	unsigned char *dest_ = (unsigned char*)dest; -	unsigned char *src_ = (unsigned char*)src; -	int i = 0; -	for (i = 0; i < len; i++) { -		dest_[i] = src_[i]; -	} -//	while (len-- >= 0) { -//		dest_[len] = src_[len]; -//	} -} -  void sha_chunk(unsigned char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {  	unsigned int w[80] = {0};  	unsigned int new_a = 0; @@ -101,70 +89,20 @@ void sha_chunk(unsigned char (*buf)[SHA_CHUNK_LEN], struct sha_data *sha) {  	sha->e += e;  } -void sha_update(struct sha_data *c, void *data, unsigned int size) { -	unsigned int i = 0; -	size_t remaining = size; -	unsigned char *bdata = (unsigned char*)data; - - -	size_t count = MIN(size, SHA_CHUNK_LEN - c->data_len); -	for (i = 0; i < count; i++) -		c->data[c->data_len+i] = ((char*)data)[i]; -	//memcpy(&(c->data[c->data_len]), data, count); -	c->data_len += count; -	remaining -= count; - - -	while (c->data_len == SHA_CHUNK_LEN) { -		sha_chunk(&(c->data), c); -		count = MIN(remaining, SHA_CHUNK_LEN); -		memcpy(c->data, &bdata[size-remaining], count); -		remaining -= count; -		c->data_len = count; -	} - -	/* representative of all data throughput, inclusive of the buffer in -	 * the context */ -	c->len += size; -} -  void sha_final(unsigned char *digest, struct sha_data *c) {  	size_t i = 0; -	c->data[c->data_len++] = 0x80; - -	/* Transform byte len to bit len */ -	c->len *= 8; - -	for (i = c->data_len; i < SHA_CHUNK_LEN; i++) +	#pragma unroll +	for (i = INITIAL_DATA_LEN+5; i < SHA_CHUNK_LEN-8; i++)  		c->data[i] = 0; -	/* still room for the 64-bit message length at the end of this chunk? */ -	if (c->data_len + 8 > SHA_CHUNK_LEN) { -		sha_chunk(&(c->data), c); -		for (i = 0; i < SHA_CHUNK_LEN; i++) -			c->data[i] = 0; -	} - -	/* FIXME loop or leave unrolled? */ -	c->data[56] = c->len >> 56; -	c->data[57] = c->len >> 48; -	c->data[58] = c->len >> 40; -	c->data[59] = c->len >> 32; -	c->data[60] = c->len >> 24; -	c->data[61] = c->len >> 16; -	c->data[62] = c->len >> 8; -	c->data[63] = c->len; -  	sha_chunk(&(c->data), c); -	/* FIXME loop or leave unrolled? */  	digest[ 0] = c->a >> 24;  	digest[ 1] = c->a >> 16;  	digest[ 2] = c->a >> 8;  	digest[ 3] = c->a; -  	digest[ 4] = c->b >> 24;  	digest[ 5] = c->b >> 16;  	digest[ 6] = c->b >> 8; @@ -172,24 +110,9 @@ void sha_final(unsigned char *digest, struct sha_data *c) {  	digest[ 8] = c->c >> 24;  	digest[ 9] = c->c >> 16; - - -// second half of hash not needed eh? -	digest[10] = c->c >> 8; -	digest[11] = c->c; - -	digest[12] = c->d >> 24; -	digest[13] = c->d >> 16; -	digest[14] = c->d >> 8; -	digest[15] = c->d; - -	digest[16] = c->e >> 24; -	digest[17] = c->e >> 16; -	digest[18] = c->e >> 8; -	digest[19] = c->e;  } -__kernel void fractal_gen( +__kernel void key_brute(  	__global unsigned int *results,  	__constant struct sha_data *partial,  	__constant unsigned char *search, @@ -202,41 +125,40 @@ __kernel void fractal_gen(  	struct sha_data ctx; -	/* FIXME dummy e (big-endian) */ -	unsigned char e[4] = {0x1F, 0xFF, 0xFF, 0xFF};  	unsigned char digest[20]; -	/* first half of e is our worker number rest is determined later */ -	e[0] = ((tx >> 8) & 0xFF); +	results[tx] = 0; + +	/* Data area plus (useless) exponent area, and end bit */ +	#pragma unroll +	for (j = 0; j < INITIAL_DATA_LEN+5; j++) { +		ctx.data[j] = partial->data[j]; +	} +	#pragma unroll +	for (j = SHA_CHUNK_LEN - 8; j < SHA_CHUNK_LEN; j++) { +		ctx.data[j] = partial->data[j]; +	} + +	ctx.data[INITIAL_DATA_LEN] = tx >> 8;  	/* if MSB is 0, then it doesn't need to be stored in the key, so violates  	 * law of sizeof(e) == 4, messing everything up */ -	if (e[0] == 0) { +	if (ctx.data[INITIAL_DATA_LEN] == 0) {  		return;  	} - -	e[1] = tx & 0xFF; -	results[tx] = 0; +	ctx.data[INITIAL_DATA_LEN + 1] = tx;  	for (i = 3; i < 65536; i+=2) { -		e[2] = (i >> 8) & 0xFF; -		e[3] = i & 0xFF; - -		#pragma unroll -		for (j = 0; j < SHA_CHUNK_LEN; j++) { -			ctx.data[j] = partial->data[j]; -		}  		ctx.a = partial->a;  		ctx.b = partial->b;  		ctx.c = partial->c;  		ctx.d = partial->d;  		ctx.e = partial->e; -		ctx.len = partial->len; -		ctx.data_len = partial->data_len; - -		sha_update(&ctx, &e, 4); +////////////////////////////////////////////////////////////// +		ctx.data[INITIAL_DATA_LEN + 2] = i >> 8; +		ctx.data[INITIAL_DATA_LEN + 3] = i; +/////////////////////////////////////////////////////////////  		sha_final(&digest, &ctx); -  		int all_clear = 1;  		for (j = 0; j < raw_length; j++) {  			if (search[j] != digest[j]) { @@ -250,58 +172,3 @@ __kernel void fractal_gen(  	return;  } - - -void unused() { -/* -#define R2(w, a, b, c, d, e, i) a = ROL(a, 5) + (b^c^d) + e + w[i] + 0x6ED9EBA1; b = ROL(b, 30); - -	R2(w, a, b, c, d, e, 20); -	R2(w, e, a, b, c, d, 21); -	R2(w, d, e, a, b, c, 22); -	R2(w, c, d, e, a, b, 23); -	R2(w, b, c, d, e, a, 24); -	R2(w, a, b, c, d, e, 25); -	R2(w, e, a, b, c, d, 26); -	R2(w, d, e, a, b, c, 27); -	R2(w, c, d, e, a, b, 28); -	R2(w, b, c, d, e, a, 29); -	R2(w, a, b, c, d, e, 30); -	R2(w, e, a, b, c, d, 31); -	R2(w, d, e, a, b, c, 32); -	R2(w, c, d, e, a, b, 33); -	R2(w, b, c, d, e, a, 34); -	R2(w, a, b, c, d, e, 35); -	R2(w, e, a, b, c, d, 36); -	R2(w, d, e, a, b, c, 37); -	R2(w, c, d, e, a, b, 38); -	R2(w, b, c, d, e, a, 39);*/ - -/* -#define R3(w, a, b, c, d, e, i) a = ROL(a, 5) + ((b&c)|(b&d)|(c&d)) + e + w[i] + 0x8F1BBCDC; b = ROL(b, 30); - -	R3(w, a, b, c, d, e, 40); -	R3(w, e, a, b, c, d, 41); -	R3(w, d, e, a, b, c, 42); -	R3(w, c, d, e, a, b, 43); -	R3(w, b, c, d, e, a, 44); -	R3(w, a, b, c, d, e, 45); -	R3(w, e, a, b, c, d, 46); -	R3(w, d, e, a, b, c, 47); -	R3(w, c, d, e, a, b, 48); -	R3(w, b, c, d, e, a, 49); -	R3(w, a, b, c, d, e, 50); -	R3(w, e, a, b, c, d, 51); -	R3(w, d, e, a, b, c, 52); -	R3(w, c, d, e, a, b, 53); -	R3(w, b, c, d, e, a, 54); -	R3(w, a, b, c, d, e, 55); -	R3(w, e, a, b, c, d, 56); -	R3(w, d, e, a, b, c, 57); -	R3(w, c, d, e, a, b, 58); -	R3(w, b, c, d, e, a, 59); -*/ - - - -} | 
