From 133dd90a0de29b7c24217ca87af57ede4da1247b Mon Sep 17 00:00:00 2001 From: David Phillips Date: Tue, 24 Apr 2018 17:22:10 +1200 Subject: Marginally improve GPU comparison speed This is a lot less readable, but on the GTX1070, this improves overall throughput by 2.6% --- cl/onion.cl | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/cl/onion.cl b/cl/onion.cl index 66075a9..e6835b9 100644 --- a/cl/onion.cl +++ b/cl/onion.cl @@ -120,7 +120,6 @@ __kernel void key_brute( const unsigned int bitmask) { unsigned int tx = get_global_id(0); - unsigned int ty = get_global_id(1); unsigned int i,j; struct sha_data ctx; @@ -159,15 +158,48 @@ __kernel void key_brute( ctx.data[INITIAL_DATA_LEN + 3] = i; ///////////////////////////////////////////////////////////// sha_final(&digest, &ctx); - int all_clear = 1; - for (j = 0; j < raw_length; j++) { - if (search[j] != digest[j]) { - all_clear = 0; - } + int all_clear; + switch (raw_length) { + case 10: /* nop */ break; + case 9: all_clear = (search[9] & bitmask) == (bitmask & (ctx.c >> 24)); break; + case 8: all_clear = (search[8] & bitmask) == (bitmask & (ctx.b )); break; + case 7: all_clear = (search[7] & bitmask) == (bitmask & (ctx.b >> 8)); break; + case 6: all_clear = (search[6] & bitmask) == (bitmask & (ctx.b >> 16)); break; + case 5: /* nop */ break; + case 4: all_clear = (search[4] & bitmask) == (bitmask & (ctx.b >> 24)); break; + case 3: all_clear = (search[3] & bitmask) == (bitmask & (ctx.a )); break; + case 2: all_clear = (search[2] & bitmask) == (bitmask & (ctx.a >> 8)); break; + case 1: all_clear = (search[1] & bitmask) == (bitmask & (ctx.a >> 16)); break; + case 0: all_clear = (search[0] & bitmask) == (bitmask & (ctx.a >> 24)); break; + + } + + switch (raw_length) { + case 10: all_clear &= (search[9] == ctx.c >> 16); /* fallthrough */ + case 9: all_clear &= (search[8] == ctx.c >> 24); /* fallthrough */ + case 8: all_clear &= (search[7] == ctx.b); /* fallthrough */ + case 7: all_clear &= (search[6] == ctx.b >> 8); /* fallthrough */ + case 6: all_clear &= (search[5] == ctx.b >> 16); /* fallthrough */ + case 5: all_clear &= (search[4] == ctx.b >> 24); /* fallthrough */ + case 4: all_clear &= (search[3] == ctx.a); /* fallthrough */ + case 3: all_clear &= (search[2] == ctx.a >> 8); /* fallthrough */ + case 2: all_clear &= (search[1] == ctx.a >> 16); /* fallthrough */ + case 1: all_clear &= (search[0] == ctx.a >> 24); /* fallthrough */ + case 0: /* nop */; } - if (all_clear == 1 && (digest[j] & bitmask) == (search[j] & bitmask)) { + if (all_clear != 0) { results[tx] = i; } + +// int all_clear = 1; +// for (j = 0; j < raw_length; j++) { +// if (search[j] != digest[j]) { +// all_clear = 0; +// } +// } +// if (all_clear == 1 && (digest[j] & bitmask) == (search[j] & bitmask)) { +// results[tx] = i; +// } } return; -- cgit v1.1