From: Iceman Date: Sat, 6 Jan 2018 17:54:27 +0000 (+0100) Subject: Merge pull request #538 from merlokk/hardnestedsetsimd X-Git-Tag: v3.1.0~92 X-Git-Url: http://git.zerfleddert.de/cgi-bin/gitweb.cgi/proxmark3-svn/commitdiff_plain/932af56ea0f7e931aa39020c4af46ab81f4ca845?hp=91ca74f0ed2a6db99df87e7b318660f45b6858f9 Merge pull request #538 from merlokk/hardnestedsetsimd hardnested SIMD select --- diff --git a/client/cmdhfmf.c b/client/cmdhfmf.c index c471fdf7..4956df8c 100644 --- a/client/cmdhfmf.c +++ b/client/cmdhfmf.c @@ -25,6 +25,7 @@ #include "mifarehost.h" #include "mifare.h" #include "mfkey.h" +#include "hardnested/hardnested_bf_core.h" #define NESTED_SECTOR_RETRY 10 // how often we try mfested() until we give up @@ -862,6 +863,13 @@ int CmdHF14AMfNestedHard(const char *Cmd) PrintAndLog(" w: Acquire nonces and write them to binary file nonces.bin"); PrintAndLog(" s: Slower acquisition (required by some non standard cards)"); PrintAndLog(" r: Read nonces.bin and start attack"); + PrintAndLog(" iX: set type of SIMD instructions. Without this flag programs autodetect it."); + PrintAndLog(" i5: AVX512"); + PrintAndLog(" i2: AVX2"); + PrintAndLog(" ia: AVX"); + PrintAndLog(" is: SSE2"); + PrintAndLog(" im: MMX"); + PrintAndLog(" in: none (use CPU regular instruction set)"); PrintAndLog(" "); PrintAndLog(" sample1: hf mf hardnested 0 A FFFFFFFFFFFF 4 A"); PrintAndLog(" sample2: hf mf hardnested 0 A FFFFFFFFFFFF 4 A w"); @@ -880,15 +888,20 @@ int CmdHF14AMfNestedHard(const char *Cmd) int tests = 0; + uint16_t iindx = 0; if (ctmp == 'R' || ctmp == 'r') { nonce_file_read = true; + iindx = 1; if (!param_gethex(Cmd, 1, trgkey, 12)) { know_target_key = true; + iindx = 2; } } else if (ctmp == 'T' || ctmp == 't') { tests = param_get32ex(Cmd, 1, 100, 10); + iindx = 2; if (!param_gethex(Cmd, 2, trgkey, 12)) { know_target_key = true; + iindx = 3; } } else { blockNo = param_get8(Cmd, 0); @@ -922,19 +935,54 @@ int CmdHF14AMfNestedHard(const char *Cmd) know_target_key = true; i++; } + iindx = i; while ((ctmp = param_getchar(Cmd, i))) { if (ctmp == 's' || ctmp == 'S') { slow = true; } else if (ctmp == 'w' || ctmp == 'W') { nonce_file_write = true; + } else if (param_getlength(Cmd, i) == 2 && ctmp == 'i') { + iindx = i; } else { - PrintAndLog("Possible options are w and/or s"); + PrintAndLog("Possible options are w , s and/or iX"); return 1; } i++; } } + + SetSIMDInstr(SIMD_AUTO); + if (iindx > 0) { + while ((ctmp = param_getchar(Cmd, iindx))) { + if (param_getlength(Cmd, iindx) == 2 && ctmp == 'i') { + switch(param_getchar_indx(Cmd, 1, iindx)) { + case '5': + SetSIMDInstr(SIMD_AVX512); + break; + case '2': + SetSIMDInstr(SIMD_AVX2); + break; + case 'a': + SetSIMDInstr(SIMD_AVX); + break; + case 's': + SetSIMDInstr(SIMD_SSE2); + break; + case 'm': + SetSIMDInstr(SIMD_MMX); + break; + case 'n': + SetSIMDInstr(SIMD_NONE); + break; + default: + PrintAndLog("Unknown SIMD type. %c", param_getchar_indx(Cmd, 1, iindx)); + return 1; + } + } + iindx++; + } + } PrintAndLog("--target block no:%3d, target key type:%c, known target key: 0x%02x%02x%02x%02x%02x%02x%s, file action: %s, Slow: %s, Tests: %d ", trgBlockNo, diff --git a/client/cmdhfmfhard.c b/client/cmdhfmfhard.c index 0153541e..96c3a989 100644 --- a/client/cmdhfmfhard.c +++ b/client/cmdhfmfhard.c @@ -32,6 +32,7 @@ #include "crapto1/crapto1.h" #include "parity.h" #include "hardnested/hardnested_bruteforce.h" +#include "hardnested/hardnested_bf_core.h" #include "hardnested/hardnested_bitarray_core.h" #include "zlib.h" @@ -71,27 +72,32 @@ static float brute_force_per_second; static void get_SIMD_instruction_set(char* instruction_set) { -#if defined (__i386__) || defined (__x86_64__) - #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) - #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) strcpy(instruction_set, "AVX512F"); - else if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2"); - #else - if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2"); - #endif - else if (__builtin_cpu_supports("avx")) strcpy(instruction_set, "AVX"); - else if (__builtin_cpu_supports("sse2")) strcpy(instruction_set, "SSE2"); - else if (__builtin_cpu_supports("mmx")) strcpy(instruction_set, "MMX"); - else - #endif -#endif - strcpy(instruction_set, "no"); + switch(GetSIMDInstrAuto()) { + case SIMD_AVX512: + strcpy(instruction_set, "AVX512F"); + break; + case SIMD_AVX2: + strcpy(instruction_set, "AVX2"); + break; + case SIMD_AVX: + strcpy(instruction_set, "AVX"); + break; + case SIMD_SSE2: + strcpy(instruction_set, "SSE2"); + break; + case SIMD_MMX: + strcpy(instruction_set, "MMX"); + break; + default: + strcpy(instruction_set, "no"); + break; + } } static void print_progress_header(void) { char progress_text[80]; - char instr_set[12] = ""; + char instr_set[12] = {0}; get_SIMD_instruction_set(instr_set); sprintf(progress_text, "Start using %d threads and %s SIMD core", num_CPUs(), instr_set); PrintAndLog("\n\n"); @@ -2528,6 +2534,10 @@ static void set_test_state(uint8_t byte) int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests) { char progress_text[80]; + + char instr_set[12] = {0}; + get_SIMD_instruction_set(instr_set); + PrintAndLog("Using %s SIMD core.", instr_set); srand((unsigned) time(NULL)); brute_force_per_second = brute_force_benchmark(); diff --git a/client/hardnested/hardnested_bf_core.c b/client/hardnested/hardnested_bf_core.c index 3c0c044f..78384bbc 100644 --- a/client/hardnested/hardnested_bf_core.c +++ b/client/hardnested/hardnested_bf_core.c @@ -548,44 +548,93 @@ out: crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; -// determine the available instruction set at runtime and call the correct function -const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { +static SIMDExecInstr intSIMDInstr = SIMD_AUTO; + +void SetSIMDInstr(SIMDExecInstr instr) { + intSIMDInstr = instr; + + crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; + bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; +} + +SIMDExecInstr GetSIMDInstr() { + SIMDExecInstr instr = SIMD_NONE; + #if defined (__i386__) || defined (__x86_64__) #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; - else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512; + else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; #else - if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; #endif - else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; - else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; - else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; + else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX; + else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2; + else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX; else #endif #endif - crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD; + instr = SIMD_NONE; + + return instr; +} + +SIMDExecInstr GetSIMDInstrAuto() { + SIMDExecInstr instr = intSIMDInstr; + if (instr == SIMD_AUTO) + return GetSIMDInstr(); + + return instr; +} + +// determine the available instruction set at runtime and call the correct function +const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { + switch(GetSIMDInstrAuto()) { + case SIMD_AVX512: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; + break; + case SIMD_AVX2: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + break; + case SIMD_AVX: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; + break; + case SIMD_SSE2: + crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; + break; + case SIMD_MMX: + crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; + break; + default: + crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD; + break; + } // call the most optimized function for this CPU return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); } void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { -#if defined (__i386__) || defined (__x86_64__) - #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) - #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; - else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; - #else - if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; - #endif - else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; - else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; - else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; - else - #endif -#endif - bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD; + switch(GetSIMDInstrAuto()) { + case SIMD_AVX512: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; + break; + case SIMD_AVX2: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; + break; + case SIMD_AVX: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; + break; + case SIMD_SSE2: + bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; + break; + case SIMD_MMX: + bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; + break; + default: + bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD; + break; + } // call the most optimized function for this CPU (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); diff --git a/client/hardnested/hardnested_bf_core.h b/client/hardnested/hardnested_bf_core.h index 7a445993..b3df0547 100644 --- a/client/hardnested/hardnested_bf_core.h +++ b/client/hardnested/hardnested_bf_core.h @@ -52,6 +52,18 @@ THE SOFTWARE. #include "hardnested_bruteforce.h" // statelist_t +typedef enum { + SIMD_AUTO, + SIMD_AVX512, + SIMD_AVX2, + SIMD_AVX, + SIMD_SSE2, + SIMD_MMX, + SIMD_NONE, +} SIMDExecInstr; +extern void SetSIMDInstr(SIMDExecInstr instr); +extern SIMDExecInstr GetSIMDInstrAuto(); + extern const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonces_2nd_byte, noncelist_t *nonces); extern void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonces, uint8_t *bf_test_nonce_par); diff --git a/client/hardnested/hardnested_bruteforce.c b/client/hardnested/hardnested_bruteforce.c index 718b7c5d..3072fc52 100644 --- a/client/hardnested/hardnested_bruteforce.c +++ b/client/hardnested/hardnested_bruteforce.c @@ -140,7 +140,7 @@ bool verify_key(uint32_t cuid, noncelist_t *nonces, uint8_t *best_first_bytes, u } -static void* crack_states_thread(void* x){ +static void* __attribute__((force_align_arg_pointer)) crack_states_thread(void* x){ struct arg { bool silent;