From: Iceman Date: Mon, 15 Jan 2018 15:06:37 +0000 (+0100) Subject: Merge pull request #503 from merlokk/travis X-Git-Tag: v3.1.0~86 X-Git-Url: http://git.zerfleddert.de/cgi-bin/gitweb.cgi/proxmark3-svn/commitdiff_plain/138a0e7a6b0d043cd3b1736f194213291d42d30d?hp=84e86ed9e3c435bd386370ea4c18887decfb48c6 Merge pull request #503 from merlokk/travis Travis --- diff --git a/armsrc/appmain.c b/armsrc/appmain.c index e292483b..1c3d9398 100644 --- a/armsrc/appmain.c +++ b/armsrc/appmain.c @@ -135,35 +135,28 @@ void Dbhexdump(int len, uint8_t *d, bool bAsci) { // return that. //----------------------------------------------------------------------------- static int ReadAdc(int ch) -{ - uint32_t d; - - AT91C_BASE_ADC->ADC_CR = AT91C_ADC_SWRST; - AT91C_BASE_ADC->ADC_MR = - ADC_MODE_PRESCALE(63 /* was 32 */) | // ADC_CLK = MCK / ((63+1) * 2) = 48MHz / 128 = 375kHz - ADC_MODE_STARTUP_TIME(1 /* was 16 */) | // Startup Time = (1+1) * 8 / ADC_CLK = 16 / 375kHz = 42,7us Note: must be > 20us - ADC_MODE_SAMPLE_HOLD_TIME(15 /* was 8 */); // Sample & Hold Time SHTIM = 15 / ADC_CLK = 15 / 375kHz = 40us - +{ // Note: ADC_MODE_PRESCALE and ADC_MODE_SAMPLE_HOLD_TIME are set to the maximum allowed value. - // Both AMPL_LO and AMPL_HI are very high impedance (10MOhm) outputs, the input capacitance of the ADC is 12pF (typical). This results in a time constant - // of RC = 10MOhm * 12pF = 120us. Even after the maximum configurable sample&hold time of 40us the input capacitor will not be fully charged. + // AMPL_HI is are high impedance (10MOhm || 1MOhm) output, the input capacitance of the ADC is 12pF (typical). This results in a time constant + // of RC = (0.91MOhm) * 12pF = 10.9us. Even after the maximum configurable sample&hold time of 40us the input capacitor will not be fully charged. // // The maths are: // If there is a voltage v_in at the input, the voltage v_cap at the capacitor (this is what we are measuring) will be // - // v_cap = v_in * (1 - exp(-RC/SHTIM)) = v_in * (1 - exp(-3)) = v_in * 0,95 (i.e. an error of 5%) - // - // Note: with the "historic" values in the comments above, the error was 34% !!! - - AT91C_BASE_ADC->ADC_CHER = ADC_CHANNEL(ch); + // v_cap = v_in * (1 - exp(-SHTIM/RC)) = v_in * (1 - exp(-40us/10.9us)) = v_in * 0,97 (i.e. an error of 3%) - AT91C_BASE_ADC->ADC_CR = AT91C_ADC_START; + AT91C_BASE_ADC->ADC_CR = AT91C_ADC_SWRST; + AT91C_BASE_ADC->ADC_MR = + ADC_MODE_PRESCALE(63) | // ADC_CLK = MCK / ((63+1) * 2) = 48MHz / 128 = 375kHz + ADC_MODE_STARTUP_TIME(1) | // Startup Time = (1+1) * 8 / ADC_CLK = 16 / 375kHz = 42,7us Note: must be > 20us + ADC_MODE_SAMPLE_HOLD_TIME(15); // Sample & Hold Time SHTIM = 15 / ADC_CLK = 15 / 375kHz = 40us - while(!(AT91C_BASE_ADC->ADC_SR & ADC_END_OF_CONVERSION(ch))) - ; - d = AT91C_BASE_ADC->ADC_CDR[ch]; + AT91C_BASE_ADC->ADC_CHER = ADC_CHANNEL(ch); + AT91C_BASE_ADC->ADC_CR = AT91C_ADC_START; - return d; + while(!(AT91C_BASE_ADC->ADC_SR & ADC_END_OF_CONVERSION(ch))) {}; + + return AT91C_BASE_ADC->ADC_CDR[ch]; } int AvgAdc(int ch) // was static - merlok @@ -193,6 +186,8 @@ void MeasureAntennaTuningLfOnly(int *vLf125, int *vLf134, int *peakf, int *peakv FpgaDownloadAndGo(FPGA_BITSTREAM_LF); FpgaWriteConfWord(FPGA_MAJOR_MODE_LF_ADC | FPGA_LF_ADC_READER_FIELD); + SpinDelay(50); + for (i=255; i>=19; i--) { WDT_HIT(); FpgaSendCommand(FPGA_CMD_SET_DIVISOR, i); @@ -201,7 +196,7 @@ void MeasureAntennaTuningLfOnly(int *vLf125, int *vLf134, int *peakf, int *peakv if (i==95) *vLf125 = adcval; // voltage at 125Khz if (i==89) *vLf134 = adcval; // voltage at 134Khz - LF_Results[i] = adcval>>8; // scale int to fit in byte for graphing purposes + LF_Results[i] = adcval >> 9; // scale int to fit in byte for graphing purposes if(LF_Results[i] > peak) { *peakv = adcval; peak = LF_Results[i]; diff --git a/armsrc/iso14443a.c b/armsrc/iso14443a.c index 94ca52f5..7d589f0e 100644 --- a/armsrc/iso14443a.c +++ b/armsrc/iso14443a.c @@ -2482,7 +2482,7 @@ void RAMFUNC SniffMifare(uint8_t param) { for(uint32_t sniffCounter = 0; true; ) { if(BUTTON_PRESS()) { - DbpString("cancelled by button"); + DbpString("Canceled by button."); break; } @@ -2576,7 +2576,7 @@ void RAMFUNC SniffMifare(uint8_t param) { } // main cycle - DbpString("COMMAND FINISHED"); + DbpString("COMMAND FINISHED."); FpgaDisableSscDma(); MfSniffEnd(); diff --git a/armsrc/mifarecmd.c b/armsrc/mifarecmd.c index fcfd7e8f..3854b589 100644 --- a/armsrc/mifarecmd.c +++ b/armsrc/mifarecmd.c @@ -20,6 +20,9 @@ #include "parity.h" #include "crc.h" +#define HARDNESTED_AUTHENTICATION_TIMEOUT 848 // card times out 1ms after wrong authentication (according to NXP documentation) +#define HARDNESTED_PRE_AUTHENTICATION_LEADTIME 400 // some (non standard) cards need a pause after select before they are ready for first authentication + // the block number for the ISO14443-4 PCB static uint8_t pcb_blocknum = 0; // Deselect card by sending a s-block. the crc is precalced for speed @@ -677,7 +680,7 @@ void MifareAcquireEncryptedNonces(uint32_t arg0, uint32_t arg1, uint32_t flags, } if (slow) { - timeout = GetCountSspClk() + PRE_AUTHENTICATION_LEADTIME; + timeout = GetCountSspClk() + HARDNESTED_PRE_AUTHENTICATION_LEADTIME; while(GetCountSspClk() < timeout); } @@ -694,10 +697,12 @@ void MifareAcquireEncryptedNonces(uint32_t arg0, uint32_t arg1, uint32_t flags, continue; } - // send a dummy response in order to trigger the cards authentication failure timeout - uint8_t dummy_answer[8] = {0}; - ReaderTransmit(dummy_answer, 8, NULL); + // send an incomplete dummy response in order to trigger the card's authentication failure timeout + uint8_t dummy_answer[1] = {0}; + ReaderTransmit(dummy_answer, 1, NULL); + timeout = GetCountSspClk() + HARDNESTED_AUTHENTICATION_TIMEOUT; + num_nonces++; if (num_nonces % 2) { memcpy(buf+i, receivedAnswer, 4); @@ -709,6 +714,9 @@ void MifareAcquireEncryptedNonces(uint32_t arg0, uint32_t arg1, uint32_t flags, i += 9; } + // wait for the card to become ready again + while(GetCountSspClk() < timeout); + } LED_C_OFF(); diff --git a/armsrc/mifareutil.h b/armsrc/mifareutil.h index c34dc8f4..b2912895 100644 --- a/armsrc/mifareutil.h +++ b/armsrc/mifareutil.h @@ -24,8 +24,6 @@ #define CRYPT_REQUEST 2 #define AUTH_FIRST 0 #define AUTH_NESTED 2 -#define AUTHENTICATION_TIMEOUT 848 // card times out 1ms after wrong authentication (according to NXP documentation) -#define PRE_AUTHENTICATION_LEADTIME 400 // some (non standard) cards need a pause after select before they are ready for first authentication // mifare 4bit card answers #define CARD_ACK 0x0A // 1010 - ACK diff --git a/client/cmdhf14a.c b/client/cmdhf14a.c index e4b245c3..480923d6 100644 --- a/client/cmdhf14a.c +++ b/client/cmdhf14a.c @@ -35,8 +35,13 @@ static int CmdHelp(const char *Cmd); static int waitCmd(uint8_t iLen); +// structure and database for uid -> tagtype lookups +typedef struct { + uint8_t uid; + char* desc; +} manufactureName; -const manufactureName manufactureMapping[] = { +static const manufactureName manufactureMapping[] = { // ID, "Vendor Country" { 0x01, "Motorola UK" }, { 0x02, "ST Microelectronics SA France" }, @@ -154,7 +159,7 @@ int CmdHF14AReader(const char *Cmd) { break; case 'x': case 'X': - cm = cm - ISO14A_CONNECT; + cm &= ~ISO14A_CONNECT; break; default: PrintAndLog("Unknown command."); @@ -192,7 +197,7 @@ int CmdHF14AReader(const char *Cmd) { PrintAndLog(" UID : %s", sprint_hex(card.uid, card.uidlen)); PrintAndLog("ATQA : %02x %02x", card.atqa[1], card.atqa[0]); - PrintAndLog(" SAK : %02x [%d]", card.sak, resp.arg[0]); + PrintAndLog(" SAK : %02x [%" PRIu64 "]", card.sak, resp.arg[0]); if(card.ats_len >= 3) { // a valid ATS consists of at least the length byte (TL) and 2 CRC bytes PrintAndLog(" ATS : %s", sprint_hex(card.ats, card.ats_len)); } @@ -244,7 +249,7 @@ int CmdHF14AInfo(const char *Cmd) PrintAndLog(" UID : %s", sprint_hex(card.uid, card.uidlen)); PrintAndLog("ATQA : %02x %02x", card.atqa[1], card.atqa[0]); - PrintAndLog(" SAK : %02x [%d]", card.sak, resp.arg[0]); + PrintAndLog(" SAK : %02x [%" PRIu64 "]", card.sak, resp.arg[0]); bool isMifareClassic = true; switch (card.sak) { diff --git a/client/cmdhf14a.h b/client/cmdhf14a.h index 401cead0..71007f95 100644 --- a/client/cmdhf14a.h +++ b/client/cmdhf14a.h @@ -15,12 +15,6 @@ #include #include -// structure and database for uid -> tagtype lookups -typedef struct { - uint8_t uid; - char* desc; -} manufactureName; - int CmdHF14A(const char *Cmd); int CmdHF14AList(const char *Cmd); int CmdHF14AMifare(const char *Cmd); diff --git a/client/cmdhfmf.c b/client/cmdhfmf.c index c471fdf7..b653cf30 100644 --- a/client/cmdhfmf.c +++ b/client/cmdhfmf.c @@ -25,6 +25,7 @@ #include "mifarehost.h" #include "mifare.h" #include "mfkey.h" +#include "hardnested/hardnested_bf_core.h" #define NESTED_SECTOR_RETRY 10 // how often we try mfested() until we give up @@ -862,6 +863,13 @@ int CmdHF14AMfNestedHard(const char *Cmd) PrintAndLog(" w: Acquire nonces and write them to binary file nonces.bin"); PrintAndLog(" s: Slower acquisition (required by some non standard cards)"); PrintAndLog(" r: Read nonces.bin and start attack"); + PrintAndLog(" iX: set type of SIMD instructions. Without this flag programs autodetect it."); + PrintAndLog(" i5: AVX512"); + PrintAndLog(" i2: AVX2"); + PrintAndLog(" ia: AVX"); + PrintAndLog(" is: SSE2"); + PrintAndLog(" im: MMX"); + PrintAndLog(" in: none (use CPU regular instruction set)"); PrintAndLog(" "); PrintAndLog(" sample1: hf mf hardnested 0 A FFFFFFFFFFFF 4 A"); PrintAndLog(" sample2: hf mf hardnested 0 A FFFFFFFFFFFF 4 A w"); @@ -880,15 +888,20 @@ int CmdHF14AMfNestedHard(const char *Cmd) int tests = 0; + uint16_t iindx = 0; if (ctmp == 'R' || ctmp == 'r') { nonce_file_read = true; + iindx = 1; if (!param_gethex(Cmd, 1, trgkey, 12)) { know_target_key = true; + iindx = 2; } } else if (ctmp == 'T' || ctmp == 't') { tests = param_get32ex(Cmd, 1, 100, 10); + iindx = 2; if (!param_gethex(Cmd, 2, trgkey, 12)) { know_target_key = true; + iindx = 3; } } else { blockNo = param_get8(Cmd, 0); @@ -922,19 +935,54 @@ int CmdHF14AMfNestedHard(const char *Cmd) know_target_key = true; i++; } + iindx = i; while ((ctmp = param_getchar(Cmd, i))) { if (ctmp == 's' || ctmp == 'S') { slow = true; } else if (ctmp == 'w' || ctmp == 'W') { nonce_file_write = true; + } else if (param_getlength(Cmd, i) == 2 && ctmp == 'i') { + iindx = i; } else { - PrintAndLog("Possible options are w and/or s"); + PrintAndLog("Possible options are w , s and/or iX"); return 1; } i++; } } + + SetSIMDInstr(SIMD_AUTO); + if (iindx > 0) { + while ((ctmp = param_getchar(Cmd, iindx))) { + if (param_getlength(Cmd, iindx) == 2 && ctmp == 'i') { + switch(param_getchar_indx(Cmd, 1, iindx)) { + case '5': + SetSIMDInstr(SIMD_AVX512); + break; + case '2': + SetSIMDInstr(SIMD_AVX2); + break; + case 'a': + SetSIMDInstr(SIMD_AVX); + break; + case 's': + SetSIMDInstr(SIMD_SSE2); + break; + case 'm': + SetSIMDInstr(SIMD_MMX); + break; + case 'n': + SetSIMDInstr(SIMD_NONE); + break; + default: + PrintAndLog("Unknown SIMD type. %c", param_getchar_indx(Cmd, 1, iindx)); + return 1; + } + } + iindx++; + } + } PrintAndLog("--target block no:%3d, target key type:%c, known target key: 0x%02x%02x%02x%02x%02x%02x%s, file action: %s, Slow: %s, Tests: %d ", trgBlockNo, @@ -2479,14 +2527,13 @@ int CmdHF14AMfSniff(const char *Cmd){ } UsbCommand resp; - if (WaitForResponseTimeout(CMD_ACK,&resp,2000)) { + if (WaitForResponseTimeoutW(CMD_ACK, &resp, 2000, false)) { res = resp.arg[0] & 0xff; uint16_t traceLen = resp.arg[1]; len = resp.arg[2]; if (res == 0) { // we are done - free(buf); - return 0; + break; } if (res == 1) { // there is (more) data to be transferred @@ -2562,6 +2609,9 @@ int CmdHF14AMfSniff(const char *Cmd){ } // while (true) free(buf); + + msleep(300); // wait for exiting arm side. + PrintAndLog("Done."); return 0; } diff --git a/client/cmdhfmfhard.c b/client/cmdhfmfhard.c index 0153541e..96c3a989 100644 --- a/client/cmdhfmfhard.c +++ b/client/cmdhfmfhard.c @@ -32,6 +32,7 @@ #include "crapto1/crapto1.h" #include "parity.h" #include "hardnested/hardnested_bruteforce.h" +#include "hardnested/hardnested_bf_core.h" #include "hardnested/hardnested_bitarray_core.h" #include "zlib.h" @@ -71,27 +72,32 @@ static float brute_force_per_second; static void get_SIMD_instruction_set(char* instruction_set) { -#if defined (__i386__) || defined (__x86_64__) - #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) - #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) strcpy(instruction_set, "AVX512F"); - else if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2"); - #else - if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2"); - #endif - else if (__builtin_cpu_supports("avx")) strcpy(instruction_set, "AVX"); - else if (__builtin_cpu_supports("sse2")) strcpy(instruction_set, "SSE2"); - else if (__builtin_cpu_supports("mmx")) strcpy(instruction_set, "MMX"); - else - #endif -#endif - strcpy(instruction_set, "no"); + switch(GetSIMDInstrAuto()) { + case SIMD_AVX512: + strcpy(instruction_set, "AVX512F"); + break; + case SIMD_AVX2: + strcpy(instruction_set, "AVX2"); + break; + case SIMD_AVX: + strcpy(instruction_set, "AVX"); + break; + case SIMD_SSE2: + strcpy(instruction_set, "SSE2"); + break; + case SIMD_MMX: + strcpy(instruction_set, "MMX"); + break; + default: + strcpy(instruction_set, "no"); + break; + } } static void print_progress_header(void) { char progress_text[80]; - char instr_set[12] = ""; + char instr_set[12] = {0}; get_SIMD_instruction_set(instr_set); sprintf(progress_text, "Start using %d threads and %s SIMD core", num_CPUs(), instr_set); PrintAndLog("\n\n"); @@ -2528,6 +2534,10 @@ static void set_test_state(uint8_t byte) int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests) { char progress_text[80]; + + char instr_set[12] = {0}; + get_SIMD_instruction_set(instr_set); + PrintAndLog("Using %s SIMD core.", instr_set); srand((unsigned) time(NULL)); brute_force_per_second = brute_force_benchmark(); diff --git a/client/emv/crypto_polarssl.c b/client/emv/crypto_polarssl.c index 760395c4..3d11afe5 100644 --- a/client/emv/crypto_polarssl.c +++ b/client/emv/crypto_polarssl.c @@ -215,7 +215,7 @@ static unsigned char *crypto_pk_polarssl_encrypt(const struct crypto_pk *_cp, co res = rsa_public(&cp->ctx, buf, result); if(res) { - printf("RSA encrypt failed. Error: %x data len: %d key len: %d\n", res * -1, len, keylen); + printf("RSA encrypt failed. Error: %x data len: %zd key len: %zd\n", res * -1, len, keylen); return NULL; } @@ -241,7 +241,7 @@ static unsigned char *crypto_pk_polarssl_decrypt(const struct crypto_pk *_cp, co res = rsa_private(&cp->ctx, buf, result); // CHECK??? if(res) { - printf("RSA decrypt failed. Error: %x data len: %d key len: %d\n", res * -1, len, keylen); + printf("RSA decrypt failed. Error: %x data len: %zd key len: %zd\n", res * -1, len, keylen); return NULL; } diff --git a/client/emv/emv_pki.c b/client/emv/emv_pki.c index 7803060e..f79e3045 100644 --- a/client/emv/emv_pki.c +++ b/client/emv/emv_pki.c @@ -53,7 +53,7 @@ static unsigned char *emv_pki_decode_message(const struct emv_pk *enc_pk, } if (cert_tlv->len != enc_pk->mlen) { - printf("ERROR: Certificate length (%d) not equal key length (%d)\n", cert_tlv->len, enc_pk->mlen); + printf("ERROR: Certificate length (%zd) not equal key length (%zd)\n", cert_tlv->len, enc_pk->mlen); return NULL; } kcp = crypto_pk_open(enc_pk->pk_algo, @@ -451,7 +451,7 @@ struct tlvdb *emv_pki_perform_cda_ex(const struct emv_pk *enc_pk, const struct t un_tlv, NULL); if (!data || data_len < 3) { - printf("ERROR: can't decode message. len %d\n", data_len); + printf("ERROR: can't decode message. len %zd\n", data_len); return NULL; } diff --git a/client/emv/test/crypto_test.c b/client/emv/test/crypto_test.c index ff18b9da..352f48b4 100644 --- a/client/emv/test/crypto_test.c +++ b/client/emv/test/crypto_test.c @@ -219,7 +219,7 @@ static int test_pk(bool verbose) tmp = crypto_pk_get_parameter(pubk, 0, &tmp_len); if (tmp_len != sizeof(pk_N) || memcmp(tmp, pk_N, tmp_len)) { - fprintf(stderr, "ERROR: crypto_pk_get_parameter(0) Modulus. param len %d len %d\n", tmp_len, sizeof(pk_N)); + fprintf(stderr, "ERROR: crypto_pk_get_parameter(0) Modulus. param len %zd len %zd\n", tmp_len, sizeof(pk_N)); free(tmp); goto close_pub; } @@ -256,7 +256,7 @@ static int test_pk(bool verbose) tmp = crypto_pk_get_parameter(privk, 0, &tmp_len); if (tmp_len != sizeof(pk_N) || memcmp(tmp, pk_N, tmp_len)) { - fprintf(stderr, "ERROR: crypto_pk_get_parameter(0) Modulus. param len %d len %d\n", tmp_len, sizeof(pk_N)); + fprintf(stderr, "ERROR: crypto_pk_get_parameter(0) Modulus. param len %zd len %zd\n", tmp_len, sizeof(pk_N)); free(tmp); goto close; } diff --git a/client/hardnested/hardnested_bf_core.c b/client/hardnested/hardnested_bf_core.c index 3c0c044f..d02209e9 100644 --- a/client/hardnested/hardnested_bf_core.c +++ b/client/hardnested/hardnested_bf_core.c @@ -548,44 +548,105 @@ out: crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; -// determine the available instruction set at runtime and call the correct function -const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { +static SIMDExecInstr intSIMDInstr = SIMD_AUTO; + +void SetSIMDInstr(SIMDExecInstr instr) { + intSIMDInstr = instr; + + crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; + bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; +} + +SIMDExecInstr GetSIMDInstr() { + SIMDExecInstr instr = SIMD_NONE; + #if defined (__i386__) || defined (__x86_64__) #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; - else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + if (__builtin_cpu_supports("avx512f")) instr = SIMD_AVX512; + else if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; #else - if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + if (__builtin_cpu_supports("avx2")) instr = SIMD_AVX2; #endif - else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; - else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; - else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; + else if (__builtin_cpu_supports("avx")) instr = SIMD_AVX; + else if (__builtin_cpu_supports("sse2")) instr = SIMD_SSE2; + else if (__builtin_cpu_supports("mmx")) instr = SIMD_MMX; else #endif #endif - crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD; + instr = SIMD_NONE; + + return instr; +} + +SIMDExecInstr GetSIMDInstrAuto() { + SIMDExecInstr instr = intSIMDInstr; + if (instr == SIMD_AUTO) + return GetSIMDInstr(); + + return instr; +} + +// determine the available instruction set at runtime and call the correct function +const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { + switch(GetSIMDInstrAuto()) { +#if defined (__i386__) || defined (__x86_64__) +#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) +#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) + case SIMD_AVX512: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; + break; +#endif + case SIMD_AVX2: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; + break; + case SIMD_AVX: + crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; + break; + case SIMD_SSE2: + crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; + break; + case SIMD_MMX: + crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; + break; +#endif +#endif + default: + crack_states_bitsliced_function_p = &crack_states_bitsliced_NOSIMD; + break; + } // call the most optimized function for this CPU return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); } void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { -#if defined (__i386__) || defined (__x86_64__) - #if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) - #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) - if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; - else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; - #else - if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; - #endif - else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; - else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; - else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; - else - #endif + switch(GetSIMDInstrAuto()) { +#if defined (__i386__) || defined (__x86_64__) +#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8 || __clang_major__ == 8 && __clang_minor__ >= 1)) +#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2) + case SIMD_AVX512: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; + break; +#endif + case SIMD_AVX2: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; + break; + case SIMD_AVX: + bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; + break; + case SIMD_SSE2: + bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; + break; + case SIMD_MMX: + bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; + break; +#endif #endif - bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD; + default: + bitslice_test_nonces_function_p = &bitslice_test_nonces_NOSIMD; + break; + } // call the most optimized function for this CPU (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); diff --git a/client/hardnested/hardnested_bf_core.h b/client/hardnested/hardnested_bf_core.h index 7a445993..b3df0547 100644 --- a/client/hardnested/hardnested_bf_core.h +++ b/client/hardnested/hardnested_bf_core.h @@ -52,6 +52,18 @@ THE SOFTWARE. #include "hardnested_bruteforce.h" // statelist_t +typedef enum { + SIMD_AUTO, + SIMD_AVX512, + SIMD_AVX2, + SIMD_AVX, + SIMD_SSE2, + SIMD_MMX, + SIMD_NONE, +} SIMDExecInstr; +extern void SetSIMDInstr(SIMDExecInstr instr); +extern SIMDExecInstr GetSIMDInstrAuto(); + extern const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonces_2nd_byte, noncelist_t *nonces); extern void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonces, uint8_t *bf_test_nonce_par); diff --git a/client/hardnested/hardnested_bruteforce.c b/client/hardnested/hardnested_bruteforce.c index 718b7c5d..3072fc52 100644 --- a/client/hardnested/hardnested_bruteforce.c +++ b/client/hardnested/hardnested_bruteforce.c @@ -140,7 +140,7 @@ bool verify_key(uint32_t cuid, noncelist_t *nonces, uint8_t *best_first_bytes, u } -static void* crack_states_thread(void* x){ +static void* __attribute__((force_align_arg_pointer)) crack_states_thread(void* x){ struct arg { bool silent;