--- /dev/null
+//-----------------------------------------------------------------------------
+// Copyright (C) 2016, 2017 by piwi
+//
+// This code is licensed to you under the terms of the GNU GPL, version 2 or,
+// at your option, any later version. See the LICENSE.txt file for the text of
+// the license.
+//-----------------------------------------------------------------------------
+// Implements a card only attack based on crypto text (encrypted nonces
+// received during a nested authentication) only. Unlike other card only
+// attacks this doesn't rely on implementation errors but only on the
+// inherent weaknesses of the crypto1 cypher. Described in
+// Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
+// Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
+// Computer and Communications Security, 2015
+//-----------------------------------------------------------------------------
+//
+// brute forcing is based on @aczids bitsliced brute forcer
+// https://github.com/aczid/crypto1_bs with some modifications. Mainly:
+// - don't rollback. Start with 2nd byte of nonce instead
+// - reuse results of filter subfunctions
+// - reuse results of previous nonces if some first bits are identical
+//
+//-----------------------------------------------------------------------------
+// aczid's Copyright notice:
+//
+// Bit-sliced Crypto-1 brute-forcing implementation
+// Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid)
+/*
+Copyright (c) 2015-2016 Aram Verstegen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "hardnested_bf_core.h"
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <malloc.h>
+#include <string.h>
+#include "crapto1/crapto1.h"
+#include "parity.h"
+
+// bitslice type
+// while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic
+// same for AVX2 and 512 bit vectors
+// using larger vectors works but seems to generate more register pressure
+#if defined(__AVX512F__)
+#define MAX_BITSLICES 512
+#elif defined(__AVX2__)
+#define MAX_BITSLICES 256
+#elif defined(__AVX__)
+#define MAX_BITSLICES 128
+#elif defined(__SSE2__)
+#define MAX_BITSLICES 128
+#else // MMX or SSE
+#define MAX_BITSLICES 64
+#endif
+
+#define VECTOR_SIZE (MAX_BITSLICES/8)
+typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
+typedef union {
+ bitslice_value_t value;
+ uint64_t bytes64[MAX_BITSLICES/64];
+ uint8_t bytes[MAX_BITSLICES/8];
+} bitslice_t;
+
+// filter function (f20)
+// sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
+#define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d)))
+#define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d)))
+#define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e))))
+
+// bit indexing
+#define get_bit(n, word) (((word) >> (n)) & 1)
+#define get_vector_bit(slice, value) get_bit((slice)&0x3f, value.bytes64[(slice)>>6])
+
+// size of crypto-1 state
+#define STATE_SIZE 48
+// size of nonce to be decrypted
+#define KEYSTREAM_SIZE 24
+
+// endianness conversion
+#define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff)))
+
+// this needs to be compiled several times for each instruction set.
+// For each instruction set, define a dedicated function name:
+#if defined (__AVX512F__)
+#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512
+#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512
+#elif defined (__AVX2__)
+#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2
+#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2
+#elif defined (__AVX__)
+#define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX
+#define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX
+#elif defined (__SSE2__)
+#define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2
+#define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2
+#elif defined (__MMX__)
+#define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX
+#define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX
+#endif
+
+// typedefs and declaration of functions:
+typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*);
+crack_states_bitsliced_t crack_states_bitsliced_AVX512;
+crack_states_bitsliced_t crack_states_bitsliced_AVX2;
+crack_states_bitsliced_t crack_states_bitsliced_AVX;
+crack_states_bitsliced_t crack_states_bitsliced_SSE2;
+crack_states_bitsliced_t crack_states_bitsliced_MMX;
+crack_states_bitsliced_t crack_states_bitsliced_dispatch;
+
+typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*);
+bitslice_test_nonces_t bitslice_test_nonces_AVX512;
+bitslice_test_nonces_t bitslice_test_nonces_AVX2;
+bitslice_test_nonces_t bitslice_test_nonces_AVX;
+bitslice_test_nonces_t bitslice_test_nonces_SSE2;
+bitslice_test_nonces_t bitslice_test_nonces_MMX;
+bitslice_test_nonces_t bitslice_test_nonces_dispatch;
+
+#ifdef _WIN32
+#define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8)
+#define free_bitslice(x) _aligned_free(x)
+#else
+#define malloc_bitslice(x) memalign(MAX_BITSLICES/8, (x))
+#define free_bitslice(x) free(x)
+#endif
+
+#if defined (__MMX__) // (including more sophisticated instruction sets)
+typedef enum {
+ EVEN_STATE = 0,
+ ODD_STATE = 1
+} odd_even_t;
+
+
+// arrays of bitsliced states with identical values in all slices
+static bitslice_t bitsliced_encrypted_nonces[256][KEYSTREAM_SIZE];
+static bitslice_t bitsliced_encrypted_parity_bits[256][4];
+// 1 and 0 vectors
+static bitslice_t bs_ones;
+static bitslice_t bs_zeroes;
+
+
+void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
+
+ // initialize 1 and 0 vectors
+ memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
+ memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
+
+ // bitslice nonces' 2nd to 4th byte
+ for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
+ for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){
+ bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, rev32(bf_test_nonce[i] << 8));
+ if(bit){
+ bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
+ } else {
+ bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
+ }
+ }
+ }
+ // bitslice nonces' parity (4 bits)
+ for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
+ for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){
+ bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]);
+ if(bit){
+ bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
+ } else {
+ bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
+ }
+ }
+ }
+
+}
+
+
+const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){
+
+ // Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
+ // We know that the best first byte is already shifted in. Testing with the remaining three bytes of
+ // the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
+ // brute forcing (including roll back).
+
+ bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
+ bitslice_t * restrict state_p;
+ uint64_t key = -1;
+ uint64_t bucket_states_tested = 0;
+ uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
+ uint32_t bitsliced_blocks = 0;
+ uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
+#if defined (DEBUG_BRUTE_FORCE)
+ uint32_t elimination_step = 0;
+ #define MAX_ELIMINATION_STEP 32
+ uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
+#endif
+#ifdef DEBUG_KEY_ELIMINATION
+ bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
+#endif
+
+ // constant ones/zeroes
+ bitslice_t bs_ones;
+ memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
+ bitslice_t bs_zeroes;
+ memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
+
+ // bitslice all the even states
+ bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *));
+ if (bitsliced_even_states == NULL) {
+ printf("Out of memory error in brute_force. Aborting...");
+ exit(4);
+ }
+ bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
+ if (bitsliced_even_feedback == NULL) {
+ printf("Out of memory error in brute_force. Aborting...");
+ exit(4);
+ }
+ for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){
+ bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t));
+ if (lstate_p == NULL) {
+ printf("Out of memory error in brute_force. Aborting... \n");
+ exit(4);
+ }
+ memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits
+ // bitslice even half-states
+ const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES;
+ bucket_size[bitsliced_blocks] = max_slices;
+#ifdef DEBUG_KEY_ELIMINATION
+ bucket_contains_test_key[bitsliced_blocks] = false;
+#endif
+ uint32_t slice_idx;
+ for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){
+ uint32_t e = *(p_even+slice_idx);
+#ifdef DEBUG_KEY_ELIMINATION
+ if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
+ bucket_contains_test_key[bitsliced_blocks] = true;
+ // printf("bucket %d contains test key even state\n", bitsliced_blocks);
+ // printf("in slice %d\n", slice_idx);
+ }
+#endif
+ for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
+ // set even bits
+ if(e&1){
+ lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
+ }
+ }
+ }
+ // padding with last even state
+ for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) {
+ uint32_t e = *(p_even_end-1);
+ for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
+ // set even bits
+ if(e&1){
+ lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
+ }
+ }
+ }
+ bitsliced_even_states[bitsliced_blocks] = lstate_p;
+ // bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
+ bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^
+ lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^
+ lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value;
+ bitsliced_blocks++;
+ }
+ // bitslice every odd state to every block of even states
+ for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){
+ // early abort
+ if(*keys_found){
+ goto out;
+ }
+
+ // set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states
+
+ state_p = &states[KEYSTREAM_SIZE];
+ uint32_t o = *p_odd;
+
+ // pre-compute the odd feedback bit
+ bool odd_feedback_bit = evenparity32(o&0x29ce5c);
+ const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
+
+ // set odd state bits
+ for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
+ if (o & 1){
+ state_p[state_idx] = bs_ones;
+ } else {
+ state_p[state_idx] = bs_zeroes;
+ }
+ }
+
+ bitslice_value_t crypto1_bs_f20b_2[16];
+ bitslice_value_t crypto1_bs_f20b_3[8];
+
+ crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
+ crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
+
+ bitslice_value_t ksb[8];
+ ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value),
+ f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value),
+ crypto1_bs_f20b_2[0],
+ f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value),
+ crypto1_bs_f20b_3[0]);
+
+ uint32_t *restrict p_even = p->states[EVEN_STATE];
+ for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) {
+
+#ifdef DEBUG_KEY_ELIMINATION
+ // if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
+ // printf("Now testing known target key.\n");
+ // printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
+ // }
+#endif
+ // add the even state bits
+ const bitslice_t const *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
+ for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
+ state_p[state_idx] = bitsliced_even_state[state_idx/2];
+ }
+
+ // pre-compute first feedback bit vector. This is the same for all nonces
+ bitslice_value_t fbb[8];
+ fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx];
+
+ // vector to contain test results (1 = passed, 0 = failed)
+ bitslice_t results = bs_ones;
+
+ // parity_bits
+ bitslice_value_t par[8];
+ par[0] = bs_zeroes.value;
+ uint32_t next_common_bits = 0;
+
+ for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){
+ // common bits with preceding test nonce
+ uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
+ next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0;
+ uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte
+ bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce
+ bitslice_value_t ks_bits = ksb[common_bits]; // dito for first keystream bits
+ bitslice_value_t parity_bit_vector = par[common_bits]; // dito for first parity vector
+ // bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from previous nonce
+ // bitslice_value_t ks_bits = ksb[0]; // dito for first keystream bits
+ // bitslice_value_t parity_bit_vector = par[0]; // dito for first parity vector
+ state_p -= common_bits; // and reuse the already calculated state bits
+ // highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
+ // or the highest bit which differs from the previous nonce
+ for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) {
+
+ // decrypt nonce bits
+ const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
+ const bitslice_value_t decrypted_nonce_bit_vector = encrypted_nonce_bit_vector ^ ks_bits;
+
+ // compute real parity bits on the fly
+ parity_bit_vector ^= decrypted_nonce_bit_vector;
+
+ // update state
+ state_p--;
+ state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector;
+
+ // update crypto1 subfunctions
+ bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
+ f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value);
+ f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
+ if (ks_idx > KEYSTREAM_SIZE - 8) {
+ f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
+ f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
+ f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
+ crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
+ crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
+ } else if (ks_idx > KEYSTREAM_SIZE - 16) {
+ f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
+ f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
+ f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
+ crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
+ } else if (ks_idx > KEYSTREAM_SIZE - 24){
+ f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
+ f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
+ f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
+ } else {
+ f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
+ f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
+ f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
+ }
+ // update keystream bit
+ ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);
+
+ // for each completed byte:
+ if ((ks_idx & 0x07) == 0) {
+ // get encrypted parity bits
+ const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;
+
+ // decrypt parity bits
+ const bitslice_value_t decrypted_parity_bit_vector = encrypted_parity_bit_vector ^ ks_bits;
+
+ // compare actual parity bits with decrypted parity bits and take count in results vector
+ results.value &= ~parity_bit_vector ^ decrypted_parity_bit_vector;
+
+ // make sure we still have a match in our set
+ // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){
+
+ // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
+ // the short-circuiting also helps
+ if(results.bytes64[0] == 0
+#if MAX_BITSLICES > 64
+ && results.bytes64[1] == 0
+#endif
+#if MAX_BITSLICES > 128
+ && results.bytes64[2] == 0
+ && results.bytes64[3] == 0
+#endif
+ ) {
+#if defined (DEBUG_BRUTE_FORCE)
+ if (elimination_step < MAX_ELIMINATION_STEP) {
+ keys_eliminated[elimination_step] += MAX_BITSLICES;
+ }
+#endif
+#ifdef DEBUG_KEY_ELIMINATION
+ if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
+ printf("Known target key eliminated in brute_force.\n");
+ printf("block_idx = %d/%d, nonce = %d/%d\n", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce);
+ }
+#endif
+ goto stop_tests;
+ }
+ // prepare for next nonce byte
+#if defined (DEBUG_BRUTE_FORCE)
+ elimination_step++;
+#endif
+ parity_bit_vector = bs_zeroes.value;
+ }
+ // update feedback bit vector
+ if (ks_idx != 0) {
+ fb_bits =
+ (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
+ state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
+ state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
+ state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
+ state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
+ state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
+ }
+ // remember feedback and keystream vectors for later use
+ uint8_t bit = KEYSTREAM_SIZE - ks_idx;
+ if (bit <= next_common_bits) { // if needed and not yet stored
+ fbb[bit] = fb_bits;
+ ksb[bit] = ks_bits;
+ par[bit] = parity_bit_vector;
+ }
+ }
+ // prepare for next nonce. Revert to initial state
+ state_p = &states[KEYSTREAM_SIZE];
+ }
+
+ // all nonce tests were successful: we've found a possible key in this block!
+ uint32_t *p_even_test = p_even;
+ for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) {
+ uint64_t results64 = results.bytes64[results_word];
+ for (uint32_t results_bit = 0; results_bit < 64; results_bit++) {
+ if (results64 & 0x01) {
+ if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) {
+ struct Crypto1State pcs;
+ pcs.odd = *p_odd;
+ pcs.even = *p_even_test;
+ lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
+ crypto1_get_lfsr(&pcs, &key);
+ bucket_states_tested += 64 * results_word + results_bit;
+ goto out;
+ }
+#ifdef DEBUG_KEY_ELIMINATION
+ if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
+ printf("Known target key eliminated in brute_force verification.\n");
+ printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
+ }
+#endif
+ }
+#ifdef DEBUG_KEY_ELIMINATION
+ if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
+ printf("Known target key eliminated in brute_force (results_bit == 0).\n");
+ printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
+ }
+#endif
+ results64 >>= 1;
+ p_even_test++;
+ if (p_even_test == p_even_end) {
+ goto stop_tests;
+ }
+ }
+ }
+stop_tests:
+#if defined (DEBUG_BRUTE_FORCE)
+ elimination_step = 0;
+#endif
+ bucket_states_tested += bucket_size[block_idx];
+ // prepare to set new states
+ state_p = &states[KEYSTREAM_SIZE];
+ continue;
+ }
+ }
+out:
+ for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
+ free_bitslice(bitsliced_even_states[block_idx]);
+ }
+ free(bitsliced_even_states);
+ free_bitslice(bitsliced_even_feedback);
+ __sync_fetch_and_add(num_keys_tested, bucket_states_tested);
+
+#if defined (DEBUG_BRUTE_FORCE)
+ for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
+ printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100);
+ }
+#endif
+ return key;
+}
+#endif
+
+
+#ifndef __MMX__
+
+// pointers to functions:
+crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
+bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
+
+// determine the available instruction set at runtime and call the correct function
+const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
+ if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
+ else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
+ else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX;
+ else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2;
+ else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
+ else {
+ printf("\nFatal: you need at least a CPU with MMX instruction set support. Aborting...\n");
+ exit(5);
+ }
+ // call the most optimized function for this CPU
+ return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
+}
+
+void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
+ if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
+ else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
+ else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX;
+ else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2;
+ else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
+ else {
+ printf("\nFatal: you need at least a CPU with MMX instruction set support. Aborting...\n");
+ exit(5);
+ }
+ // call the most optimized function for this CPU
+ (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
+}
+
+// Entries to dispatched function calls
+const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
+ return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
+}
+
+void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
+ (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
+}
+
+#endif