From 9492e0b0986a557afe1c85f08fd02a7fb979f536 Mon Sep 17 00:00:00 2001
From: "micki.held@gmx.de"
 <micki.held@gmx.de@ef4ab9da-24cd-11de-8aaa-f3a34680c41f>
Date: Sun, 15 Sep 2013 09:33:17 +0000
Subject: [PATCH] Major rework of hf mf nested: - PM: used GetCountMifare in
 MifareNested() for improved timing accuracy and to deliver better quality
 nonces - PM: MifareNested now delivers exactly two different nonces to avoid
 time consuming multiple lfsr_recovery32() on client side - Client: replaced
 quicksort by bucketsort in crapto1.c which is faster - Client: use
 multithreading (two parallel calls to lfsr_recovery32()) - Client: fixed a
 small bug in mfnested() (always showed trgkey=0) - Client: introduced a mutex
 for PrintAndLog() to avoid interlaced printing Minor rework of hf mf chk: -
 Avoid time consuming off/on cycles. Send a "halt" instead.

---
 armsrc/apps.h              |   1 +
 armsrc/epa.c               |   2 +-
 armsrc/iso14443a.c         | 233 ++++++++++++------------
 armsrc/iso14443a.h         |   6 +-
 armsrc/mifarecmd.c         | 356 ++++++++++++++++++-------------------
 armsrc/mifareutil.c        |  28 +--
 armsrc/mifareutil.h        |  13 +-
 armsrc/util.c              |   9 +
 client/cmdhfmf.c           |  77 ++++----
 client/mifarehost.c        | 285 +++++++++++++++--------------
 client/mifarehost.h        |  14 +-
 client/nonce2key/crapto1.c | 176 +++++++++++++-----
 client/proxmark3.c         |  80 +++++----
 client/ui.c                |  54 +++---
 14 files changed, 716 insertions(+), 618 deletions(-)

diff --git a/armsrc/apps.h b/armsrc/apps.h
index 9574a937..64ec29a3 100644
--- a/armsrc/apps.h
+++ b/armsrc/apps.h
@@ -156,6 +156,7 @@ void EPA_PACE_Collect_Nonce(UsbCommand * c);
 
 // mifarecmd.h
 void ReaderMifare(bool first_try);
+int32_t dist_nt(uint32_t nt1, uint32_t nt2);
 void MifareReadBlock(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *data);
 void MifareReadSector(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain);
 void MifareWriteBlock(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain);
diff --git a/armsrc/epa.c b/armsrc/epa.c
index 73c3a755..1300b515 100644
--- a/armsrc/epa.c
+++ b/armsrc/epa.c
@@ -432,7 +432,7 @@ int EPA_Setup()
 	}
 
 	// send the PPS request
-	ReaderTransmit((uint8_t *)pps, sizeof(pps));
+	ReaderTransmit((uint8_t *)pps, sizeof(pps), NULL);
 	uint8_t pps_response[3];
 	return_code = ReaderReceive(pps_response);
 	if (return_code != 3 || pps_response[0] != 0xD0) {
diff --git a/armsrc/iso14443a.c b/armsrc/iso14443a.c
index 56afaeb8..bd7e758d 100644
--- a/armsrc/iso14443a.c
+++ b/armsrc/iso14443a.c
@@ -1218,47 +1218,75 @@ void SimulateIso14443aTag(int tagType, int uid_1st, int uid_2nd, byte_t* data)
 	LED_A_OFF();
 }
 
+
+// Prepare a delayed transfer: shift the bit stream in ToSend[] right by
+// (delay & 0x07) bits, carrying each byte's shifted-out bits into the next byte.
+void PrepareDelayedTransfer(uint16_t delay)
+{
+	uint8_t bitmask = 0;
+	uint8_t bits_to_shift = 0;
+	uint8_t bits_shifted = 0;
+	
+	delay &= 0x07;
+	if (delay) {
+		for (uint16_t i = 0; i < delay; i++) {
+			bitmask |= (0x01 << i);
+		}
+		ToSend[++ToSendMax] = 0x00;
+		for (uint16_t i = 0; i < ToSendMax; i++) {
+			bits_to_shift = ToSend[i] & bitmask;
+			ToSend[i] = ToSend[i] >> delay;
+			ToSend[i] = ToSend[i] | (bits_shifted << (8 - delay));
+			bits_shifted = bits_to_shift;
+		}
+	}
+}
+
+
+
+
 //-----------------------------------------------------------------------------
 // Transmit the command (to the tag) that was placed in ToSend[].
+// Parameter timing:
+// if timing == NULL: ignored
+// if *timing == 0: the (8-tick aligned) time of transfer is returned in *timing
+// if *timing != 0: the transfer is delayed until the time specified in *timing
 //-----------------------------------------------------------------------------
-static void TransmitFor14443a(const uint8_t *cmd, int len, int *samples, int *wait)
+static void TransmitFor14443a(const uint8_t *cmd, int len, uint32_t *timing)
 {
-  int c;
+	int c;
 
-  FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
+	FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
 
-	if (wait)
-    if(*wait < 10)
-      *wait = 10;
 
-  for(c = 0; c < *wait;) {
-    if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_TXRDY)) {
-      AT91C_BASE_SSC->SSC_THR = 0x00;		// For exact timing!
-      c++;
-    }
-    if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_RXRDY)) {
-      volatile uint32_t r = AT91C_BASE_SSC->SSC_RHR;
-      (void)r;
-    }
-    WDT_HIT();
-  }
+	if (timing) {
+		if(*timing == 0) {										// Measure time
+			*timing = (GetCountMifare() + 8) & 0xfffffff8;
+		} else {
+			PrepareDelayedTransfer(*timing & 0x00000007);		// Delay transfer (fine tuning - up to 7 MF clock ticks)
+		}
+		if(MF_DBGLEVEL >= 4 && GetCountMifare() >= (*timing & 0xfffffff8)) Dbprintf("TransmitFor14443a: Missed timing");
+		while(GetCountMifare() < (*timing & 0xfffffff8));		// Delay transfer (multiple of 8 MF clock ticks)
+	}
+
+	for(c = 0; c < 10;) {	// standard delay for each transfer (allow tag to be ready after last transmission)
+		if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_TXRDY)) {
+			AT91C_BASE_SSC->SSC_THR = 0x00;	
+			c++;
+		}
+	}
+	
+	c = 0;
+	for(;;) {
+		if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_TXRDY)) {
+			AT91C_BASE_SSC->SSC_THR = cmd[c];
+			c++;
+			if(c >= len) {
+				break;
+			}
+		}
+	}
 
-  c = 0;
-  for(;;) {
-    if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_TXRDY)) {
-      AT91C_BASE_SSC->SSC_THR = cmd[c];
-      c++;
-      if(c >= len) {
-        break;
-      }
-    }
-    if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_RXRDY)) {
-      volatile uint32_t r = AT91C_BASE_SSC->SSC_RHR;
-      (void)r;
-    }
-    WDT_HIT();
-  }
-	if (samples) *samples = (c + *wait) << 3;
 }
 
 //-----------------------------------------------------------------------------
@@ -1528,10 +1556,10 @@ static int GetIso14443aAnswerFromTag(uint8_t *receivedResponse, int maxLen, int
 	for(;;) {
 		WDT_HIT();
 
-		if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_TXRDY)) {
-			AT91C_BASE_SSC->SSC_THR = 0x00;  // To make use of exact timing of next command from reader!!
-			if (elapsed) (*elapsed)++;
-		}
+		// if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_TXRDY)) {
+			// AT91C_BASE_SSC->SSC_THR = 0x00;  // To make use of exact timing of next command from reader!!
+			// if (elapsed) (*elapsed)++;
+		// }
 		if(AT91C_BASE_SSC->SSC_SR & (AT91C_SSC_RXRDY)) {
 			if(c < iso14a_timeout) { c++; } else { return FALSE; }
 			b = (uint8_t)AT91C_BASE_SSC->SSC_RHR;
@@ -1547,17 +1575,13 @@ static int GetIso14443aAnswerFromTag(uint8_t *receivedResponse, int maxLen, int
 	}
 }
 
-void ReaderTransmitBitsPar(uint8_t* frame, int bits, uint32_t par)
+void ReaderTransmitBitsPar(uint8_t* frame, int bits, uint32_t par, uint32_t *timing)
 {
-  int wait = 0;
-  int samples = 0;
-  
-  // This is tied to other size changes
-  // 	uint8_t* frame_addr = ((uint8_t*)BigBuf) + 2024;
+ 
   CodeIso14443aBitsAsReaderPar(frame,bits,par);
   
   // Select the card
-  TransmitFor14443a(ToSend, ToSendMax, &samples, &wait);
+  TransmitFor14443a(ToSend, ToSendMax, timing);
   if(trigger)
   	LED_A_ON();
   
@@ -1565,15 +1589,15 @@ void ReaderTransmitBitsPar(uint8_t* frame, int bits, uint32_t par)
   if (tracing) LogTrace(frame,nbytes(bits),0,par,TRUE);
 }
 
-void ReaderTransmitPar(uint8_t* frame, int len, uint32_t par)
+void ReaderTransmitPar(uint8_t* frame, int len, uint32_t par, uint32_t *timing)
 {
-  ReaderTransmitBitsPar(frame,len*8,par);
+  ReaderTransmitBitsPar(frame,len*8,par, timing);
 }
 
-void ReaderTransmit(uint8_t* frame, int len)
+void ReaderTransmit(uint8_t* frame, int len, uint32_t *timing)
 {
   // Generate parity and redirect
-  ReaderTransmitBitsPar(frame,len*8,GetParity(frame,len));
+  ReaderTransmitBitsPar(frame,len*8,GetParity(frame,len), timing);
 }
 
 int ReaderReceive(uint8_t* receivedAnswer)
@@ -1612,7 +1636,7 @@ int iso14443a_select_card(byte_t* uid_ptr, iso14a_card_select_t* p_hi14a_card, u
   int len;
 	 
   // Broadcast for a card, WUPA (0x52) will force response from all cards in the field
-    ReaderTransmitBitsPar(wupa,7,0);
+    ReaderTransmitBitsPar(wupa,7,0, NULL);
   // Receive the ATQA
   if(!ReaderReceive(resp)) return 0;
 //  Dbprintf("atqa: %02x %02x",resp[0],resp[1]);
@@ -1636,7 +1660,7 @@ int iso14443a_select_card(byte_t* uid_ptr, iso14a_card_select_t* p_hi14a_card, u
     sel_uid[0] = sel_all[0] = 0x93 + cascade_level * 2;
 
     // SELECT_ALL
-    ReaderTransmit(sel_all,sizeof(sel_all));
+    ReaderTransmit(sel_all,sizeof(sel_all), NULL);
     if (!ReaderReceive(resp)) return 0;
     
     // First backup the current uid
@@ -1644,15 +1668,15 @@ int iso14443a_select_card(byte_t* uid_ptr, iso14a_card_select_t* p_hi14a_card, u
     uid_resp_len = 4;
     //    Dbprintf("uid: %02x %02x %02x %02x",uid_resp[0],uid_resp[1],uid_resp[2],uid_resp[3]);
     
-		// calculate crypto UID
-		if(cuid_ptr) {
-      *cuid_ptr = bytes_to_num(uid_resp, 4);
+	// calculate crypto UID. Always use last 4 Bytes.
+	if(cuid_ptr) {
+		*cuid_ptr = bytes_to_num(uid_resp, 4);
     }
 
     // Construct SELECT UID command
 		memcpy(sel_uid+2,resp,5);
     AppendCrc14443a(sel_uid,7);
-    ReaderTransmit(sel_uid,sizeof(sel_uid));
+    ReaderTransmit(sel_uid,sizeof(sel_uid), NULL);
 
     // Receive the SAK
     if (!ReaderReceive(resp)) return 0;
@@ -1687,7 +1711,7 @@ int iso14443a_select_card(byte_t* uid_ptr, iso14a_card_select_t* p_hi14a_card, u
 
   // Request for answer to select
   AppendCrc14443a(rats, 2);
-  ReaderTransmit(rats, sizeof(rats));
+  ReaderTransmit(rats, sizeof(rats), NULL);
 
   if (!(len = ReaderReceive(resp))) return 0;
 
@@ -1702,13 +1726,13 @@ int iso14443a_select_card(byte_t* uid_ptr, iso14a_card_select_t* p_hi14a_card, u
 }
 
 void iso14443a_setup() {
-  // Set up the synchronous serial port
-  FpgaSetupSsc();
+	// Set up the synchronous serial port
+	FpgaSetupSsc();
 	// Start from off (no field generated)
 	// Signal field is off with the appropriate LED
-	LED_D_OFF();
-	FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
-	SpinDelay(50);
+//	LED_D_OFF();
+//	FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
+	// SpinDelay(50);
 
 	SetAdcMuxFor(GPIO_MUXSEL_HIPKD);
 
@@ -1716,7 +1740,7 @@ void iso14443a_setup() {
 	// Signal field is on with the appropriate LED
 	LED_D_ON();
 	FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
-	SpinDelay(50);
+	SpinDelay(7); // iso14443-3 specifies 5ms max.
 
 	iso14a_timeout = 2048; //default
 }
@@ -1730,7 +1754,7 @@ int iso14_apdu(uint8_t * cmd, size_t cmd_len, void * data) {
 	memcpy(real_cmd+2, cmd, cmd_len);
 	AppendCrc14443a(real_cmd,cmd_len+2);
  
-	ReaderTransmit(real_cmd, cmd_len+4);
+	ReaderTransmit(real_cmd, cmd_len+4, NULL);
 	size_t len = ReaderReceive(data);
 	uint8_t * data_bytes = (uint8_t *) data;
 	if (!len)
@@ -1757,21 +1781,20 @@ void ReaderIso14443a(UsbCommand * c)
 	iso14a_command_t param = c->arg[0];
 	uint8_t * cmd = c->d.asBytes;
 	size_t len = c->arg[1];
-  uint32_t arg0 = 0;
-  byte_t buf[USB_CMD_DATA_SIZE];
+	uint32_t arg0 = 0;
+	byte_t buf[USB_CMD_DATA_SIZE];
   
-  iso14a_clear_trace();
-  iso14a_set_tracing(true);
+	iso14a_clear_trace();
+	iso14a_set_tracing(true);
 
 	if(param & ISO14A_REQUEST_TRIGGER) {
-    iso14a_set_trigger(1);
-  }
+		iso14a_set_trigger(1);
+	}
 
 	if(param & ISO14A_CONNECT) {
 		iso14443a_setup();
-		arg0 = iso14443a_select_card(NULL,(iso14a_card_select_t*)buf,NULL);
+		arg0 = iso14443a_select_card(NULL, (iso14a_card_select_t*)buf, NULL);
 		cmd_send(CMD_ACK,arg0,0,0,buf,sizeof(iso14a_card_select_t));
-//    UsbSendPacket((void *)ack, sizeof(UsbCommand));
 	}
 
 	if(param & ISO14A_SET_TIMEOUT) {
@@ -1785,7 +1808,6 @@ void ReaderIso14443a(UsbCommand * c)
 	if(param & ISO14A_APDU) {
 		arg0 = iso14_apdu(cmd, len, buf);
 		cmd_send(CMD_ACK,arg0,0,0,buf,sizeof(buf));
-//		UsbSendPacket((void *)ack, sizeof(UsbCommand));
 	}
 
 	if(param & ISO14A_RAW) {
@@ -1793,50 +1815,24 @@ void ReaderIso14443a(UsbCommand * c)
 			AppendCrc14443a(cmd,len);
 			len += 2;
 		}
-		ReaderTransmit(cmd,len);
+		ReaderTransmit(cmd,len, NULL);
 		arg0 = ReaderReceive(buf);
-//		UsbSendPacket((void *)ack, sizeof(UsbCommand));
-    cmd_send(CMD_ACK,arg0,0,0,buf,sizeof(buf));
+		cmd_send(CMD_ACK,arg0,0,0,buf,sizeof(buf));
 	}
 
 	if(param & ISO14A_REQUEST_TRIGGER) {
-    iso14a_set_trigger(0);
-  }
+		iso14a_set_trigger(0);
+	}
 
 	if(param & ISO14A_NO_DISCONNECT) {
 		return;
-  }
+	}
 
 	FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
 	LEDsoff();
 }
 
 
-// prepare the Mifare AUTH transfer with an added necessary delay.
-void PrepareDelayedAuthTransfer(uint8_t* frame, int len, uint16_t delay)
-{
-	CodeIso14443aBitsAsReaderPar(frame, len*8, GetParity(frame,len));
-
-	uint8_t bitmask = 0;
-	uint8_t bits_to_shift = 0;
-	uint8_t bits_shifted = 0;
-	
-	if (delay) {
-		for (uint16_t i = 0; i < delay; i++) {
-			bitmask |= (0x01 << i);
-		}
-		ToSend[++ToSendMax] = 0x00;
-		for (uint16_t i = 0; i < ToSendMax; i++) {
-			bits_to_shift = ToSend[i] & bitmask;
-			ToSend[i] = ToSend[i] >> delay;
-			ToSend[i] = ToSend[i] | (bits_shifted << (8 - delay));
-			bits_shifted = bits_to_shift;
-		}
-	}
-}
-
-
-
 // Determine the distance between two nonces.
 // Assume that the difference is small, but we don't know which is first.
 // Therefore try in alternating directions.
@@ -1904,11 +1900,8 @@ void ReaderMifare(bool first_try)
 		StartCountMifare();
 		mf_nr_ar3 = 0;
 		iso14443a_setup();
-		FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_TAGSIM_LISTEN); // resets some FPGA internal registers
 		while((GetCountMifare() & 0xffff0000) != 0x10000);		// wait for counter to reset and "warm up" 
-		while(AT91C_BASE_PIOA->PIO_PDSR & GPIO_SSC_FRAME); 		// wait for ssp_frame to be low
-		while(!(AT91C_BASE_PIOA->PIO_PDSR & GPIO_SSC_FRAME)); 	// sync on rising edge of ssp_frame
-		sync_time = GetCountMifare();
+		sync_time = GetCountMifare() & 0xfffffff8;
 		sync_cycles = 65536;									// theory: Mifare Classic's random generator repeats every 2^16 cycles (and so do the nonces).
 		nt_attacked = 0;
 		nt = 0;
@@ -1939,41 +1932,37 @@ void ReaderMifare(bool first_try)
 		LED_C_ON();
 
 		if(!iso14443a_select_card(uid, NULL, &cuid)) {
+			if (MF_DBGLEVEL >= 1)	Dbprintf("Mifare: Can't select card");
 			continue;
 		}
 
 		//keep the card active
 		FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
 
-		PrepareDelayedAuthTransfer(mf_auth, sizeof(mf_auth), (sync_cycles + catch_up_cycles) & 0x00000007);
+		// CodeIso14443aBitsAsReaderPar(mf_auth, sizeof(mf_auth)*8, GetParity(mf_auth, sizeof(mf_auth)*8));
 
-		sync_time = sync_time + ((sync_cycles + catch_up_cycles) & 0xfffffff8);
+		sync_time = (sync_time & 0xfffffff8) + sync_cycles + catch_up_cycles;
 		catch_up_cycles = 0;
 
 		// if we missed the sync time already, advance to the next nonce repeat
 		while(GetCountMifare() > sync_time) {
-			sync_time = sync_time + (sync_cycles & 0xfffffff8);
+			sync_time = (sync_time & 0xfffffff8) + sync_cycles;
 		}
 
-		// now sync. After syncing, the following Classic Auth will return the same tag nonce (mostly)
-		while(GetCountMifare() < sync_time);
-		
-		// Transmit MIFARE_CLASSIC_AUTH
-		int samples = 0;
-		int wait = 0;
-		TransmitFor14443a(ToSend, ToSendMax, &samples, &wait);
+		// Transmit MIFARE_CLASSIC_AUTH at synctime. Should result in returning the same tag nonce (== nt_attacked) 
+		ReaderTransmit(mf_auth, sizeof(mf_auth), &sync_time);
 
 		// Receive the (4 Byte) "random" nonce
 		if (!ReaderReceive(receivedAnswer)) {
+			if (MF_DBGLEVEL >= 1)	Dbprintf("Mifare: Couldn't receive tag nonce");
 			continue;
 		  }
 
- 
 		previous_nt = nt;
 		nt = bytes_to_num(receivedAnswer, 4);
 
 		// Transmit reader nonce with fake par
-		ReaderTransmitPar(mf_nr_ar, sizeof(mf_nr_ar), par);
+		ReaderTransmitPar(mf_nr_ar, sizeof(mf_nr_ar), par, NULL);
 
 		if (first_try && previous_nt && !nt_attacked) { // we didn't calibrate our clock yet
 			int nt_distance = dist_nt(previous_nt, nt);
@@ -1985,7 +1974,7 @@ void ReaderMifare(bool first_try)
 					continue;
 				}
 				sync_cycles = (sync_cycles - nt_distance);
-//				Dbprintf("calibrating in cycle %d. nt_distance=%d, Sync_cycles: %d\n", i, nt_distance, sync_cycles);
+				if (MF_DBGLEVEL >= 3) Dbprintf("calibrating in cycle %d. nt_distance=%d, Sync_cycles: %d\n", i, nt_distance, sync_cycles);
 				continue;
 			}
 		}
@@ -2004,11 +1993,11 @@ void ReaderMifare(bool first_try)
 			    consecutive_resyncs = 0;
 			}
 			if (consecutive_resyncs < 3) {
-				Dbprintf("Lost sync in cycle %d. nt_distance=%d. Consecutive Resyncs = %d. Trying one time catch up...\n", i, -catch_up_cycles, consecutive_resyncs);
+				if (MF_DBGLEVEL >= 3) Dbprintf("Lost sync in cycle %d. nt_distance=%d. Consecutive Resyncs = %d. Trying one time catch up...\n", i, -catch_up_cycles, consecutive_resyncs);
 			}
 			else {	
 				sync_cycles = sync_cycles + catch_up_cycles;
-				Dbprintf("Lost sync in cycle %d for the fourth time consecutively (nt_distance = %d). Adjusting sync_cycles to %d.\n", i, -catch_up_cycles, sync_cycles);
+				if (MF_DBGLEVEL >= 3) Dbprintf("Lost sync in cycle %d for the fourth time consecutively (nt_distance = %d). Adjusting sync_cycles to %d.\n", i, -catch_up_cycles, sync_cycles);
 			}
 			continue;
 		}
@@ -2018,7 +2007,7 @@ void ReaderMifare(bool first_try)
 		// Receive answer. This will be a 4 Bit NACK when the 8 parity bits are OK after decoding
 		if (ReaderReceive(receivedAnswer))
 		{
-			catch_up_cycles = 8; 	// the PRNG doesn't run during data transfers. 4 Bit = 8 cycles
+			catch_up_cycles = 8; 	// the PRNG is delayed by 8 cycles due to the NACK (4Bits = 0x05 encrypted) transfer
 	
 			if (nt_diff == 0)
 			{
diff --git a/armsrc/iso14443a.h b/armsrc/iso14443a.h
index c3051d48..4c3c6674 100644
--- a/armsrc/iso14443a.h
+++ b/armsrc/iso14443a.h
@@ -82,9 +82,9 @@ extern byte_t oddparity (const byte_t bt);
 extern uint32_t GetParity(const uint8_t * pbtCmd, int iLen);
 extern void AppendCrc14443a(uint8_t* data, int len);
 
-extern void ReaderTransmit(uint8_t* frame, int len);
-extern void ReaderTransmitBitsPar(uint8_t* frame, int bits, uint32_t par);
-extern void ReaderTransmitPar(uint8_t* frame, int len, uint32_t par);
+extern void ReaderTransmit(uint8_t* frame, int len, uint32_t *timing);
+extern void ReaderTransmitBitsPar(uint8_t* frame, int bits, uint32_t par, uint32_t *timing);
+extern void ReaderTransmitPar(uint8_t* frame, int len, uint32_t par, uint32_t *timing);
 extern int ReaderReceive(uint8_t* receivedAnswer);
 extern int ReaderReceivePar(uint8_t* receivedAnswer, uint32_t * parptr);
 
diff --git a/armsrc/mifarecmd.c b/armsrc/mifarecmd.c
index 02470702..fa0ff627 100644
--- a/armsrc/mifarecmd.c
+++ b/armsrc/mifarecmd.c
@@ -261,7 +261,7 @@ void MifareWriteBlock(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain)
 //	UsbCommand ack = {CMD_ACK, {isOK, 0, 0}};
 	
 	LED_B_ON();
-  cmd_send(CMD_ACK,isOK,0,0,0,0);
+	cmd_send(CMD_ACK,isOK,0,0,0,0);
 //	UsbSendPacket((uint8_t *)&ack, sizeof(UsbCommand));
 	LED_B_OFF();
 
@@ -280,184 +280,195 @@ int valid_nonce(uint32_t Nt, uint32_t NtEnc, uint32_t Ks1, byte_t * parity) {
 	(oddparity((Nt >> 8) & 0xFF) == ((parity[2]) ^ oddparity((NtEnc >> 8) & 0xFF) ^ BIT(Ks1,0)))) ? 1 : 0;
 }
 
+
+
 //-----------------------------------------------------------------------------
 // MIFARE nested authentication. 
 // 
 //-----------------------------------------------------------------------------
-void MifareNested(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datain)
+void MifareNested(uint32_t arg0, uint32_t arg1, uint32_t calibrate, uint8_t *datain)
 {
 	// params
-	uint8_t blockNo = arg0;
-	uint8_t keyType = arg1;
-	uint8_t targetBlockNo = arg2 & 0xff;
-	uint8_t targetKeyType = (arg2 >> 8) & 0xff;
+	uint8_t blockNo = arg0 & 0xff;
+	uint8_t keyType = (arg0 >> 8) & 0xff;
+	uint8_t targetBlockNo = arg1 & 0xff;
+	uint8_t targetKeyType = (arg1 >> 8) & 0xff;
 	uint64_t ui64Key = 0;
 
 	ui64Key = bytes_to_num(datain, 6);
 	
 	// variables
-	int rtr, i, j, m, len;
-	int davg, dmin, dmax;
+	uint16_t rtr, i, j, len;
+	uint16_t davg;
+	static uint16_t dmin, dmax;
 	uint8_t uid[10];
 	uint32_t cuid, nt1, nt2, nttmp, nttest, par, ks1;
+	uint32_t target_nt[2], target_ks[2];
+	
 	uint8_t par_array[4];
-	nestedVector nvector[NES_MAX_INFO + 1][11];
-	int nvectorcount[NES_MAX_INFO + 1];
-	int ncount = 0;
+	uint16_t ncount = 0;
 	struct Crypto1State mpcs = {0, 0};
 	struct Crypto1State *pcs;
 	pcs = &mpcs;
 	uint8_t* receivedAnswer = mifare_get_bigbufptr();
 
-	//init
-	for (i = 0; i < NES_MAX_INFO + 1; i++) nvectorcount[i] = 11;  //  11 - empty block;
-	
+	uint32_t auth1_time, auth2_time;
+	static uint16_t delta_time;
+
+	StartCountMifare();
+
 	// clear trace
 	iso14a_clear_trace();
-  iso14a_set_tracing(false);
+	iso14a_set_tracing(false);
 	
 	iso14443a_setup();
 
 	LED_A_ON();
-	LED_B_ON();
 	LED_C_OFF();
 
-  FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
-  SpinDelay(200);
-	
-	davg = dmax = 0;
-	dmin = 2000;
 
-	// test nonce distance
-	for (rtr = 0; rtr < 10; rtr++) {
-    FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
-    SpinDelay(100);
-    FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
+	while((GetCountMifare() & 0xffff0000) != 0x00010000);		// wait for counter to reset and "warm up" 
 
-    // Test if the action was cancelled
-    if(BUTTON_PRESS()) {
-      break;
-    }
+	// statistics on nonce distance
+	if (calibrate) {	// for first call only. Otherwise reuse previous calibration
+		LED_B_ON();
 
-		if(!iso14443a_select_card(uid, NULL, &cuid)) {
-			if (MF_DBGLEVEL >= 1)	Dbprintf("Can't select card");
-			break;
-		};
+		davg = dmax = 0;
+		dmin = 2000;
+		delta_time = 0;
 		
-		if(mifare_classic_authex(pcs, cuid, blockNo, keyType, ui64Key, AUTH_FIRST, &nt1)) {
-			if (MF_DBGLEVEL >= 1)	Dbprintf("Auth1 error");
-			break;
-		};
+		for (rtr = 0; rtr < 17; rtr++) {
 
-		if(mifare_classic_authex(pcs, cuid, blockNo, keyType, ui64Key, AUTH_NESTED, &nt2)) {
-			if (MF_DBGLEVEL >= 1)	Dbprintf("Auth2 error");
-			break;
-		};
-		
-		nttmp = prng_successor(nt1, 500);
-		for (i = 501; i < 2000; i++) {
-			nttmp = prng_successor(nttmp, 1);
-			if (nttmp == nt2) break;
+			// prepare next select. No need to power down the card.
+			if(mifare_classic_halt(pcs, cuid)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Halt error");
+				rtr--;
+				continue;
+			}
+
+			if(!iso14443a_select_card(uid, NULL, &cuid)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Can't select card");
+				rtr--;
+				continue;
+			};
+
+			auth1_time = 0;
+			if(mifare_classic_authex(pcs, cuid, blockNo, keyType, ui64Key, AUTH_FIRST, &nt1, &auth1_time)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Auth1 error");
+				rtr--;
+				continue;
+			};
+
+			if (delta_time) {
+				auth2_time = auth1_time + delta_time;
+			} else {
+				auth2_time = 0;
+			}
+			if(mifare_classic_authex(pcs, cuid, blockNo, keyType, ui64Key, AUTH_NESTED, &nt2, &auth2_time)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Auth2 error");
+				rtr--;
+				continue;
+			};
+
+			nttmp = prng_successor(nt1, 500);
+			for (i = 501; i < 1200; i++) {
+				nttmp = prng_successor(nttmp, 1);
+				if (nttmp == nt2) break;
+			}
+
+			if (i != 1200) {
+				if (rtr != 0) {
+					davg += i;
+					dmin = MIN(dmin, i);
+					dmax = MAX(dmax, i);
+				}
+				else {
+					delta_time = auth2_time - auth1_time + 32;  // allow some slack for proper timing
+				}
+				if (MF_DBGLEVEL >= 3) Dbprintf("Nested: calibrating... ntdist=%d", i);
+			}
 		}
 		
-		if (i != 2000) {
-			davg += i;
-			if (dmin > i) dmin = i;
-			if (dmax < i) dmax = i;
-			if (MF_DBGLEVEL >= 4)	Dbprintf("r=%d nt1=%08x nt2=%08x distance=%d", rtr, nt1, nt2, i);
-		}
-	}
-	
-	if (rtr == 0)	return;
-
-	davg = davg / rtr;
-	if (MF_DBGLEVEL >= 3)	Dbprintf("distance: min=%d max=%d avg=%d", dmin, dmax, davg);
+		if (rtr <= 1)	return;
 
-	LED_B_OFF();
+		davg = (davg + (rtr - 1)/2) / (rtr - 1);
+		
+		if (MF_DBGLEVEL >= 3) Dbprintf("min=%d max=%d avg=%d, delta_time=%d", dmin, dmax, davg, delta_time);
 
+		dmin = davg - 2;
+		dmax = davg + 2;
+		
+		LED_B_OFF();
+	
+	}
 //  -------------------------------------------------------------------------------------------------	
 	
 	LED_C_ON();
 
 	//  get crypted nonces for target sector
-	for (rtr = 0; rtr < NS_RETRIES_GETNONCE; rtr++) {
-	if (MF_DBGLEVEL >= 4)			Dbprintf("------------------------------");
+	for(i=0; i < 2; i++) { // look for exactly two different nonces
 
-		FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
-    SpinDelay(100);
-    FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
-
-    // Test if the action was cancelled
-    if(BUTTON_PRESS()) {
-      break;
-    }
-
-		if(!iso14443a_select_card(uid, NULL, &cuid)) {
-			if (MF_DBGLEVEL >= 1)	Dbprintf("Can't select card");
-			break;
-		};
+		target_nt[i] = 0;
+		while(target_nt[i] == 0) { // continue until we have an unambiguous nonce
 		
-		if(mifare_classic_authex(pcs, cuid, blockNo, keyType, ui64Key, AUTH_FIRST, &nt1)) {
-			if (MF_DBGLEVEL >= 1)	Dbprintf("Auth1 error");
-			break;
-		};
+			// prepare next select. No need to power down the card.
+			if(mifare_classic_halt(pcs, cuid)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Halt error");
+				continue;
+			}
 
-		// nested authentication
-		len = mifare_sendcmd_shortex(pcs, AUTH_NESTED, 0x60 + (targetKeyType & 0x01), targetBlockNo, receivedAnswer, &par);
-		if (len != 4) {
-			if (MF_DBGLEVEL >= 1)	Dbprintf("Auth2 error len=%d", len);
-			break;
-		};
-	
-		nt2 = bytes_to_num(receivedAnswer, 4);		
-		if (MF_DBGLEVEL >= 4)	Dbprintf("r=%d nt1=%08x nt2enc=%08x nt2par=%08x", rtr, nt1, nt2, par);
-		
-		// Parity validity check
-		for (i = 0; i < 4; i++) {
-			par_array[i] = (oddparity(receivedAnswer[i]) != ((par & 0x08) >> 3));
-			par = par << 1;
-		}
+			if(!iso14443a_select_card(uid, NULL, &cuid)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Can't select card");
+				continue;
+			};
 		
-		ncount = 0;
-		nttest = prng_successor(nt1, dmin - NS_TOLERANCE);
-		for (m = dmin - NS_TOLERANCE + 1; m < dmax + NS_TOLERANCE; m++) {
-			nttest = prng_successor(nttest, 1);
-			ks1 = nt2 ^ nttest;
-
-			if (valid_nonce(nttest, nt2, ks1, par_array) && (ncount < 11)){
-				
-				nvector[NES_MAX_INFO][ncount].nt = nttest;
-				nvector[NES_MAX_INFO][ncount].ks1 = ks1;
-				ncount++;
-				nvectorcount[NES_MAX_INFO] = ncount;
-				if (MF_DBGLEVEL >= 4)	Dbprintf("valid m=%d ks1=%08x nttest=%08x", m, ks1, nttest);
-			}
+			auth1_time = 0;
+			if(mifare_classic_authex(pcs, cuid, blockNo, keyType, ui64Key, AUTH_FIRST, &nt1, &auth1_time)) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Auth1 error");
+				continue;
+			};
 
-		}
+			// nested authentication
+			auth2_time = auth1_time + delta_time;
+			len = mifare_sendcmd_shortex(pcs, AUTH_NESTED, 0x60 + (targetKeyType & 0x01), targetBlockNo, receivedAnswer, &par, &auth2_time);
+			if (len != 4) {
+				if (MF_DBGLEVEL >= 1)	Dbprintf("Nested: Auth2 error len=%d", len);
+				continue;
+			};
 		
-		// select vector with length less than got
-		if (nvectorcount[NES_MAX_INFO] != 0) {
-			m = NES_MAX_INFO;
+			nt2 = bytes_to_num(receivedAnswer, 4);		
+			if (MF_DBGLEVEL >= 3) Dbprintf("Nonce#%d: Testing nt1=%08x nt2enc=%08x nt2par=%02x", i+1, nt1, nt2, par);
 			
-			for (i = 0; i < NES_MAX_INFO; i++)
-				if (nvectorcount[i] > 10) {
-					m = i;
-					break;
-				}
-				
-			if (m == NES_MAX_INFO)
-				for (i = 0; i < NES_MAX_INFO; i++)
-					if (nvectorcount[NES_MAX_INFO] < nvectorcount[i]) {
-						m = i;
+			// Parity validity check
+			for (j = 0; j < 4; j++) {
+				par_array[j] = (oddparity(receivedAnswer[j]) != ((par & 0x08) >> 3));
+				par = par << 1;
+			}
+			
+			ncount = 0;
+			nttest = prng_successor(nt1, dmin - 1);
+			for (j = dmin; j < dmax + 1; j++) {
+				nttest = prng_successor(nttest, 1);
+				ks1 = nt2 ^ nttest;
+
+				if (valid_nonce(nttest, nt2, ks1, par_array)){
+					if (ncount > 0) { 		// we are only interested in unambiguous nonces, try again
+						if (MF_DBGLEVEL >= 3) Dbprintf("Nonce#%d: dismissed (ambigous), ntdist=%d", i+1, j);
+						target_nt[i] = 0;
+						break;
+					}
+					target_nt[i] = nttest;
+					target_ks[i] = ks1;
+					ncount++;
+					if (i == 1 && target_nt[1] == target_nt[0]) { // we need two different nonces
+						target_nt[i] = 0;
+						if (MF_DBGLEVEL >= 3) Dbprintf("Nonce#2: dismissed (= nonce#1), ntdist=%d", j);
 						break;
 					}
-					
-			if (m != NES_MAX_INFO) {
-				for (i = 0; i < nvectorcount[m]; i++) {
-					nvector[m][i] = nvector[NES_MAX_INFO][i];
+					if (MF_DBGLEVEL >= 3) Dbprintf("Nonce#%d: valid, ntdist=%d", i+1, j);
 				}
-				nvectorcount[m] = nvectorcount[NES_MAX_INFO];
 			}
+			if (target_nt[i] == 0 && j == dmax+1 && MF_DBGLEVEL >= 3) Dbprintf("Nonce#%d: dismissed (all invalid)", i+1);
 		}
 	}
 
@@ -470,57 +481,26 @@ void MifareNested(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datain)
 	memset(uid, 0x44, 4);
 	LogTrace(uid, 4, 0, 0, TRUE);
 
-//  UsbCommand ack = {CMD_ACK, {0, 0, 0}};
-
-	for (i = 0; i < NES_MAX_INFO; i++) {
-		if (nvectorcount[i] > 10) continue;
-		
-		for (j = 0; j < nvectorcount[i]; j += 5) {
-			ncount = nvectorcount[i] - j;
-			if (ncount > 5) ncount = 5; 
-
-//			ack.arg[0] = 0; // isEOF = 0
-//			ack.arg[1] = ncount;
-//			ack.arg[2] = targetBlockNo + (targetKeyType * 0x100);
-//			memset(ack.d.asBytes, 0x00, sizeof(ack.d.asBytes));
-			
-      byte_t buf[48];
-      memset(buf, 0x00, sizeof(buf));
-			memcpy(buf, &cuid, 4);
-			for (m = 0; m < ncount; m++) {
-				memcpy(buf + 8 + m * 8 + 0, &nvector[i][m + j].nt, 4);
-				memcpy(buf + 8 + m * 8 + 4, &nvector[i][m + j].ks1, 4);
-			}
-	
-			LED_B_ON();
-      cmd_send(CMD_ACK,0,ncount,targetBlockNo + (targetKeyType * 0x100),buf,48);
-//			UsbSendPacket((uint8_t *)&ack, sizeof(UsbCommand));
-			LED_B_OFF();
-		}
-	}
-
-	// finalize list
-//	ack.arg[0] = 1; // isEOF = 1
-//	ack.arg[1] = 0;
-//	ack.arg[2] = 0;
-//	memset(ack.d.asBytes, 0x00, sizeof(ack.d.asBytes));
+	byte_t buf[4 + 4 * 4];
+	memcpy(buf, &cuid, 4);
+	memcpy(buf+4, &target_nt[0], 4);
+	memcpy(buf+8, &target_ks[0], 4);
+	memcpy(buf+12, &target_nt[1], 4);
+	memcpy(buf+16, &target_ks[1], 4);
 	
 	LED_B_ON();
-//	UsbSendPacket((uint8_t *)&ack, sizeof(UsbCommand));
-  cmd_send(CMD_ACK,1,0,0,0,0);
+	cmd_send(CMD_ACK, 0, 2, targetBlockNo + (targetKeyType * 0x100), buf, sizeof(buf));
 	LED_B_OFF();
 
-	if (MF_DBGLEVEL >= 4)	DbpString("NESTED FINISHED");
+	if (MF_DBGLEVEL >= 3)	DbpString("NESTED FINISHED");
 
-	// Thats it...
 	FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
 	LEDsoff();
-	
-  iso14a_set_tracing(TRUE);
+	iso14a_set_tracing(TRUE);
 }
 
 //-----------------------------------------------------------------------------
-// MIFARE check keys. key count up to 8. 
+// MIFARE check keys. key count up to 85. 
 // 
 //-----------------------------------------------------------------------------
 void MifareChkKeys(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain)
@@ -546,7 +526,7 @@ void MifareChkKeys(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain)
 	
 	// clear trace
 	iso14a_clear_trace();
-  iso14a_set_tracing(TRUE);
+	iso14a_set_tracing(TRUE);
 
 	iso14443a_setup();
 
@@ -554,14 +534,20 @@ void MifareChkKeys(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain)
 	LED_B_OFF();
 	LED_C_OFF();
 
-	SpinDelay(300);
+//	SpinDelay(300);
 	for (i = 0; i < keyCount; i++) {
-		FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
-    SpinDelay(100);
-    FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
+//		FpgaWriteConfWord(FPGA_MAJOR_MODE_OFF);
+//		SpinDelay(100);
+//		FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_READER_MOD);
+		// prepare next select by sending a HALT. There is no need to power down the card.
+		if(mifare_classic_halt(pcs, cuid)) {
+			if (MF_DBGLEVEL >= 1)	Dbprintf("ChkKeys: Halt error");
+		}
 
+		// SpinDelay(50);
+		
 		if(!iso14443a_select_card(uid, NULL, &cuid)) {
-			if (OLD_MF_DBGLEVEL >= 1)	Dbprintf("Can't select card");
+			if (OLD_MF_DBGLEVEL >= 1)	Dbprintf("ChkKeys: Can't select card");
 			break;
 		};
 
@@ -581,12 +567,8 @@ void MifareChkKeys(uint8_t arg0, uint8_t arg1, uint8_t arg2, uint8_t *datain)
 	memset(uid, 0x44, 4);
 	LogTrace(uid, 4, 0, 0, TRUE);
 
-//	UsbCommand ack = {CMD_ACK, {isOK, 0, 0}};
-//	if (isOK) memcpy(ack.d.asBytes, datain + i * 6, 6);
-	
 	LED_B_ON();
     cmd_send(CMD_ACK,isOK,0,0,datain + i * 6,6);
-//	UsbSendPacket((uint8_t *)&ack, sizeof(UsbCommand));
 	LED_B_OFF();
 
   // Thats it...
@@ -799,13 +781,13 @@ void MifareCSetBlock(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datai
 	
 		// reset chip
 		if (needWipe){
-      ReaderTransmitBitsPar(wupC1,7,0);
+      ReaderTransmitBitsPar(wupC1,7,0, NULL);
 			if(!ReaderReceive(receivedAnswer) || (receivedAnswer[0] != 0x0a)) {
 				if (MF_DBGLEVEL >= 1)	Dbprintf("wupC1 error");
 				break;
 			};
 
-			ReaderTransmit(wipeC, sizeof(wipeC));
+			ReaderTransmit(wipeC, sizeof(wipeC), NULL);
 			if(!ReaderReceive(receivedAnswer) || (receivedAnswer[0] != 0x0a)) {
 				if (MF_DBGLEVEL >= 1)	Dbprintf("wipeC error");
 				break;
@@ -819,20 +801,20 @@ void MifareCSetBlock(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datai
 
 		// write block
 		if (workFlags & 0x02) {
-      ReaderTransmitBitsPar(wupC1,7,0);
+      ReaderTransmitBitsPar(wupC1,7,0, NULL);
 			if(!ReaderReceive(receivedAnswer) || (receivedAnswer[0] != 0x0a)) {
 				if (MF_DBGLEVEL >= 1)	Dbprintf("wupC1 error");
 				break;
 			};
 
-			ReaderTransmit(wupC2, sizeof(wupC2));
+			ReaderTransmit(wupC2, sizeof(wupC2), NULL);
 			if(!ReaderReceive(receivedAnswer) || (receivedAnswer[0] != 0x0a)) {
 				if (MF_DBGLEVEL >= 1)	Dbprintf("wupC2 error");
 				break;
 			};
 		}
 
-		if ((mifare_sendcmd_short(NULL, 0, 0xA0, blockNo, receivedAnswer) != 1) || (receivedAnswer[0] != 0x0a)) {
+		if ((mifare_sendcmd_short(NULL, 0, 0xA0, blockNo, receivedAnswer, NULL) != 1) || (receivedAnswer[0] != 0x0a)) {
 			if (MF_DBGLEVEL >= 1)	Dbprintf("write block send command error");
 			break;
 		};
@@ -840,7 +822,7 @@ void MifareCSetBlock(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datai
 		memcpy(d_block, datain, 16);
 		AppendCrc14443a(d_block, 16);
 	
-		ReaderTransmit(d_block, sizeof(d_block));
+		ReaderTransmit(d_block, sizeof(d_block), NULL);
 		if ((ReaderReceive(receivedAnswer) != 1) || (receivedAnswer[0] != 0x0a)) {
 			if (MF_DBGLEVEL >= 1)	Dbprintf("write block send data error");
 			break;
@@ -923,13 +905,13 @@ void MifareCGetBlock(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datai
 
 	while (true) {
 		if (workFlags & 0x02) {
-      ReaderTransmitBitsPar(wupC1,7,0);
+      ReaderTransmitBitsPar(wupC1,7,0, NULL);
 			if(!ReaderReceive(receivedAnswer) || (receivedAnswer[0] != 0x0a)) {
 				if (MF_DBGLEVEL >= 1)	Dbprintf("wupC1 error");
 				break;
 			};
 
-			ReaderTransmit(wupC2, sizeof(wupC2));
+			ReaderTransmit(wupC2, sizeof(wupC2), NULL);
 			if(!ReaderReceive(receivedAnswer) || (receivedAnswer[0] != 0x0a)) {
 				if (MF_DBGLEVEL >= 1)	Dbprintf("wupC2 error");
 				break;
@@ -937,7 +919,7 @@ void MifareCGetBlock(uint32_t arg0, uint32_t arg1, uint32_t arg2, uint8_t *datai
 		}
 
 		// read block
-		if ((mifare_sendcmd_short(NULL, 0, 0x30, blockNo, receivedAnswer) != 18)) {
+		if ((mifare_sendcmd_short(NULL, 0, 0x30, blockNo, receivedAnswer, NULL) != 18)) {
 			if (MF_DBGLEVEL >= 1)	Dbprintf("read block send command error");
 			break;
 		};
diff --git a/armsrc/mifareutil.c b/armsrc/mifareutil.c
index 085531f4..4f4e978c 100644
--- a/armsrc/mifareutil.c
+++ b/armsrc/mifareutil.c
@@ -77,12 +77,12 @@ uint8_t mf_crypto1_encrypt4bit(struct Crypto1State *pcs, uint8_t data) {
 }
 
 // send commands
-int mifare_sendcmd_short(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer)
+int mifare_sendcmd_short(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer, uint32_t *timing)
 {
-	return mifare_sendcmd_shortex(pcs, crypted, cmd, data, answer, NULL);
+	return mifare_sendcmd_shortex(pcs, crypted, cmd, data, answer, NULL, timing);	// convenience wrapper: no parity pointer (parptr = NULL), timing is handed through unchanged
 }
 
-int mifare_sendcmd_shortex(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer, uint32_t * parptr)
+int mifare_sendcmd_shortex(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer, uint32_t * parptr, uint32_t *timing)
 {
 	uint8_t dcmd[4], ecmd[4];
 	uint32_t pos, par, res;
@@ -101,10 +101,10 @@ int mifare_sendcmd_shortex(struct Crypto1State *pcs, uint8_t crypted, uint8_t cm
 			par = (par >> 1) | ( ((filter(pcs->odd) ^ oddparity(dcmd[pos])) & 0x01) * 0x08 );
 		}	
 
-		ReaderTransmitPar(ecmd, sizeof(ecmd), par);
+		ReaderTransmitPar(ecmd, sizeof(ecmd), par, timing);
 
 	} else {
-		ReaderTransmit(dcmd, sizeof(dcmd));
+		ReaderTransmit(dcmd, sizeof(dcmd), timing);
 	}
 
 	int len = ReaderReceivePar(answer, &par);
@@ -133,10 +133,10 @@ int mifare_sendcmd_shortex(struct Crypto1State *pcs, uint8_t crypted, uint8_t cm
 // mifare commands
 int mifare_classic_auth(struct Crypto1State *pcs, uint32_t uid, uint8_t blockNo, uint8_t keyType, uint64_t ui64Key, uint64_t isNested) 
 {
-	return mifare_classic_authex(pcs, uid, blockNo, keyType, ui64Key, isNested, NULL);
+	return mifare_classic_authex(pcs, uid, blockNo, keyType, ui64Key, isNested, NULL, NULL);	// convenience wrapper: neither ntptr nor timing is requested by the caller
 }
 
-int mifare_classic_authex(struct Crypto1State *pcs, uint32_t uid, uint8_t blockNo, uint8_t keyType, uint64_t ui64Key, uint64_t isNested, uint32_t * ntptr) 
+int mifare_classic_authex(struct Crypto1State *pcs, uint32_t uid, uint8_t blockNo, uint8_t keyType, uint64_t ui64Key, uint64_t isNested, uint32_t * ntptr, uint32_t *timing) 
 {
 	// variables
 	int len;	
@@ -150,8 +150,8 @@ int mifare_classic_authex(struct Crypto1State *pcs, uint32_t uid, uint8_t blockN
 	uint8_t* receivedAnswer = mifare_get_bigbufptr();
 
 	// Transmit MIFARE_CLASSIC_AUTH
-	len = mifare_sendcmd_short(pcs, isNested, 0x60 + (keyType & 0x01), blockNo, receivedAnswer);
-  if (MF_DBGLEVEL >= 4)	Dbprintf("rand nonce len: %x", len);  
+	len = mifare_sendcmd_short(pcs, isNested, 0x60 + (keyType & 0x01), blockNo, receivedAnswer, timing);
+	if (MF_DBGLEVEL >= 4)	Dbprintf("rand nonce len: %x", len);  
 	if (len != 4) return 1;
 	
 	ar[0] = 0x55;
@@ -205,7 +205,7 @@ int mifare_classic_authex(struct Crypto1State *pcs, uint32_t uid, uint8_t blockN
 	}	
 		
 	// Transmit reader nonce and reader answer
-	ReaderTransmitPar(mf_nr_ar, sizeof(mf_nr_ar), par);
+	ReaderTransmitPar(mf_nr_ar, sizeof(mf_nr_ar), par, NULL);
 
 	// Receive 4 bit answer
 	len = ReaderReceive(receivedAnswer);
@@ -235,7 +235,7 @@ int mifare_classic_readblock(struct Crypto1State *pcs, uint32_t uid, uint8_t blo
 	uint8_t* receivedAnswer = mifare_get_bigbufptr();
 	
 	// command MIFARE_CLASSIC_READBLOCK
-	len = mifare_sendcmd_short(pcs, 1, 0x30, blockNo, receivedAnswer);
+	len = mifare_sendcmd_short(pcs, 1, 0x30, blockNo, receivedAnswer, NULL);
 	if (len == 1) {
 		if (MF_DBGLEVEL >= 1)	Dbprintf("Cmd Error: %02x", receivedAnswer[0]);  
 		return 1;
@@ -268,7 +268,7 @@ int mifare_classic_writeblock(struct Crypto1State *pcs, uint32_t uid, uint8_t bl
 	uint8_t* receivedAnswer = mifare_get_bigbufptr();
 	
 	// command MIFARE_CLASSIC_WRITEBLOCK
-	len = mifare_sendcmd_short(pcs, 1, 0xA0, blockNo, receivedAnswer);
+	len = mifare_sendcmd_short(pcs, 1, 0xA0, blockNo, receivedAnswer, NULL);
 
 	if ((len != 1) || (receivedAnswer[0] != 0x0A)) {   //  0x0a - ACK
 		if (MF_DBGLEVEL >= 1)	Dbprintf("Cmd Error: %02x", receivedAnswer[0]);  
@@ -286,7 +286,7 @@ int mifare_classic_writeblock(struct Crypto1State *pcs, uint32_t uid, uint8_t bl
 		par = (par >> 1) | ( ((filter(pcs->odd) ^ oddparity(d_block[pos])) & 0x01) * 0x20000 );
 	}	
 
-	ReaderTransmitPar(d_block_enc, sizeof(d_block_enc), par);
+	ReaderTransmitPar(d_block_enc, sizeof(d_block_enc), par, NULL);
 
 	// Receive the response
 	len = ReaderReceive(receivedAnswer);	
@@ -311,7 +311,7 @@ int mifare_classic_halt(struct Crypto1State *pcs, uint32_t uid)
 	// Mifare HALT
 	uint8_t* receivedAnswer = mifare_get_bigbufptr();
 
-	len = mifare_sendcmd_short(pcs, pcs == NULL ? 0:1, 0x50, 0x00, receivedAnswer);
+	len = mifare_sendcmd_short(pcs, pcs == NULL ? 0:1, 0x50, 0x00, receivedAnswer, NULL);
 	if (len != 0) {
 		if (MF_DBGLEVEL >= 1)	Dbprintf("halt error. response len: %x", len);  
 		return 1;
diff --git a/armsrc/mifareutil.h b/armsrc/mifareutil.h
index 8539a7de..d170f3c6 100644
--- a/armsrc/mifareutil.h
+++ b/armsrc/mifareutil.h
@@ -36,13 +36,6 @@
 
 extern int MF_DBGLEVEL;
 
-//mifare nested
-#define MEM_CHUNK        10000
-#define TRY_KEYS            50
-#define NS_TOLERANCE        10 //  [distance avg-value, distance avg+value]
-#define NS_RETRIES_GETNONCE 15
-#define NES_MAX_INFO         5
-
 //mifare emulator states
 #define MFEMUL_NOFIELD      0
 #define MFEMUL_IDLE         1
@@ -61,13 +54,13 @@ extern int MF_DBGLEVEL;
 
 //functions
 uint8_t* mifare_get_bigbufptr(void);
-int mifare_sendcmd_short(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer);
-int mifare_sendcmd_shortex(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer, uint32_t * parptr);
+int mifare_sendcmd_short(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer, uint32_t *timing);
+int mifare_sendcmd_shortex(struct Crypto1State *pcs, uint8_t crypted, uint8_t cmd, uint8_t data, uint8_t* answer, uint32_t * parptr, uint32_t *timing);
 
 int mifare_classic_auth(struct Crypto1State *pcs, uint32_t uid, \
 												uint8_t blockNo, uint8_t keyType, uint64_t ui64Key, uint64_t isNested);
 int mifare_classic_authex(struct Crypto1State *pcs, uint32_t uid, \
-													uint8_t blockNo, uint8_t keyType, uint64_t ui64Key, uint64_t isNested, uint32_t * ntptr);
+													uint8_t blockNo, uint8_t keyType, uint64_t ui64Key, uint64_t isNested, uint32_t * ntptr, uint32_t *timing);
 int mifare_classic_readblock(struct Crypto1State *pcs, uint32_t uid, uint8_t blockNo, uint8_t *blockData); 
 int mifare_classic_writeblock(struct Crypto1State *pcs, uint32_t uid, uint8_t blockNo, uint8_t *blockData);
 int mifare_classic_halt(struct Crypto1State *pcs, uint32_t uid); 
diff --git a/armsrc/util.c b/armsrc/util.c
index 9bea9e7e..dc18e5e3 100644
--- a/armsrc/util.c
+++ b/armsrc/util.c
@@ -11,6 +11,7 @@
 #include "proxmark3.h"
 #include "util.h"
 #include "string.h"
+#include "apps.h"
 
 size_t nbytes(size_t nbits) {
 	return (nbits/8)+((nbits%8)>0);
@@ -357,6 +358,14 @@ void StartCountMifare()
 	AT91C_BASE_TC0->TC_CCR = AT91C_TC_CLKEN;				// enable TC0
 	AT91C_BASE_TC1->TC_CCR = AT91C_TC_CLKEN;				// enable TC1
 	AT91C_BASE_TC2->TC_CCR = AT91C_TC_CLKEN;				// enable TC2
+
+	// activate the ISO14443 part of the FPGA. We need the clock and frame signals.
+	FpgaWriteConfWord(FPGA_MAJOR_MODE_HF_ISO14443A | FPGA_HF_ISO14443A_TAGSIM_LISTEN);
+
+	// synchronize the counter with the ssp_frame signal.
+	while(AT91C_BASE_PIOA->PIO_PDSR & GPIO_SSC_FRAME); 		// wait for ssp_frame to be low
+	while(!(AT91C_BASE_PIOA->PIO_PDSR & GPIO_SSC_FRAME)); 	// sync on rising edge of ssp_frame (= start of transfer)
+
 	AT91C_BASE_TCB->TCB_BCR = 1;							// assert Sync (set all timers to 0 on next active clock edge)
 }
 
diff --git a/client/cmdhfmf.c b/client/cmdhfmf.c
index 96eb8007..956bbc0e 100644
--- a/client/cmdhfmf.c
+++ b/client/cmdhfmf.c
@@ -500,7 +500,7 @@ int CmdHF14AMfNested(const char *Cmd)
 	uint8_t blDiff = 0;
 	int  SectorsCnt = 0;
 	uint8_t key[6] = {0, 0, 0, 0, 0, 0};
-	uint8_t keyBlock[16 * 6];
+	uint8_t keyBlock[6*6];
 	uint64_t key64 = 0;
 	int transferToEml = 0;
 	
@@ -572,20 +572,12 @@ int CmdHF14AMfNested(const char *Cmd)
 		PrintAndLog("--target block no:%02x target key type:%02x ", trgBlockNo, trgKeyType);
 
 	if (cmdp == 'o') {
-		if (mfnested(blockNo, keyType, key, trgBlockNo, trgKeyType, keyBlock)) {
+		if (mfnested(blockNo, keyType, key, trgBlockNo, trgKeyType, keyBlock, true)) {
 			PrintAndLog("Nested error.");
 			return 2;
 		}
-
-		for (i = 0; i < 16; i++) {
-			PrintAndLog("count=%d key= %s", i, sprint_hex(keyBlock + i * 6, 6));
-		}
-	
-		// test keys
-		res = mfCheckKeys(trgBlockNo, trgKeyType, 8, keyBlock, &key64);
-		if (res)
-			res = mfCheckKeys(trgBlockNo, trgKeyType, 8, &keyBlock[6 * 8], &key64);
-		if (!res) {
+		key64 = bytes_to_num(keyBlock, 6);
+		if (key64) {
 			PrintAndLog("Found valid key:%012"llx, key64);
 
 			// transfer key to the emulator
@@ -603,6 +595,9 @@ int CmdHF14AMfNested(const char *Cmd)
 		}
 	}
 	else { // ------------------------------------  multiple sectors working
+		clock_t time1;
+		time1 = clock();
+
 		blDiff = blockNo % 4;
 		PrintAndLog("Block shift=%d", blDiff);
 		e_sector = calloc(SectorsCnt, sizeof(sector));
@@ -610,10 +605,10 @@ int CmdHF14AMfNested(const char *Cmd)
 		
 		//test current key 4 sectors
 		memcpy(keyBlock, key, 6);
-		num_to_bytes(0xa0a1a2a3a4a5, 6, (uint8_t*)(keyBlock + 1 * 6));
-		num_to_bytes(0xb0b1b2b3b4b5, 6, (uint8_t*)(keyBlock + 2 * 6));
-		num_to_bytes(0xffffffffffff, 6, (uint8_t*)(keyBlock + 3 * 6));
-		num_to_bytes(0x000000000000, 6, (uint8_t*)(keyBlock + 4 * 6));
+		num_to_bytes(0xffffffffffff, 6, (uint8_t*)(keyBlock + 1 * 6));
+		num_to_bytes(0x000000000000, 6, (uint8_t*)(keyBlock + 2 * 6));
+		num_to_bytes(0xa0a1a2a3a4a5, 6, (uint8_t*)(keyBlock + 3 * 6));
+		num_to_bytes(0xb0b1b2b3b4b5, 6, (uint8_t*)(keyBlock + 4 * 6));
 		num_to_bytes(0xaabbccddeeff, 6, (uint8_t*)(keyBlock + 5 * 6));
 
 		PrintAndLog("Testing known keys. Sector count=%d", SectorsCnt);
@@ -628,32 +623,41 @@ int CmdHF14AMfNested(const char *Cmd)
 					e_sector[i].foundKey[j] = 1;
 				}
 			}
-		} 
+		}
+		
 		
 		// nested sectors
 		iterations = 0;
 		PrintAndLog("nested...");
+		bool calibrate = true;
 		for (i = 0; i < NESTED_SECTOR_RETRY; i++) {
-			for (trgBlockNo = blDiff; trgBlockNo < SectorsCnt * 4; trgBlockNo = trgBlockNo + 4) 
+			for (trgBlockNo = blDiff; trgBlockNo < SectorsCnt * 4; trgBlockNo = trgBlockNo + 4) {
 				for (trgKeyType = 0; trgKeyType < 2; trgKeyType++) { 
 					if (e_sector[trgBlockNo / 4].foundKey[trgKeyType]) continue;
-					if (mfnested(blockNo, keyType, key, trgBlockNo, trgKeyType, keyBlock)) continue;
+					PrintAndLog("-----------------------------------------------");
+					if(mfnested(blockNo, keyType, key, trgBlockNo, trgKeyType, keyBlock, calibrate)) {
+						PrintAndLog("Nested error.\n");
+						free(e_sector);		// allocated with calloc() above: do not leak it on this early return
+						return 2;
+					}
+					// calibration is only requested until the first call to mfnested() has succeeded
+					calibrate = false;
 					
 					iterations++;
-					
-					//try keys from nested
-					res = mfCheckKeys(trgBlockNo, trgKeyType, 8, keyBlock, &key64);
-					if (res)
-						res = mfCheckKeys(trgBlockNo, trgKeyType, 8, &keyBlock[6 * 8], &key64);
-					if (!res) {
+
+					key64 = bytes_to_num(keyBlock, 6);
+					if (key64) {
 						PrintAndLog("Found valid key:%012"llx, key64);
 						e_sector[trgBlockNo / 4].foundKey[trgKeyType] = 1;
 						e_sector[trgBlockNo / 4].Key[trgKeyType] = key64;
 					}
 				}
+			}
 		}
 
-		PrintAndLog("Iterations count: %d", iterations);
+		printf("Time in nested: %1.3f (%1.3f sec per key)\n\n", (double)(clock() - time1)/CLOCKS_PER_SEC, iterations ? (double)(clock() - time1)/iterations/CLOCKS_PER_SEC : 0.0);	// clock() counts CLOCKS_PER_SEC ticks per second (not necessarily 1000); avoid dividing by zero iterations
+		
+		PrintAndLog("-----------------------------------------------\nIterations count: %d\n\n", iterations);
 		//print them
 		PrintAndLog("|---|----------------|---|----------------|---|");
 		PrintAndLog("|sec|key A           |res|key B           |res|");
@@ -830,16 +834,16 @@ int CmdHF14AMfChk(const char *Cmd)
 				while( !feof(f) ){
 					memset(buf, 0, sizeof(buf));
 					if (fgets(buf, sizeof(buf), f) == NULL) {
-            PrintAndLog("File reading error.");
-            return 2;
-          }
+						PrintAndLog("File reading error.");
+						return 2;
+					}
           
 					if (strlen(buf) < 12 || buf[11] == '\n')
 						continue;
 				
 					while (fgetc(f) != '\n' && !feof(f)) ;  //goto next line
 					
-					if( buf[0]=='#' ) continue;	//The line start with # is remcommnet,skip
+					if( buf[0]=='#' ) continue;	//The line start with # is comment, skip
 
 					if (!isxdigit(buf[0])){
 						PrintAndLog("File content error. '%s' must include 12 HEX symbols",buf);
@@ -883,10 +887,10 @@ int CmdHF14AMfChk(const char *Cmd)
 		int b=blockNo;
 		for (int i=0; i<SectorsCnt; ++i) {
 			PrintAndLog("--SectorsCnt:%d block no:0x%02x key type:%C key count:%d ", i,	 b, t?'B':'A', keycnt);
-			int size = keycnt>8?8:keycnt;
-			for (int c = 0; c < keycnt; c+=size) {
-				size=keycnt-c>8?8:keycnt-c;			
-				res = mfCheckKeys(b, t, size, keyBlock +6*c, &key64);
+			uint32_t max_keys = keycnt>USB_CMD_DATA_SIZE/6?USB_CMD_DATA_SIZE/6:keycnt;
+			for (uint32_t c = 0; c < keycnt; c+=max_keys) {
+				uint32_t size = keycnt-c>max_keys?max_keys:keycnt-c;
+				res = mfCheckKeys(b, t, size, &keyBlock[6*c], &key64);
 				if (res !=1) {
 					if (!res) {
 						PrintAndLog("Found valid key:[%012"llx"]",key64);
@@ -896,11 +900,6 @@ int CmdHF14AMfChk(const char *Cmd)
 							num_to_bytes(key64, 6, block + t*10);
 							mfEmlSetMem(block, get_trailer_block(b), 1);
 						}
-						break;
-					}
-					else {
-						printf("Not found yet, keycnt:%d\r", c+size);
-						fflush(stdout);
 					}
 				} else {
 					PrintAndLog("Command execute timeout");
diff --git a/client/mifarehost.c b/client/mifarehost.c
index 9676e6f7..03951e2d 100644
--- a/client/mifarehost.c
+++ b/client/mifarehost.c
@@ -11,181 +11,204 @@
 #include <stdio.h>
 #include <stdlib.h> 
 #include <string.h>
+#include <pthread.h>
 #include "mifarehost.h"
 #include "proxmark3.h"
 
 // MIFARE
 int compar_int(const void * a, const void * b) {
-	return (*(uint64_t*)b - *(uint64_t*)a);
+	// qsort() comparator for uint64_t values; the result yields a descending order.
+	// Returning the plain difference (b - a) didn't work: the result is truncated to 32 bits.
+	const uint64_t lhs = *(const uint64_t *)a;
+	const uint64_t rhs = *(const uint64_t *)b;
+
+	if (rhs == lhs) return 0;
+	return (rhs > lhs) ? 1 : -1;
 }
 
-// Compare countKeys structure
-int compar_special_int(const void * a, const void * b) {
-	return (((countKeys *)b)->count - ((countKeys *)a)->count);
+
+
+// Compare two cryptostates by the 16 Bits selected with the mask only (yields a descending order)
+int Compare16Bits(const void * a, const void * b) {
+	const uint64_t mask = 0x00ff000000ff0000;
+	const uint64_t lhs = *(const uint64_t *)a & mask, rhs = *(const uint64_t *)b & mask;
+	return (rhs == lhs) ? 0 : (rhs > lhs) ? 1 : -1;
 }
 
-countKeys * uniqsort(uint64_t * possibleKeys, uint32_t size) {
-	int i, j = 0;
-	int count = 0;
-	countKeys *our_counts;
-	
-	qsort(possibleKeys, size, sizeof (uint64_t), compar_int);
-	
-	our_counts = calloc(size, sizeof(countKeys));
-	if (our_counts == NULL) {
-		PrintAndLog("Memory allocation error for our_counts");
-		return NULL;
-	}
+
+// Per-nonce work item: the nonce data handed to lfsr_recovery32() and the resulting list.
+typedef struct {
+	union {								// first list entry ...
+		struct Crypto1State *slhead;	// ... when the list holds cryptostates
+		uint64_t *keyhead;				// ... when the same memory is accessed as 64 bit values
+	};
+	union {								// last list entry, same two views
+		struct Crypto1State *sltail;
+		uint64_t *keytail;
+	};
+	uint32_t len;						// number of list entries
+	uint32_t uid;
+	uint32_t blockNo;
+	uint32_t keyType;
+	uint32_t nt;
+	uint32_t ks1;
+} StateList_t;
+
+
+// wrapper function for multi-threaded lfsr_recovery32: builds, measures and presorts one state list
+void* nested_worker_thread(void *arg)
+{
+	StateList_t *list = arg;
+	struct Crypto1State *end = lfsr_recovery32(list->ks1, list->nt ^ list->uid);
+
+	list->slhead = end;
+	while (*(uint64_t *)end != 0) end++;		// an all-zero entry marks the end of the list
+	list->len = end - list->slhead;
+	list->sltail = end - 1;
+	qsort(list->slhead, list->len, sizeof(uint64_t), Compare16Bits);
 	
-	for (i = 0; i < size; i++) {
-        if (possibleKeys[i+1] == possibleKeys[i]) { 
-			count++;
-		} else {
-			our_counts[j].key = possibleKeys[i];
-			our_counts[j].count = count;
-			j++;
-			count=0;
-		}
-	}
-	qsort(our_counts, j, sizeof(countKeys), compar_special_int);
-	return (our_counts);
+	return list->slhead;
 }
 
-int mfnested(uint8_t blockNo, uint8_t keyType, uint8_t * key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t * resultKeys) 
+
+
+
+int mfnested(uint8_t blockNo, uint8_t keyType, uint8_t * key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t * resultKey, bool calibrate) 
 {
-	int i, m, len;
-	uint8_t isEOF;
+	uint16_t i, len;
 	uint32_t uid;
-	fnVector * vector = NULL;
-	countKeys	*ck;
-	int lenVector = 0;
 	UsbCommand resp;
-	
-	memset(resultKeys, 0x00, 16 * 6);
 
+	
+	StateList_t statelists[2];
+	struct Crypto1State *p1, *p2, *p3, *p4;
+	
 	// flush queue
 	WaitForResponseTimeout(CMD_ACK,NULL,100);
 	
-  UsbCommand c = {CMD_MIFARE_NESTED, {blockNo, keyType, trgBlockNo + trgKeyType * 0x100}};
+	UsbCommand c = {CMD_MIFARE_NESTED, {blockNo + keyType * 0x100, trgBlockNo + trgKeyType * 0x100, calibrate}};
 	memcpy(c.d.asBytes, key, 6);
-  SendCommand(&c);
-
-	PrintAndLog("\n");
-
-	// wait cycle
-	while (true) {
-		printf(".");
-		if (ukbhit()) {
-			getchar();
-			printf("\naborted via keyboard!\n");
-			break;
-		}
-
-		if (WaitForResponseTimeout(CMD_ACK,&resp,1500)) {
-			isEOF  = resp.arg[0] & 0xff;
+	SendCommand(&c);
 
-			if (isEOF) break;
-			
-			len = resp.arg[1] & 0xff;
-			if (len == 0) continue;
-			
+	if (!WaitForResponseTimeout(CMD_ACK,&resp,1500)) return 1;	// no answer within the timeout: statelists[] would stay uninitialised
+	len = resp.arg[1];
+	if (len == 2) {	
 			memcpy(&uid, resp.d.asBytes, 4);
-			PrintAndLog("uid:%08x len=%d trgbl=%d trgkey=%x", uid, len, resp.arg[2] & 0xff, (resp.arg[2] >> 8) & 0xff);
-			vector = (fnVector *) realloc((void *)vector, (lenVector + len) * sizeof(fnVector) + 200);
-			if (vector == NULL) {
-				PrintAndLog("Memory allocation error for fnVector. len: %d bytes: %d", lenVector + len, (lenVector + len) * sizeof(fnVector)); 
-				break;
-			}
+			PrintAndLog("uid:%08x len=%d trgbl=%d trgkey=%x", uid, len, (uint16_t)resp.arg[2] & 0xff, (uint16_t)resp.arg[2] >> 8);
 			
-			for (i = 0; i < len; i++) {
-				vector[lenVector + i].blockNo = resp.arg[2] & 0xff;
-				vector[lenVector + i].keyType = (resp.arg[2] >> 8) & 0xff;
-				vector[lenVector + i].uid = uid;
+			for (i = 0; i < 2; i++) {
+				statelists[i].blockNo = resp.arg[2] & 0xff;
+				statelists[i].keyType = (resp.arg[2] >> 8) & 0xff;
+				statelists[i].uid = uid;
 
-				memcpy(&vector[lenVector + i].nt,  (void *)(resp.d.asBytes + 8 + i * 8 + 0), 4);
-				memcpy(&vector[lenVector + i].ks1, (void *)(resp.d.asBytes + 8 + i * 8 + 4), 4);
+				memcpy(&statelists[i].nt,  (void *)(resp.d.asBytes + 4 + i * 8 + 0), 4);
+				memcpy(&statelists[i].ks1, (void *)(resp.d.asBytes + 4 + i * 8 + 4), 4);
 			}
-
-			lenVector += len;
+	}
+	else {
+		// the device answered, but did not deliver the expected two nonces
+		PrintAndLog("Got 0 keys from proxmark."); 
+		return 1;
 	}
 	
-	if (!lenVector) {
-		PrintAndLog("Got 0 keys from proxmark."); 
-		return 1;
-	}
-	printf("------------------------------------------------------------------\n");
-	
 	// calc keys
-	struct Crypto1State* revstate = NULL;
-	struct Crypto1State* revstate_start = NULL;
-	uint64_t lfsr;
-	int kcount = 0;
-	pKeys		*pk;
-	
-	if ((pk = (void *) malloc(sizeof(pKeys))) == NULL) return 1;
-	memset(pk, 0x00, sizeof(pKeys));
 	
-	for (m = 0; m < lenVector; m++) {
-		// And finally recover the first 32 bits of the key
-		revstate = lfsr_recovery32(vector[m].ks1, vector[m].nt ^ vector[m].uid);
-		if (revstate_start == NULL) revstate_start = revstate;
+	pthread_t thread_id[2];
+		
+	// create and run worker threads
+	for (i = 0; i < 2; i++) {
+		pthread_create(thread_id + i, NULL, nested_worker_thread, &statelists[i]);
+	}
 	
-		while ((revstate->odd != 0x0) || (revstate->even != 0x0)) {
-			lfsr_rollback_word(revstate, vector[m].nt ^ vector[m].uid, 0);
-			crypto1_get_lfsr(revstate, &lfsr);
+	// wait for threads to terminate:
+	for (i = 0; i < 2; i++) {
+		pthread_join(thread_id[i], (void*)&statelists[i].slhead);
+	}
+
 
-			// Allocate a new space for keys
-			if (((kcount % MEM_CHUNK) == 0) || (kcount >= pk->size)) {
-				pk->size += MEM_CHUNK;
-//fprintf(stdout, "New chunk by %d, sizeof %d\n", kcount, pk->size * sizeof(uint64_t));
-				pk->possibleKeys = (uint64_t *) realloc((void *)pk->possibleKeys, pk->size * sizeof(uint64_t));
-				if (pk->possibleKeys == NULL) {
-					PrintAndLog("Memory allocation error for pk->possibleKeys"); 
-					return 1;
-				}
+	// the first 16 Bits of the cryptostate already contain part of our key.
+	// Create the intersection of the two lists based on these 16 Bits and
+	// roll back the cryptostate
+	p1 = p3 = statelists[0].slhead; 
+	p2 = p4 = statelists[1].slhead;
+	while (p1 <= statelists[0].sltail && p2 <= statelists[1].sltail) {
+		if (Compare16Bits(p1, p2) == 0) {
+			struct Crypto1State savestate, *savep = &savestate;
+			savestate = *p1;
+			while(Compare16Bits(p1, savep) == 0 && p1 <= statelists[0].sltail) {
+				*p3 = *p1;
+				lfsr_rollback_word(p3, statelists[0].nt ^ statelists[0].uid, 0);
+				p3++;
+				p1++;
+			}
+			savestate = *p2;
+			while(Compare16Bits(p2, savep) == 0 && p2 <= statelists[1].sltail) {
+				*p4 = *p2;
+				lfsr_rollback_word(p4, statelists[1].nt ^ statelists[1].uid, 0);
+				p4++;
+				p2++;
 			}
-			pk->possibleKeys[kcount] = lfsr;
-			kcount++;
-			revstate++;
 		}
-	free(revstate_start);
-	revstate_start = NULL;
-
+		else {
+			while (Compare16Bits(p1, p2) == -1) p1++;
+			while (Compare16Bits(p1, p2) == 1) p2++;
+		}
 	}
-	
-	// Truncate
-	if (kcount != 0) {
-		pk->size = --kcount;
-		if ((pk->possibleKeys = (uint64_t *) realloc((void *)pk->possibleKeys, pk->size * sizeof(uint64_t))) == NULL) {
-			PrintAndLog("Memory allocation error for pk->possibleKeys"); 
-			return 1;
-		}		
+	p3->even = 0; p3->odd = 0;
+	p4->even = 0; p4->odd = 0;
+	statelists[0].len = p3 - statelists[0].slhead;
+	statelists[1].len = p4 - statelists[1].slhead;
+	statelists[0].sltail=--p3;
+	statelists[1].sltail=--p4;
+
+	// the statelists now contain possible keys. The key we are searching for must be in the
+	// intersection of both lists. Create the intersection:
+	qsort(statelists[0].keyhead, statelists[0].len, sizeof(uint64_t), compar_int);
+	qsort(statelists[1].keyhead, statelists[1].len, sizeof(uint64_t), compar_int);
+
+	uint64_t *p5, *p6, *p7;
+	p5 = p7 = statelists[0].keyhead; 
+	p6 = statelists[1].keyhead;
+	while (p5 <= statelists[0].keytail && p6 <= statelists[1].keytail) {
+		if (compar_int(p5, p6) == 0) {
+			*p7++ = *p5++;
+			p6++;
+		}
+		else {
+			while (compar_int(p5, p6) == -1) p5++;
+			while (compar_int(p5, p6) == 1) p6++;
+		}
 	}
-
-	PrintAndLog("Total keys count:%d", kcount);
-	ck = uniqsort(pk->possibleKeys, pk->size);
-
-	// fill key array
-	for (i = 0; i < 16 ; i++) {
-		num_to_bytes(ck[i].key, 6, (uint8_t*)(resultKeys + i * 6));
+	statelists[0].len = p7 - statelists[0].keyhead;
+	statelists[0].keytail=--p7;
+
+	memset(resultKey, 0, 6);
+	// The list may still contain several key candidates. Test each of them with mfCheckKeys
+	for (i = 0; i < statelists[0].len; i++) {
+		uint8_t keyBlock[6];
+		uint64_t key64;
+		crypto1_get_lfsr(statelists[0].slhead + i, &key64);
+		num_to_bytes(key64, 6, keyBlock);
+		key64 = 0;
+		if (!mfCheckKeys(statelists[0].blockNo, statelists[0].keyType, 1, keyBlock, &key64)) {
+			num_to_bytes(key64, 6, resultKey);
+			break;
+		}
 	}
-
-	// finalize
-	free(pk->possibleKeys);
-	free(pk);
-	free(ck);
-	free(vector);
-
+	
+	free(statelists[0].slhead);
+	free(statelists[1].slhead);
+	
 	return 0;
 }
 
 int mfCheckKeys (uint8_t blockNo, uint8_t keyType, uint8_t keycnt, uint8_t * keyBlock, uint64_t * key){
+
 	*key = 0;
 
-  UsbCommand c = {CMD_MIFARE_CHKKEYS, {blockNo, keyType, keycnt}};
+	UsbCommand c = {CMD_MIFARE_CHKKEYS, {blockNo, keyType, keycnt}};
 	memcpy(c.d.asBytes, keyBlock, 6 * keycnt);
-  SendCommand(&c);
+	SendCommand(&c);
 
 	UsbCommand resp;
 	if (!WaitForResponseTimeout(CMD_ACK,&resp,3000)) return 1;
diff --git a/client/mifarehost.h b/client/mifarehost.h
index 9e026a55..5de082ce 100644
--- a/client/mifarehost.h
+++ b/client/mifarehost.h
@@ -43,26 +43,14 @@
 
 #define TRACE_ERROR		 					0xFF
 
-typedef struct fnVector { uint8_t blockNo, keyType; uint32_t uid, nt, ks1; } fnVector;
-
 typedef struct {
 	uint64_t Key[2];
 	int foundKey[2];
 } sector;
  
-typedef struct {
-        uint64_t        *possibleKeys;
-        uint32_t        size;
-} pKeys;
-
-typedef struct {
-        uint64_t        key;
-        int             count;
-} countKeys;
-
 extern char logHexFileName[200];
 
-int mfnested(uint8_t blockNo, uint8_t keyType, uint8_t * key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t * ResultKeys);
+int mfnested(uint8_t blockNo, uint8_t keyType, uint8_t * key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t * ResultKeys, bool calibrate);
 int mfCheckKeys (uint8_t blockNo, uint8_t keyType, uint8_t keycnt, uint8_t * keyBlock, uint64_t * key);
 
 int mfEmlGetMem(uint8_t *data, int blockNum, int blocksCount);
diff --git a/client/nonce2key/crapto1.c b/client/nonce2key/crapto1.c
index c0a158b5..fc878a0d 100644
--- a/client/nonce2key/crapto1.c
+++ b/client/nonce2key/crapto1.c
@@ -31,6 +31,71 @@ static void __attribute__((constructor)) fill_lut()
 #define filter(x) (filterlut[(x) & 0xfffff])
 #endif
 
+
+
+typedef struct bucket {
+	uint32_t *head;		// start of the memory belonging to this bucket
+	uint32_t *bp;		// next free slot; bp == head means the bucket is empty
+} bucket_t;
+// two sets (index 0: even list, index 1: odd list) of 0x100 buckets each, one bucket per MSB value
+typedef bucket_t bucket_array_t[2][0x100];
+// result of bucket_sort_intersect(): where the surviving buckets ended up in the two lists
+typedef struct bucket_info {
+	struct {
+		uint32_t *head, *tail;		// first and last entry of the bucket contents inside the list
+		} bucket_info[2][0x100];
+		uint32_t numbuckets;		// number of buckets which are non-empty in both lists
+	} bucket_info_t;
+	
+
+// Sort the even list [estart, estop] and the odd list [ostart, ostop] into buckets, using the
+// MSB of each entry as the bucket index. Only buckets which are non-empty in BOTH lists are
+// written back (in ascending bucket order); all other entries are dropped from the lists.
+//   bucket_info: receives head and tail of every surviving bucket within the rewritten lists
+//                (index 0: even, index 1: odd) and the number of surviving buckets.
+//   bucket:      scratch buckets; every bucket[i][j].head must already point to memory which
+//                is large enough to take the entries of that bucket.
+static void bucket_sort_intersect(uint32_t* const estart, uint32_t* const estop,
+								  uint32_t* const ostart, uint32_t* const ostop,
+								  bucket_info_t *bucket_info, bucket_array_t bucket)
+{
+	uint32_t *first[2] = { estart, ostart };
+	uint32_t *last[2]  = { estop, ostop };
+
+	for (uint32_t side = 0; side < 2; side++) {
+		// init buckets to be empty
+		for (uint32_t idx = 0; idx < 0x100; idx++) {
+			bucket[side][idx].bp = bucket[side][idx].head;
+		}
+		// sort the list into the buckets based on the MSB (contribution bits)
+		for (uint32_t *src = first[side]; src <= last[side]; src++) {
+			bucket_t *dest = &bucket[side][*src >> 24];
+			*dest->bp++ = *src;
+		}
+	}
+
+	// write back intersecting buckets as sorted list.
+	// fill in bucket_info with head and tail of the bucket contents in the list and number of non-empty buckets.
+	for (uint32_t side = 0; side < 2; side++) {
+		uint32_t *dst = first[side];
+		uint32_t used = 0;
+		for (uint32_t idx = 0; idx < 0x100; idx++) {
+			// skip buckets which are empty in at least one of the two lists
+			if (bucket[0][idx].bp == bucket[0][idx].head || bucket[1][idx].bp == bucket[1][idx].head) {
+				continue;
+			}
+			bucket_info->bucket_info[side][used].head = dst;
+			for (uint32_t *src = bucket[side][idx].head; src < bucket[side][idx].bp; src++) {
+				*dst++ = *src;
+			}
+			bucket_info->bucket_info[side][used].tail = dst - 1;
+			used++;
+		}
+		bucket_info->numbuckets = used;
+	}
+}
+
+
 static void quicksort(uint32_t* const start, uint32_t* const stop)
 {
 	uint32_t *it = start + 1, *rit = stop;
@@ -54,6 +119,8 @@ static void quicksort(uint32_t* const start, uint32_t* const stop)
 	quicksort(start, rit - 1);
 	quicksort(rit + 1, stop);
 }
+
+
 /** binsearch
  * Binary search for the first occurence of *stop's MSB in sorted [start,stop]
  */
@@ -90,45 +157,55 @@ static inline void
 extend_table(uint32_t *tbl, uint32_t **end, int bit, int m1, int m2, uint32_t in)
 {
 	in <<= 24;
-	for(*tbl <<= 1; tbl <= *end; *++tbl <<= 1)
-		if(filter(*tbl) ^ filter(*tbl | 1)) {
-			*tbl |= filter(*tbl) ^ bit;
-			update_contribution(tbl, m1, m2);
-			*tbl ^= in;
-		} else if(filter(*tbl) == bit) {
-			*++*end = tbl[1];
-			tbl[1] = tbl[0] | 1;
-			update_contribution(tbl, m1, m2);
-			*tbl++ ^= in;
-			update_contribution(tbl, m1, m2);
-			*tbl ^= in;
-		} else
-			*tbl-- = *(*end)--;
+	// walk the table [tbl, *end]: shift every entry left by one bit and test both values of the new LSB
+	for(uint32_t *p = tbl; p <= *end; p++) {
+		*p <<= 1;
+		if(filter(*p) != filter(*p | 1)) {			 	// replace: only one LSB value gives 'bit', keep the entry with that LSB
+			*p |= filter(*p) ^ bit;
+			update_contribution(p, m1, m2);
+			*p ^= in;
+		} else if(filter(*p) == bit) {					// insert: both LSB values give 'bit', keep two entries (LSB 0 and LSB 1)
+			*++*end = p[1];
+			p[1] = p[0] | 1;
+			update_contribution(p, m1, m2);
+			*p++ ^= in;
+			update_contribution(p, m1, m2);
+			*p ^= in;
+		} else {										// drop: no LSB value gives 'bit'; move the last entry here and visit this slot again
+			*p-- = *(*end)--;
+		} 
+	}
+	
 }
+
+
 /** extend_table_simple
  * using a bit of the keystream extend the table of possible lfsr states
  */
 static inline void
 extend_table_simple(uint32_t *tbl, uint32_t **end, int bit)
 {
-	for(*tbl <<= 1; tbl <= *end; *++tbl <<= 1)
-		if(filter(*tbl) ^ filter(*tbl | 1)) {
+	for(*tbl <<= 1; tbl <= *end; *++tbl <<= 1)	
+		if(filter(*tbl) ^ filter(*tbl | 1)) {	// replace: only one LSB value gives 'bit', keep the entry with that LSB
 			*tbl |= filter(*tbl) ^ bit;
-		} else if(filter(*tbl) == bit) {
+		} else if(filter(*tbl) == bit) {		// insert: both LSB values give 'bit', keep two entries (LSB 0 and LSB 1)
 			*++*end = *++tbl;
 			*tbl = tbl[-1] | 1;
-		} else
+		} else									// drop: no LSB value gives 'bit'; move the last entry here and visit this slot again
 			*tbl-- = *(*end)--;
 }
+
+
 /** recover
  * recursively narrow down the search space, 4 bits of keystream at a time
  */
 static struct Crypto1State*
 recover(uint32_t *o_head, uint32_t *o_tail, uint32_t oks,
 	uint32_t *e_head, uint32_t *e_tail, uint32_t eks, int rem,
-	struct Crypto1State *sl, uint32_t in)
+	struct Crypto1State *sl, uint32_t in, bucket_array_t bucket)
 {
-	uint32_t *o, *e, i;
+	uint32_t *o, *e;
+	bucket_info_t bucket_info;
 
 	if(rem == -1) {
 		for(e = e_head; e <= e_tail; ++e) {
@@ -136,13 +213,13 @@ recover(uint32_t *o_head, uint32_t *o_tail, uint32_t oks,
 			for(o = o_head; o <= o_tail; ++o, ++sl) {
 				sl->even = *o;
 				sl->odd = *e ^ parity(*o & LF_POLY_ODD);
-				sl[1].odd = sl[1].even = 0;
 			}
 		}
+		sl->odd = sl->even = 0;
 		return sl;
 	}
 
-	for(i = 0; i < 4 && rem--; i++) {
+	for(uint32_t i = 0; i < 4 && rem--; i++) {
 		extend_table(o_head, &o_tail, (oks >>= 1) & 1,
 			LF_POLY_EVEN << 1 | 1, LF_POLY_ODD << 1, 0);
 		if(o_head > o_tail)
@@ -154,21 +231,14 @@ recover(uint32_t *o_head, uint32_t *o_tail, uint32_t oks,
 			return sl;
 	}
 
-	quicksort(o_head, o_tail);
-	quicksort(e_head, e_tail);
-
-	while(o_tail >= o_head && e_tail >= e_head)
-		if(((*o_tail ^ *e_tail) >> 24) == 0) {
-			o_tail = binsearch(o_head, o = o_tail);
-			e_tail = binsearch(e_head, e = e_tail);
-			sl = recover(o_tail--, o, oks,
-				     e_tail--, e, eks, rem, sl, in);
-		}
-		else if(*o_tail > *e_tail)
-			o_tail = binsearch(o_head, o_tail) - 1;
-		else
-			e_tail = binsearch(e_head, e_tail) - 1;
-
+	bucket_sort_intersect(e_head, e_tail, o_head, o_tail, &bucket_info, bucket);
+	
+	for (int i = bucket_info.numbuckets - 1; i >= 0; i--) {
+		sl = recover(bucket_info.bucket_info[1][i].head, bucket_info.bucket_info[1][i].tail, oks,
+				     bucket_info.bucket_info[0][i].head, bucket_info.bucket_info[0][i].tail, eks,
+					 rem, sl, in, bucket);
+	}
+	
 	return sl;
 }
 /** lfsr_recovery
@@ -183,6 +253,7 @@ struct Crypto1State* lfsr_recovery32(uint32_t ks2, uint32_t in)
 	uint32_t *even_head = 0, *even_tail = 0, eks = 0;
 	int i;
 
+	// split the keystream into an odd and even part
 	for(i = 31; i >= 0; i -= 2)
 		oks = oks << 1 | BEBIT(ks2, i);
 	for(i = 30; i >= 0; i -= 2)
@@ -191,11 +262,23 @@ struct Crypto1State* lfsr_recovery32(uint32_t ks2, uint32_t in)
 	odd_head = odd_tail = malloc(sizeof(uint32_t) << 21);
 	even_head = even_tail = malloc(sizeof(uint32_t) << 21);
 	statelist =  malloc(sizeof(struct Crypto1State) << 18);
-	if(!odd_tail-- || !even_tail-- || !statelist)
+	if(!odd_tail-- || !even_tail-- || !statelist) {
 		goto out;
-
+	}
 	statelist->odd = statelist->even = 0;
 
+	// allocate memory for out of place bucket_sort
+	bucket_array_t bucket;
+	for (uint32_t i = 0; i < 2; i++)
+		for (uint32_t j = 0; j <= 0xff; j++) {
+			bucket[i][j].head = malloc(sizeof(uint32_t)<<14);
+			if (!bucket[i][j].head) {
+				goto out;
+			}
+		}
+
+	
+	// initialize statelists: add all possible states which would result in the rightmost 2 bits of the keystream
 	for(i = 1 << 20; i >= 0; --i) {
 		if(filter(i) == (oks & 1))
 			*++odd_tail = i;
@@ -203,18 +286,29 @@ struct Crypto1State* lfsr_recovery32(uint32_t ks2, uint32_t in)
 			*++even_tail = i;
 	}
 
+	// extend the statelists. Look at the next 8 Bits of the keystream (4 Bit each odd and even):
 	for(i = 0; i < 4; i++) {
 		extend_table_simple(odd_head,  &odd_tail, (oks >>= 1) & 1);
 		extend_table_simple(even_head, &even_tail, (eks >>= 1) & 1);
 	}
 
-	in = (in >> 16 & 0xff) | (in << 16) | (in & 0xff00);
+	// the statelists now contain all states which could have generated the last 10 Bits of the keystream.
+	// 22 bits to go to recover 32 bits in total. From now on, we need to take the "in"
+	// parameter into account.
+
+	in = (in >> 16 & 0xff) | (in << 16) | (in & 0xff00);		// Byte swapping
+
 	recover(odd_head, odd_tail, oks,
-		even_head, even_tail, eks, 11, statelist, in << 1);
+		even_head, even_tail, eks, 11, statelist, in << 1, bucket);
+
 
 out:
 	free(odd_head);
 	free(even_head);
+	for (uint32_t i = 0; i < 2; i++)
+		for (uint32_t j = 0; j <= 0xff; j++)
+			free(bucket[i][j].head);
+	
 	return statelist;
 }
 
diff --git a/client/proxmark3.c b/client/proxmark3.c
index 5cbacc86..59736ce7 100644
--- a/client/proxmark3.c
+++ b/client/proxmark3.c
@@ -24,10 +24,14 @@
 #include "ui.h"
 #include "sleep.h"
 
+// a global mutex to prevent interlaced printing from different threads
+pthread_mutex_t print_lock;
+
 static serial_port sp;
 static UsbCommand txcmd;
 volatile static bool txcmd_pending = false;
 
+
 void SendCommand(UsbCommand *c) {
 #if 0
   printf("Sending %d bytes\n", sizeof(UsbCommand));
@@ -196,20 +200,20 @@ static void *main_loop(void *targ) {
 }
 
 int main(int argc, char* argv[]) {
-  srand(time(0));
+	srand(time(0));
   
-  if (argc < 2) {
-    printf("syntax: %s <port>\n\n",argv[0]);
-    printf("\tLinux example:'%s /dev/ttyACM0'\n\n", argv[0]);
-    return 1;
-  }
+	if (argc < 2) {
+		printf("syntax: %s <port>\n\n",argv[0]);
+		printf("\tLinux example:'%s /dev/ttyACM0'\n\n", argv[0]);
+		return 1;
+	}
   
-  // Make sure to initialize
-  struct main_loop_arg marg = {
-    .usb_present = 0,
-    .script_cmds_file = NULL
-  };
-  pthread_t main_loop_t;
+	// Make sure to initialize
+	struct main_loop_arg marg = {
+		.usb_present = 0,
+		.script_cmds_file = NULL
+	};
+	pthread_t main_loop_t;
 
 /*
   usb_init();
@@ -223,38 +227,44 @@ int main(int argc, char* argv[]) {
   }
 */
   
-  sp = uart_open(argv[1]);
-  if (sp == INVALID_SERIAL_PORT) {
-    printf("ERROR: invalid serial port\n");
-    marg.usb_present = 0;
-    offline = 1;
-  } else if (sp == CLAIMED_SERIAL_PORT) {
-    printf("ERROR: serial port is claimed by another process\n");
-    marg.usb_present = 0;
-    offline = 1;
-  } else {
-    marg.usb_present = 1;
-    offline = 0;
-  }
+	sp = uart_open(argv[1]);
+	if (sp == INVALID_SERIAL_PORT) {
+		printf("ERROR: invalid serial port\n");
+		marg.usb_present = 0;
+		offline = 1;
+	} else if (sp == CLAIMED_SERIAL_PORT) {
+		printf("ERROR: serial port is claimed by another process\n");
+		marg.usb_present = 0;
+		offline = 1;
+	} else {
+		marg.usb_present = 1;
+		offline = 0;
+	}
 
-  // If the user passed the filename of the 'script' to execute, get it
-  if (argc > 2 && argv[2]) {
-    marg.script_cmds_file = argv[2];
-  }
+	// If the user passed the filename of the 'script' to execute, get it
+	if (argc > 2 && argv[2]) {
+		marg.script_cmds_file = argv[2];
+	}
   
-  pthread_create(&main_loop_t, NULL, &main_loop, &marg);
-  InitGraphics(argc, argv);
+	// create a mutex to avoid interlacing print commands from our different threads
+	pthread_mutex_init(&print_lock, NULL);
 
-  MainGraphics();
+	pthread_create(&main_loop_t, NULL, &main_loop, &marg);
+	InitGraphics(argc, argv);
 
-  pthread_join(main_loop_t, NULL);
+	MainGraphics();
+
+	pthread_join(main_loop_t, NULL);
 
 //  if (marg.usb_present == 1) {
 //    CloseProxmark();
 //  }
 
-  // Clean up the port
-  uart_close(sp);
+	// Clean up the port
+	uart_close(sp);
+  
+	// clean up mutex
+	pthread_mutex_destroy(&print_lock);
   
   return 0;
 }
diff --git a/client/ui.c b/client/ui.c
index 09479620..5fe58dc2 100644
--- a/client/ui.c
+++ b/client/ui.c
@@ -14,6 +14,7 @@
 #include <stdio.h>
 #include <time.h>
 #include <readline/readline.h>
+#include <pthread.h>
 
 #include "ui.h"
 
@@ -21,23 +22,28 @@ double CursorScaleFactor;
 int PlotGridX, PlotGridY, PlotGridXdefault= 64, PlotGridYdefault= 64;
 int offline;
 
+extern pthread_mutex_t print_lock;
+
 static char *logfilename = "proxmark3.log";
 
 void PrintAndLog(char *fmt, ...)
 {
 	char *saved_line;
 	int saved_point;
-  va_list argptr, argptr2;
-  static FILE *logfile = NULL;
-  static int logging=1;
+	va_list argptr, argptr2;
+	static FILE *logfile = NULL;
+	static int logging=1;
 
-  if (logging && !logfile) {
-    logfile=fopen(logfilename, "a");
-    if (!logfile) {
-      fprintf(stderr, "Can't open logfile, logging disabled!\n");
-      logging=0;
-    }
-  }
+	// lock this section to avoid interlacing prints from different threads
+	pthread_mutex_lock(&print_lock);
+  
+	if (logging && !logfile) {
+		logfile=fopen(logfilename, "a");
+		if (!logfile) {
+			fprintf(stderr, "Can't open logfile, logging disabled!\n");
+			logging=0;
+		}
+	}
 	
 	int need_hack = (rl_readline_state & RL_STATE_READCMD) > 0;
 
@@ -49,12 +55,12 @@ void PrintAndLog(char *fmt, ...)
 		rl_redisplay();
 	}
 	
-  va_start(argptr, fmt);
-  va_copy(argptr2, argptr);
-  vprintf(fmt, argptr);
-  printf("          "); // cleaning prompt
-  va_end(argptr);
-  printf("\n");
+	va_start(argptr, fmt);
+	va_copy(argptr2, argptr);
+	vprintf(fmt, argptr);
+	printf("          "); // cleaning prompt
+	va_end(argptr);
+	printf("\n");
 
 	if (need_hack) {
 		rl_restore_prompt();
@@ -64,14 +70,18 @@ void PrintAndLog(char *fmt, ...)
 		free(saved_line);
 	}
 	
-  if (logging && logfile) {
-    vfprintf(logfile, fmt, argptr2);
-    fprintf(logfile,"\n");
-    fflush(logfile);
-  }
-  va_end(argptr2);
+	if (logging && logfile) {
+		vfprintf(logfile, fmt, argptr2);
+		fprintf(logfile,"\n");
+		fflush(logfile);
+	}
+	va_end(argptr2);
+
+	//release lock
+	pthread_mutex_unlock(&print_lock);  
 }
 
+
 void SetLogFilename(char *fn)
 {
   logfilename = fn;
-- 
2.39.5