X-Git-Url: https://git.zerfleddert.de/cgi-bin/gitweb.cgi/proxmark3-svn/blobdiff_plain/087c8bf3303e69d79cbbfdfb8edc836587cfb820..refs/pull/568/head:/client/cmdhfmfhard.c?ds=sidebyside

diff --git a/client/cmdhfmfhard.c b/client/cmdhfmfhard.c
index b5eabb82..cf19436c 100644
--- a/client/cmdhfmfhard.c
+++ b/client/cmdhfmfhard.c
@@ -32,7 +32,9 @@
 #include "crapto1/crapto1.h"
 #include "parity.h"
 #include "hardnested/hardnested_bruteforce.h"
+#include "hardnested/hardnested_bf_core.h"
 #include "hardnested/hardnested_bitarray_core.h"
+#include "zlib.h"
 
 #define NUM_CHECK_BITFLIPS_THREADS		(num_CPUs())
 #define NUM_REDUCTION_WORKING_THREADS	(num_CPUs())
@@ -40,7 +42,7 @@
 #define IGNORE_BITFLIP_THRESHOLD		0.99	// ignore bitflip arrays which have nearly only valid states
 
 #define STATE_FILES_DIRECTORY			"hardnested/tables/"
-#define STATE_FILE_TEMPLATE				"bitflip_%d_%03" PRIx16 "_states.bin"
+#define STATE_FILE_TEMPLATE				"bitflip_%d_%03" PRIx16 "_states.bin.z"
 
 #define DEBUG_KEY_ELIMINATION
 // #define DEBUG_REDUCTION
@@ -70,25 +72,32 @@ static float brute_force_per_second;
 
 
 static void get_SIMD_instruction_set(char* instruction_set) {
-	#if !defined(__APPLE__) || (defined(__APPLE__) && (__clang_major__ > 8))
-		#if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
-	if (__builtin_cpu_supports("avx512f")) strcpy(instruction_set, "AVX512F");
-	else if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2");
-		#else 
-	if (__builtin_cpu_supports("avx2")) strcpy(instruction_set, "AVX2");
-		#endif
-	else if (__builtin_cpu_supports("avx")) strcpy(instruction_set, "AVX");
-	else if (__builtin_cpu_supports("sse2")) strcpy(instruction_set, "SSE2");
-	else if (__builtin_cpu_supports("mmx")) strcpy(instruction_set, "MMX");
-	else 
-	#endif
-		strcpy(instruction_set, "unsupported");
+	switch(GetSIMDInstrAuto()) {
+		case SIMD_AVX512:
+			strcpy(instruction_set, "AVX512F");
+			break;
+		case SIMD_AVX2:
+			strcpy(instruction_set, "AVX2");
+			break;
+		case SIMD_AVX:
+			strcpy(instruction_set, "AVX");
+			break;
+		case SIMD_SSE2:
+			strcpy(instruction_set, "SSE2");
+			break;
+		case SIMD_MMX:
+			strcpy(instruction_set, "MMX");
+			break;
+		default:
+			strcpy(instruction_set, "no");
+			break;
+	}	
 }
 
 
 static void print_progress_header(void) {
 	char progress_text[80];
-	char instr_set[12] = "";
+	char instr_set[12] = {0};
 	get_SIMD_instruction_set(instr_set);
 	sprintf(progress_text, "Start using %d threads and %s SIMD core", num_CPUs(), instr_set);
 	PrintAndLog("\n\n");
@@ -141,12 +150,6 @@ static inline void set_bit24(uint32_t *bitarray, uint32_t index)
 }
 
 
-static inline void clear_bit24(uint32_t *bitarray, uint32_t index)
-{
-	bitarray[index>>5] &= ~(0x80000000>>(index&0x0000001f));
-}
-
-
 static inline uint32_t test_bit24(uint32_t *bitarray, uint32_t index)
 {
 	return 	bitarray[index>>5] & (0x80000000>>(index&0x0000001f));
@@ -187,40 +190,6 @@ static inline uint32_t next_state(uint32_t *bitarray, uint32_t state)
 }
 
 
-static inline uint32_t next_not_state(uint32_t *bitarray, uint32_t state)
-{
-	if (++state == 1<<24) return 1<<24;
-	uint32_t index = state >> 5;
-	uint_fast8_t bit = state & 0x1f;
-	uint32_t line = bitarray[index] << bit;
-	while (bit <= 0x1f) {
-		if ((line & 0x80000000) == 0) return state;
-		state++;
-		bit++;
-		line <<= 1;
-	}
-	index++;
-	while (bitarray[index] == 0xffffffff && state < 1<<24) {
-		index++;
-		state += 0x20;
-	}
-	if (state >= 1<<24) return 1<<24;
-#if defined __GNUC__
-	return state + __builtin_clz(~bitarray[index]);
-#else
-	bit = 0x00;
-	line = bitarray[index];
-	while (bit <= 0x1f) {
-		if ((line & 0x80000000) == 0) return state;
-		state++;
-		bit++;
-		line <<= 1;
-	}
-	return 1<<24;
-#endif
-}
-
-
 
 
 #define BITFLIP_2ND_BYTE				0x0200
@@ -240,14 +209,50 @@ static int compare_count_bitflip_bitarrays(const void *b1, const void *b2)
 }
 
 
+static voidpf inflate_malloc(voidpf opaque, uInt items, uInt size)
+{	// allocator callback handed to zlib as z_stream.zalloc (see init_inflate); opaque is not used
+	return malloc(items*size);	// items*size bytes from the C heap; NULL on failure is passed back to zlib
+}
+
+
+static void inflate_free(voidpf opaque, voidpf address)
+{	// deallocator callback handed to zlib as z_stream.zfree (see init_inflate); opaque is not used
+	free(address);	// counterpart of inflate_malloc, which allocates with malloc()
+}
+
+#define OUTPUT_BUFFER_LEN 80
+#define INPUT_BUFFER_LEN 80
+
+//----------------------------------------------------------------------------
+// Prepare a z_stream for inflating one zlib-compressed bitflip state file.
+// Attaches the in/out buffers and allocator callbacks, then calls inflateInit2().
+//----------------------------------------------------------------------------
+static void init_inflate(z_streamp compressed_stream, uint8_t *input_buffer, uint32_t insize, uint8_t *output_buffer, uint32_t outsize)
+{
+	// initialize z_stream structure for inflate:
+	compressed_stream->next_in = input_buffer;
+	compressed_stream->avail_in = insize;
+	compressed_stream->next_out = output_buffer;
+	compressed_stream->avail_out = outsize;
+	compressed_stream->zalloc = &inflate_malloc;
+	compressed_stream->zfree = &inflate_free;
+	compressed_stream->opaque = Z_NULL;	// zlib requires opaque to be set too; the caller's z_stream is uninitialized
+	// windowBits == 0: take the window size from the zlib header of the stream
+	inflateInit2(compressed_stream, 0);
+}
+
+
 static void init_bitflip_bitarrays(void)
 {
 #if defined (DEBUG_REDUCTION)
 	uint8_t line = 0;
 #endif	
 
+
+	z_stream compressed_stream;
+	
 	char state_files_path[strlen(get_my_executable_directory()) + strlen(STATE_FILES_DIRECTORY) + strlen(STATE_FILE_TEMPLATE) + 1];
-	char state_file_name[strlen(STATE_FILE_TEMPLATE)];
+	char state_file_name[strlen(STATE_FILE_TEMPLATE)+1];
 	
 	for (odd_even_t odd_even = EVEN_STATE; odd_even <= ODD_STATE; odd_even++) {
 		num_effective_bitflips[odd_even] = 0;
@@ -262,22 +267,31 @@ static void init_bitflip_bitarrays(void)
 			if (statesfile == NULL) {
 				continue;
 			} else {
-				uint32_t *bitset = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
-				if (bitset == NULL) {
-					printf("Out of memory error in init_bitflip_statelists(). Aborting...\n");
-					fclose(statesfile);
-					exit(4);
-				}
-				size_t bytesread = fread(bitset, 1, sizeof(uint32_t) * (1<<19), statesfile);
-				if (bytesread != sizeof(uint32_t) * (1<<19)) {
-					printf("File read error with %s. Aborting...", state_file_name);
+				fseek(statesfile, 0, SEEK_END);
+				uint32_t filesize = (uint32_t)ftell(statesfile);
+				rewind(statesfile);
+				uint8_t input_buffer[filesize];
+				size_t bytesread = fread(input_buffer, 1, filesize, statesfile);
+				if (bytesread != filesize) {
+					printf("File read error with %s. Aborting...\n", state_file_name);
 					fclose(statesfile);
-					free_bitarray(bitset);
+					inflateEnd(&compressed_stream);
 					exit(5);
 				}
 				fclose(statesfile);
-				uint32_t count = count_states(bitset);
+				uint32_t count = 0;
+				init_inflate(&compressed_stream, input_buffer, filesize, (uint8_t *)&count, sizeof(count));
+				inflate(&compressed_stream, Z_SYNC_FLUSH);
 				if ((float)count/(1<<24) < IGNORE_BITFLIP_THRESHOLD) {
+					uint32_t *bitset = (uint32_t *)malloc_bitarray(sizeof(uint32_t) * (1<<19));
+					if (bitset == NULL) {
+						printf("Out of memory error in init_bitflip_statelists(). Aborting...\n");
+						inflateEnd(&compressed_stream);
+						exit(4);
+					}
+					compressed_stream.next_out = (uint8_t *)bitset;
+					compressed_stream.avail_out = sizeof(uint32_t) * (1<<19);
+					inflate(&compressed_stream, Z_SYNC_FLUSH);
 					effective_bitflip[odd_even][num_effective_bitflips[odd_even]++] = bitflip;
 					bitflip_bitarrays[odd_even][bitflip] = bitset;
 					count_bitflip_bitarrays[odd_even][bitflip] = count;
@@ -289,9 +303,8 @@ static void init_bitflip_bitarrays(void)
 						line = 0;
 					}
 #endif
-				} else {
-					free_bitarray(bitset);
 				}
+				inflateEnd(&compressed_stream);
 			}
 		}
 		effective_bitflip[odd_even][num_effective_bitflips[odd_even]] = 0x400;	// EndOfList marker
@@ -1145,7 +1158,13 @@ static bool timeout(void)
 }
 
 
-static void *check_for_BitFlipProperties_thread(void *args)
+static void 
+#ifdef __has_attribute
+#if __has_attribute(force_align_arg_pointer)
+__attribute__((force_align_arg_pointer)) 
+#endif
+#endif
+*check_for_BitFlipProperties_thread(void *args)
 {
 	uint8_t first_byte = ((uint8_t *)args)[0];
 	uint8_t last_byte = ((uint8_t *)args)[1];
@@ -1893,7 +1912,13 @@ static void init_book_of_work(void)
 }
 
 
-static void *generate_candidates_worker_thread(void *args)
+static void 
+#ifdef __has_attribute
+#if __has_attribute(force_align_arg_pointer)
+__attribute__((force_align_arg_pointer)) 
+#endif
+#endif
+*generate_candidates_worker_thread(void *args)
 {
 	uint16_t *sum_args = (uint16_t *)args;
 	uint16_t sum_a0 = sums[sum_args[0]];
@@ -2481,6 +2506,10 @@ static void set_test_state(uint8_t byte)
 int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBlockNo, uint8_t trgKeyType, uint8_t *trgkey, bool nonce_file_read, bool nonce_file_write, bool slow, int tests) 
 {
 	char progress_text[80];
+	
+	char instr_set[12] = {0};
+	get_SIMD_instruction_set(instr_set);
+	PrintAndLog("Using %s SIMD core.", instr_set);
 
 	srand((unsigned) time(NULL));
 	brute_force_per_second = brute_force_benchmark();
@@ -2549,6 +2578,7 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 				best_first_bytes[0] = best_first_byte_smallest_bitarray;
 				pre_XOR_nonces();
 				prepare_bf_test_nonces(nonces, best_first_bytes[0]);
+				hardnested_print_progress(num_acquired_nonces, "Starting brute force...", expected_brute_force1, 0);
 				key_found = brute_force();
 				free(candidates->states[ODD_STATE]);
 				free(candidates->states[EVEN_STATE]);
@@ -2568,6 +2598,7 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 					// printf("Estimated remaining states: %" PRIu64 " (2^%1.1f)\n", nonces[best_first_bytes[0]].sum_a8_guess[j].num_states, log(nonces[best_first_bytes[0]].sum_a8_guess[j].num_states)/log(2.0));
 					generate_candidates(first_byte_Sum, nonces[best_first_bytes[0]].sum_a8_guess[j].sum_a8_idx);
 					// printf("Time for generating key candidates list: %1.0f sec (%1.1f sec CPU)\n", difftime(time(NULL), start_time), (float)(msclock() - start_clock)/1000.0);
+					hardnested_print_progress(num_acquired_nonces, "Starting brute force...", expected_brute_force, 0);
 					key_found = brute_force();
 					free_statelist_cache();
 					free_candidates_memory(candidates);
@@ -2608,6 +2639,12 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 
 		if (nonce_file_read) {  	// use pre-acquired data from file nonces.bin
 			if (read_nonce_file() != 0) {
+				free_bitflip_bitarrays();
+				free_nonces_memory();
+				free_bitarray(all_bitflips_bitarray[ODD_STATE]);
+				free_bitarray(all_bitflips_bitarray[EVEN_STATE]);
+				free_sum_bitarrays();
+				free_part_sum_bitarrays();
 				return 3;
 			}
 			hardnested_stage = CHECK_1ST_BYTES | CHECK_2ND_BYTES;
@@ -2617,6 +2654,12 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 		} else {					// acquire nonces.
 			uint16_t is_OK = acquire_nonces(blockNo, keyType, key, trgBlockNo, trgKeyType, nonce_file_write, slow);
 			if (is_OK != 0) {
+				free_bitflip_bitarrays();
+				free_nonces_memory();
+				free_bitarray(all_bitflips_bitarray[ODD_STATE]);
+				free_bitarray(all_bitflips_bitarray[EVEN_STATE]);
+				free_sum_bitarrays();
+				free_part_sum_bitarrays();
 				return is_OK;
 			}
 		}
@@ -2646,10 +2689,11 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 			for (statelist_t *sl = candidates; sl != NULL; sl = sl->next) {
 				maximum_states += (uint64_t)sl->len[ODD_STATE] * sl->len[EVEN_STATE];
 			}
-			printf("Number of remaining possible keys: %" PRIu64 " (2^%1.1f)\n", maximum_states, log(maximum_states)/log(2.0));
+			// printf("Number of remaining possible keys: %" PRIu64 " (2^%1.1f)\n", maximum_states, log(maximum_states)/log(2.0));
 			best_first_bytes[0] = best_first_byte_smallest_bitarray;
 			pre_XOR_nonces();
 			prepare_bf_test_nonces(nonces, best_first_bytes[0]);
+			hardnested_print_progress(num_acquired_nonces, "Starting brute force...", expected_brute_force1, 0);
 			key_found = brute_force();
 			free(candidates->states[ODD_STATE]);
 			free(candidates->states[EVEN_STATE]);
@@ -2669,6 +2713,7 @@ int mfnestedhard(uint8_t blockNo, uint8_t keyType, uint8_t *key, uint8_t trgBloc
 				// printf("Estimated remaining states: %" PRIu64 " (2^%1.1f)\n", nonces[best_first_bytes[0]].sum_a8_guess[j].num_states, log(nonces[best_first_bytes[0]].sum_a8_guess[j].num_states)/log(2.0));
 				generate_candidates(first_byte_Sum, nonces[best_first_bytes[0]].sum_a8_guess[j].sum_a8_idx);
 				// printf("Time for generating key candidates list: %1.0f sec (%1.1f sec CPU)\n", difftime(time(NULL), start_time), (float)(msclock() - start_clock)/1000.0);
+				hardnested_print_progress(num_acquired_nonces, "Starting brute force...", expected_brute_force, 0);
 				key_found = brute_force();
 				free_statelist_cache();
 				free_candidates_memory(candidates);