]> git.zerfleddert.de Git - proxmark3-svn/blob - client/hardnested/hardnested_bf_core.c
adjust hardnested compiler fix to accurate version
[proxmark3-svn] / client / hardnested / hardnested_bf_core.c
1 //-----------------------------------------------------------------------------
2 // Copyright (C) 2016, 2017 by piwi
3 //
4 // This code is licensed to you under the terms of the GNU GPL, version 2 or,
5 // at your option, any later version. See the LICENSE.txt file for the text of
6 // the license.
7 //-----------------------------------------------------------------------------
8 // Implements a card only attack based on crypto text (encrypted nonces
9 // received during a nested authentication) only. Unlike other card only
10 // attacks this doesn't rely on implementation errors but only on the
11 // inherent weaknesses of the crypto1 cypher. Described in
12 // Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
13 // Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
14 // Computer and Communications Security, 2015
15 //-----------------------------------------------------------------------------
16 //
17 // brute forcing is based on @aczids bitsliced brute forcer
18 // https://github.com/aczid/crypto1_bs with some modifications. Mainly:
19 // - don't rollback. Start with 2nd byte of nonce instead
20 // - reuse results of filter subfunctions
21 // - reuse results of previous nonces if some first bits are identical
22 //
23 //-----------------------------------------------------------------------------
24 // aczid's Copyright notice:
25 //
26 // Bit-sliced Crypto-1 brute-forcing implementation
27 // Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid)
28 /*
29 Copyright (c) 2015-2016 Aram Verstegen
30
31 Permission is hereby granted, free of charge, to any person obtaining a copy
32 of this software and associated documentation files (the "Software"), to deal
33 in the Software without restriction, including without limitation the rights
34 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
35 copies of the Software, and to permit persons to whom the Software is
36 furnished to do so, subject to the following conditions:
37
38 The above copyright notice and this permission notice shall be included in
39 all copies or substantial portions of the Software.
40
41 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
42 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
43 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
44 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
45 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
46 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
47 THE SOFTWARE.
48 */
49
50 #include "hardnested_bf_core.h"
51
52 #include <stdint.h>
53 #include <stdbool.h>
54 #include <stdlib.h>
55 #include <stdio.h>
56 #include <malloc.h>
57 #include <string.h>
58 #include "crapto1/crapto1.h"
59 #include "parity.h"
60
61 // bitslice type
62 // while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic
63 // same for AVX2 and 512 bit vectors
64 // using larger vectors works but seems to generate more register pressure
65 #if defined(__AVX512F__)
66 #define MAX_BITSLICES 512
67 #elif defined(__AVX2__)
68 #define MAX_BITSLICES 256
69 #elif defined(__AVX__)
70 #define MAX_BITSLICES 128
71 #elif defined(__SSE2__)
72 #define MAX_BITSLICES 128
73 #else // MMX or SSE
74 #define MAX_BITSLICES 64
75 #endif
76
77 #define VECTOR_SIZE (MAX_BITSLICES/8)
78 typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t;
79 typedef union {
80 bitslice_value_t value;
81 uint64_t bytes64[MAX_BITSLICES/64];
82 uint8_t bytes[MAX_BITSLICES/8];
83 } bitslice_t;
84
85 // filter function (f20)
86 // sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur
87 #define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d)))
88 #define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d)))
89 #define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e))))
90
91 // bit indexing
92 #define get_bit(n, word) (((word) >> (n)) & 1)
93 #define get_vector_bit(slice, value) get_bit((slice)&0x3f, value.bytes64[(slice)>>6])
94
95 // size of crypto-1 state
96 #define STATE_SIZE 48
97 // size of nonce to be decrypted
98 #define KEYSTREAM_SIZE 24
99
100 // endianness conversion
101 #define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff)))
102
103 // this needs to be compiled several times for each instruction set.
104 // For each instruction set, define a dedicated function name:
105 #if defined (__AVX512F__)
106 #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512
107 #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512
108 #elif defined (__AVX2__)
109 #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2
110 #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2
111 #elif defined (__AVX__)
112 #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX
113 #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX
114 #elif defined (__SSE2__)
115 #define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2
116 #define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2
117 #elif defined (__MMX__)
118 #define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX
119 #define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX
120 #endif
121
122 // typedefs and declaration of functions:
123 typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*);
124 crack_states_bitsliced_t crack_states_bitsliced_AVX512;
125 crack_states_bitsliced_t crack_states_bitsliced_AVX2;
126 crack_states_bitsliced_t crack_states_bitsliced_AVX;
127 crack_states_bitsliced_t crack_states_bitsliced_SSE2;
128 crack_states_bitsliced_t crack_states_bitsliced_MMX;
129 crack_states_bitsliced_t crack_states_bitsliced_dispatch;
130
131 typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*);
132 bitslice_test_nonces_t bitslice_test_nonces_AVX512;
133 bitslice_test_nonces_t bitslice_test_nonces_AVX2;
134 bitslice_test_nonces_t bitslice_test_nonces_AVX;
135 bitslice_test_nonces_t bitslice_test_nonces_SSE2;
136 bitslice_test_nonces_t bitslice_test_nonces_MMX;
137 bitslice_test_nonces_t bitslice_test_nonces_dispatch;
138
139 #ifdef _WIN32
140 #define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8)
141 #define free_bitslice(x) _aligned_free(x)
142 #else
143 #define malloc_bitslice(x) memalign(MAX_BITSLICES/8, (x))
144 #define free_bitslice(x) free(x)
145 #endif
146
147 #if defined (__MMX__) // (including more sophisticated instruction sets)
148 typedef enum {
149 EVEN_STATE = 0,
150 ODD_STATE = 1
151 } odd_even_t;
152
153
154 // arrays of bitsliced states with identical values in all slices
155 static bitslice_t bitsliced_encrypted_nonces[256][KEYSTREAM_SIZE];
156 static bitslice_t bitsliced_encrypted_parity_bits[256][4];
157 // 1 and 0 vectors
158 static bitslice_t bs_ones;
159 static bitslice_t bs_zeroes;
160
161
162 void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
163
164 // initialize 1 and 0 vectors
165 memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
166 memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
167
168 // bitslice nonces' 2nd to 4th byte
169 for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
170 for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){
171 bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, rev32(bf_test_nonce[i] << 8));
172 if(bit){
173 bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value;
174 } else {
175 bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value;
176 }
177 }
178 }
179 // bitslice nonces' parity (4 bits)
180 for (uint32_t i = 0; i < nonces_to_bruteforce; i++) {
181 for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){
182 bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]);
183 if(bit){
184 bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value;
185 } else {
186 bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value;
187 }
188 }
189 }
190
191 }
192
193
194 const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){
195
196 // Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce.
197 // We know that the best first byte is already shifted in. Testing with the remaining three bytes of
198 // the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced
199 // brute forcing (including roll back).
200
201 bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE];
202 bitslice_t * restrict state_p;
203 uint64_t key = -1;
204 uint64_t bucket_states_tested = 0;
205 uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
206 uint32_t bitsliced_blocks = 0;
207 uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE];
208 #if defined (DEBUG_BRUTE_FORCE)
209 uint32_t elimination_step = 0;
210 #define MAX_ELIMINATION_STEP 32
211 uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0};
212 #endif
213 #ifdef DEBUG_KEY_ELIMINATION
214 bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1];
215 #endif
216
217 // constant ones/zeroes
218 bitslice_t bs_ones;
219 memset(bs_ones.bytes, 0xff, VECTOR_SIZE);
220 bitslice_t bs_zeroes;
221 memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE);
222
223 // bitslice all the even states
224 bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *));
225 if (bitsliced_even_states == NULL) {
226 printf("Out of memory error in brute_force. Aborting...");
227 exit(4);
228 }
229 bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t));
230 if (bitsliced_even_feedback == NULL) {
231 printf("Out of memory error in brute_force. Aborting...");
232 exit(4);
233 }
234 for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){
235 bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t));
236 if (lstate_p == NULL) {
237 printf("Out of memory error in brute_force. Aborting... \n");
238 exit(4);
239 }
240 memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits
241 // bitslice even half-states
242 const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES;
243 bucket_size[bitsliced_blocks] = max_slices;
244 #ifdef DEBUG_KEY_ELIMINATION
245 bucket_contains_test_key[bitsliced_blocks] = false;
246 #endif
247 uint32_t slice_idx;
248 for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){
249 uint32_t e = *(p_even+slice_idx);
250 #ifdef DEBUG_KEY_ELIMINATION
251 if (known_target_key != -1 && e == test_state[EVEN_STATE]) {
252 bucket_contains_test_key[bitsliced_blocks] = true;
253 // printf("bucket %d contains test key even state\n", bitsliced_blocks);
254 // printf("in slice %d\n", slice_idx);
255 }
256 #endif
257 for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
258 // set even bits
259 if(e&1){
260 lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
261 }
262 }
263 }
264 // padding with last even state
265 for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) {
266 uint32_t e = *(p_even_end-1);
267 for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){
268 // set even bits
269 if(e&1){
270 lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f);
271 }
272 }
273 }
274 bitsliced_even_states[bitsliced_blocks] = lstate_p;
275 // bitsliced_even_feedback[bitsliced_blocks] = bs_ones;
276 bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^
277 lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^
278 lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value;
279 bitsliced_blocks++;
280 }
281 // bitslice every odd state to every block of even states
282 for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){
283 // early abort
284 if(*keys_found){
285 goto out;
286 }
287
288 // set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states
289
290 state_p = &states[KEYSTREAM_SIZE];
291 uint32_t o = *p_odd;
292
293 // pre-compute the odd feedback bit
294 bool odd_feedback_bit = evenparity32(o&0x29ce5c);
295 const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value;
296
297 // set odd state bits
298 for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) {
299 if (o & 1){
300 state_p[state_idx] = bs_ones;
301 } else {
302 state_p[state_idx] = bs_zeroes;
303 }
304 }
305
306 bitslice_value_t crypto1_bs_f20b_2[16];
307 bitslice_value_t crypto1_bs_f20b_3[8];
308
309 crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
310 crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
311
312 bitslice_value_t ksb[8];
313 ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value),
314 f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value),
315 crypto1_bs_f20b_2[0],
316 f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value),
317 crypto1_bs_f20b_3[0]);
318
319 uint32_t *restrict p_even = p->states[EVEN_STATE];
320 for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) {
321
322 #ifdef DEBUG_KEY_ELIMINATION
323 // if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
324 // printf("Now testing known target key.\n");
325 // printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
326 // }
327 #endif
328 // add the even state bits
329 const bitslice_t const *restrict bitsliced_even_state = bitsliced_even_states[block_idx];
330 for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) {
331 state_p[state_idx] = bitsliced_even_state[state_idx/2];
332 }
333
334 // pre-compute first feedback bit vector. This is the same for all nonces
335 bitslice_value_t fbb[8];
336 fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx];
337
338 // vector to contain test results (1 = passed, 0 = failed)
339 bitslice_t results = bs_ones;
340
341 // parity_bits
342 bitslice_value_t par[8];
343 par[0] = bs_zeroes.value;
344 uint32_t next_common_bits = 0;
345
346 for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){
347 // common bits with preceding test nonce
348 uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0;
349 next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0;
350 uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte
351 bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce
352 bitslice_value_t ks_bits = ksb[common_bits]; // dito for first keystream bits
353 bitslice_value_t parity_bit_vector = par[common_bits]; // dito for first parity vector
354 // bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from previous nonce
355 // bitslice_value_t ks_bits = ksb[0]; // dito for first keystream bits
356 // bitslice_value_t parity_bit_vector = par[0]; // dito for first parity vector
357 state_p -= common_bits; // and reuse the already calculated state bits
358 // highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte),
359 // or the highest bit which differs from the previous nonce
360 for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) {
361
362 // decrypt nonce bits
363 const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value;
364 const bitslice_value_t decrypted_nonce_bit_vector = encrypted_nonce_bit_vector ^ ks_bits;
365
366 // compute real parity bits on the fly
367 parity_bit_vector ^= decrypted_nonce_bit_vector;
368
369 // update state
370 state_p--;
371 state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector;
372
373 // update crypto1 subfunctions
374 bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3;
375 f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value);
376 f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value);
377 if (ks_idx > KEYSTREAM_SIZE - 8) {
378 f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
379 f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
380 f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
381 crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
382 crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3;
383 } else if (ks_idx > KEYSTREAM_SIZE - 16) {
384 f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
385 f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
386 f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
387 crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2;
388 } else if (ks_idx > KEYSTREAM_SIZE - 24){
389 f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
390 f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8];
391 f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16];
392 } else {
393 f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value);
394 f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value);
395 f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value);
396 }
397 // update keystream bit
398 ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3);
399
400 // for each completed byte:
401 if ((ks_idx & 0x07) == 0) {
402 // get encrypted parity bits
403 const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value;
404
405 // decrypt parity bits
406 const bitslice_value_t decrypted_parity_bit_vector = encrypted_parity_bit_vector ^ ks_bits;
407
408 // compare actual parity bits with decrypted parity bits and take count in results vector
409 results.value &= ~parity_bit_vector ^ decrypted_parity_bit_vector;
410
411 // make sure we still have a match in our set
412 // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){
413
414 // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ???
415 // the short-circuiting also helps
416 if(results.bytes64[0] == 0
417 #if MAX_BITSLICES > 64
418 && results.bytes64[1] == 0
419 #endif
420 #if MAX_BITSLICES > 128
421 && results.bytes64[2] == 0
422 && results.bytes64[3] == 0
423 #endif
424 ) {
425 #if defined (DEBUG_BRUTE_FORCE)
426 if (elimination_step < MAX_ELIMINATION_STEP) {
427 keys_eliminated[elimination_step] += MAX_BITSLICES;
428 }
429 #endif
430 #ifdef DEBUG_KEY_ELIMINATION
431 if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) {
432 printf("Known target key eliminated in brute_force.\n");
433 printf("block_idx = %d/%d, nonce = %d/%d\n", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce);
434 }
435 #endif
436 goto stop_tests;
437 }
438 // prepare for next nonce byte
439 #if defined (DEBUG_BRUTE_FORCE)
440 elimination_step++;
441 #endif
442 parity_bit_vector = bs_zeroes.value;
443 }
444 // update feedback bit vector
445 if (ks_idx != 0) {
446 fb_bits =
447 (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^
448 state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^
449 state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^
450 state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^
451 state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^
452 state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value);
453 }
454 // remember feedback and keystream vectors for later use
455 uint8_t bit = KEYSTREAM_SIZE - ks_idx;
456 if (bit <= next_common_bits) { // if needed and not yet stored
457 fbb[bit] = fb_bits;
458 ksb[bit] = ks_bits;
459 par[bit] = parity_bit_vector;
460 }
461 }
462 // prepare for next nonce. Revert to initial state
463 state_p = &states[KEYSTREAM_SIZE];
464 }
465
466 // all nonce tests were successful: we've found a possible key in this block!
467 uint32_t *p_even_test = p_even;
468 for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) {
469 uint64_t results64 = results.bytes64[results_word];
470 for (uint32_t results_bit = 0; results_bit < 64; results_bit++) {
471 if (results64 & 0x01) {
472 if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) {
473 struct Crypto1State pcs;
474 pcs.odd = *p_odd;
475 pcs.even = *p_even_test;
476 lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true);
477 crypto1_get_lfsr(&pcs, &key);
478 bucket_states_tested += 64 * results_word + results_bit;
479 goto out;
480 }
481 #ifdef DEBUG_KEY_ELIMINATION
482 if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
483 printf("Known target key eliminated in brute_force verification.\n");
484 printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
485 }
486 #endif
487 }
488 #ifdef DEBUG_KEY_ELIMINATION
489 if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) {
490 printf("Known target key eliminated in brute_force (results_bit == 0).\n");
491 printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks);
492 }
493 #endif
494 results64 >>= 1;
495 p_even_test++;
496 if (p_even_test == p_even_end) {
497 goto stop_tests;
498 }
499 }
500 }
501 stop_tests:
502 #if defined (DEBUG_BRUTE_FORCE)
503 elimination_step = 0;
504 #endif
505 bucket_states_tested += bucket_size[block_idx];
506 // prepare to set new states
507 state_p = &states[KEYSTREAM_SIZE];
508 continue;
509 }
510 }
511 out:
512 for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){
513 free_bitslice(bitsliced_even_states[block_idx]);
514 }
515 free(bitsliced_even_states);
516 free_bitslice(bitsliced_even_feedback);
517 __sync_fetch_and_add(num_keys_tested, bucket_states_tested);
518
519 #if defined (DEBUG_BRUTE_FORCE)
520 for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) {
521 printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100);
522 }
523 #endif
524 return key;
525 }
526 #endif
527
528
529 #ifndef __MMX__
530
531 // pointers to functions:
532 crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch;
533 bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch;
534
535 // determine the available instruction set at runtime and call the correct function
536 const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
537 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
538 if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512;
539 else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
540 #else
541 if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2;
542 #endif
543 else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX;
544 else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2;
545 else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX;
546 else {
547 printf("\nFatal: you need at least a CPU with MMX instruction set support. Aborting...\n");
548 exit(5);
549 }
550 // call the most optimized function for this CPU
551 return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
552 }
553
554 void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
555 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
556 if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512;
557 else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
558 #else
559 if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2;
560 #endif
561 else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX;
562 else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2;
563 else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX;
564 else {
565 printf("\nFatal: you need at least a CPU with MMX instruction set support. Aborting...\n");
566 exit(5);
567 }
568 // call the most optimized function for this CPU
569 (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
570 }
571
572 // Entries to dispatched function calls
573 const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) {
574 return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces);
575 }
576
577 void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) {
578 (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par);
579 }
580
581 #endif
Impressum, Datenschutz