]>
Commit | Line | Data |
---|---|---|
c48c4d78 | 1 | //----------------------------------------------------------------------------- |
2 | // Copyright (C) 2016, 2017 by piwi | |
3 | // | |
4 | // This code is licensed to you under the terms of the GNU GPL, version 2 or, | |
5 | // at your option, any later version. See the LICENSE.txt file for the text of | |
6 | // the license. | |
7 | //----------------------------------------------------------------------------- | |
8 | // Implements a card only attack based on crypto text (encrypted nonces | |
9 | // received during a nested authentication) only. Unlike other card only | |
10 | // attacks this doesn't rely on implementation errors but only on the | |
11 | // inherent weaknesses of the crypto1 cypher. Described in | |
12 | // Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened | |
13 | // Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on | |
14 | // Computer and Communications Security, 2015 | |
15 | //----------------------------------------------------------------------------- | |
16 | // | |
17 | // brute forcing is based on @aczids bitsliced brute forcer | |
18 | // https://github.com/aczid/crypto1_bs with some modifications. Mainly: | |
19 | // - don't rollback. Start with 2nd byte of nonce instead | |
20 | // - reuse results of filter subfunctions | |
21 | // - reuse results of previous nonces if some first bits are identical | |
22 | // | |
23 | //----------------------------------------------------------------------------- | |
24 | // aczid's Copyright notice: | |
25 | // | |
26 | // Bit-sliced Crypto-1 brute-forcing implementation | |
27 | // Builds on the data structures returned by CraptEV1 craptev1_get_space(nonces, threshold, uid) | |
28 | /* | |
29 | Copyright (c) 2015-2016 Aram Verstegen | |
30 | ||
31 | Permission is hereby granted, free of charge, to any person obtaining a copy | |
32 | of this software and associated documentation files (the "Software"), to deal | |
33 | in the Software without restriction, including without limitation the rights | |
34 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
35 | copies of the Software, and to permit persons to whom the Software is | |
36 | furnished to do so, subject to the following conditions: | |
37 | ||
38 | The above copyright notice and this permission notice shall be included in | |
39 | all copies or substantial portions of the Software. | |
40 | ||
41 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
42 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
43 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
44 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
45 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
46 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
47 | THE SOFTWARE. | |
48 | */ | |
49 | ||
50 | #include "hardnested_bf_core.h" | |
51 | ||
52 | #include <stdint.h> | |
53 | #include <stdbool.h> | |
54 | #include <stdlib.h> | |
55 | #include <stdio.h> | |
56 | #include <malloc.h> | |
57 | #include <string.h> | |
58 | #include "crapto1/crapto1.h" | |
59 | #include "parity.h" | |
60 | ||
61 | // bitslice type | |
62 | // while AVX supports 256 bit vector floating point operations, we need integer operations for boolean logic | |
63 | // same for AVX2 and 512 bit vectors | |
64 | // using larger vectors works but seems to generate more register pressure | |
65 | #if defined(__AVX512F__) | |
66 | #define MAX_BITSLICES 512 | |
67 | #elif defined(__AVX2__) | |
68 | #define MAX_BITSLICES 256 | |
69 | #elif defined(__AVX__) | |
70 | #define MAX_BITSLICES 128 | |
71 | #elif defined(__SSE2__) | |
72 | #define MAX_BITSLICES 128 | |
73 | #else // MMX or SSE | |
74 | #define MAX_BITSLICES 64 | |
75 | #endif | |
76 | ||
77 | #define VECTOR_SIZE (MAX_BITSLICES/8) | |
78 | typedef unsigned int __attribute__((aligned(VECTOR_SIZE))) __attribute__((vector_size(VECTOR_SIZE))) bitslice_value_t; | |
79 | typedef union { | |
80 | bitslice_value_t value; | |
81 | uint64_t bytes64[MAX_BITSLICES/64]; | |
82 | uint8_t bytes[MAX_BITSLICES/8]; | |
83 | } bitslice_t; | |
84 | ||
85 | // filter function (f20) | |
86 | // sourced from ``Wirelessly Pickpocketing a Mifare Classic Card'' by Flavio Garcia, Peter van Rossum, Roel Verdult and Ronny Wichers Schreur | |
87 | #define f20a(a,b,c,d) (((a|b)^(a&d))^(c&((a^b)|d))) | |
88 | #define f20b(a,b,c,d) (((a&b)|c)^((a^b)&(c|d))) | |
89 | #define f20c(a,b,c,d,e) ((a|((b|e)&(d^e)))^((a^(b&d))&((c^d)|(b&e)))) | |
90 | ||
91 | // bit indexing | |
92 | #define get_bit(n, word) (((word) >> (n)) & 1) | |
93 | #define get_vector_bit(slice, value) get_bit((slice)&0x3f, value.bytes64[(slice)>>6]) | |
94 | ||
95 | // size of crypto-1 state | |
96 | #define STATE_SIZE 48 | |
97 | // size of nonce to be decrypted | |
98 | #define KEYSTREAM_SIZE 24 | |
99 | ||
100 | // endianness conversion | |
101 | #define rev32(word) ((((word) & 0xff) << 24) | ((((word) >> 8) & 0xff) << 16) | ((((word) >> 16) & 0xff) << 8) | ((((word) >> 24) & 0xff))) | |
102 | ||
103 | // this needs to be compiled several times for each instruction set. | |
104 | // For each instruction set, define a dedicated function name: | |
105 | #if defined (__AVX512F__) | |
106 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX512 | |
107 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX512 | |
108 | #elif defined (__AVX2__) | |
109 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX2 | |
110 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX2 | |
111 | #elif defined (__AVX__) | |
112 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_AVX | |
113 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_AVX | |
114 | #elif defined (__SSE2__) | |
115 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_SSE2 | |
116 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_SSE2 | |
117 | #elif defined (__MMX__) | |
118 | #define BITSLICE_TEST_NONCES bitslice_test_nonces_MMX | |
119 | #define CRACK_STATES_BITSLICED crack_states_bitsliced_MMX | |
120 | #endif | |
121 | ||
122 | // typedefs and declaration of functions: | |
123 | typedef const uint64_t crack_states_bitsliced_t(uint32_t, uint8_t*, statelist_t*, uint32_t*, uint64_t*, uint32_t, uint8_t*, noncelist_t*); | |
124 | crack_states_bitsliced_t crack_states_bitsliced_AVX512; | |
125 | crack_states_bitsliced_t crack_states_bitsliced_AVX2; | |
126 | crack_states_bitsliced_t crack_states_bitsliced_AVX; | |
127 | crack_states_bitsliced_t crack_states_bitsliced_SSE2; | |
128 | crack_states_bitsliced_t crack_states_bitsliced_MMX; | |
129 | crack_states_bitsliced_t crack_states_bitsliced_dispatch; | |
130 | ||
131 | typedef void bitslice_test_nonces_t(uint32_t, uint32_t*, uint8_t*); | |
132 | bitslice_test_nonces_t bitslice_test_nonces_AVX512; | |
133 | bitslice_test_nonces_t bitslice_test_nonces_AVX2; | |
134 | bitslice_test_nonces_t bitslice_test_nonces_AVX; | |
135 | bitslice_test_nonces_t bitslice_test_nonces_SSE2; | |
136 | bitslice_test_nonces_t bitslice_test_nonces_MMX; | |
137 | bitslice_test_nonces_t bitslice_test_nonces_dispatch; | |
138 | ||
139 | #ifdef _WIN32 | |
140 | #define malloc_bitslice(x) __builtin_assume_aligned(_aligned_malloc((x), MAX_BITSLICES/8), MAX_BITSLICES/8) | |
141 | #define free_bitslice(x) _aligned_free(x) | |
142 | #else | |
143 | #define malloc_bitslice(x) memalign(MAX_BITSLICES/8, (x)) | |
144 | #define free_bitslice(x) free(x) | |
145 | #endif | |
146 | ||
147 | #if defined (__MMX__) // (including more sophisticated instruction sets) | |
148 | typedef enum { | |
149 | EVEN_STATE = 0, | |
150 | ODD_STATE = 1 | |
151 | } odd_even_t; | |
152 | ||
153 | ||
154 | // arrays of bitsliced states with identical values in all slices | |
155 | static bitslice_t bitsliced_encrypted_nonces[256][KEYSTREAM_SIZE]; | |
156 | static bitslice_t bitsliced_encrypted_parity_bits[256][4]; | |
157 | // 1 and 0 vectors | |
158 | static bitslice_t bs_ones; | |
159 | static bitslice_t bs_zeroes; | |
160 | ||
161 | ||
162 | void BITSLICE_TEST_NONCES(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { | |
163 | ||
164 | // initialize 1 and 0 vectors | |
165 | memset(bs_ones.bytes, 0xff, VECTOR_SIZE); | |
166 | memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE); | |
167 | ||
168 | // bitslice nonces' 2nd to 4th byte | |
169 | for (uint32_t i = 0; i < nonces_to_bruteforce; i++) { | |
170 | for(uint32_t bit_idx = 0; bit_idx < KEYSTREAM_SIZE; bit_idx++){ | |
171 | bool bit = get_bit(KEYSTREAM_SIZE-1-bit_idx, rev32(bf_test_nonce[i] << 8)); | |
172 | if(bit){ | |
173 | bitsliced_encrypted_nonces[i][bit_idx].value = bs_ones.value; | |
174 | } else { | |
175 | bitsliced_encrypted_nonces[i][bit_idx].value = bs_zeroes.value; | |
176 | } | |
177 | } | |
178 | } | |
179 | // bitslice nonces' parity (4 bits) | |
180 | for (uint32_t i = 0; i < nonces_to_bruteforce; i++) { | |
181 | for(uint32_t bit_idx = 0; bit_idx < 4; bit_idx++){ | |
182 | bool bit = get_bit(4-1-bit_idx, bf_test_nonce_par[i]); | |
183 | if(bit){ | |
184 | bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_ones.value; | |
185 | } else { | |
186 | bitsliced_encrypted_parity_bits[i][bit_idx].value = bs_zeroes.value; | |
187 | } | |
188 | } | |
189 | } | |
190 | ||
191 | } | |
192 | ||
193 | ||
194 | const uint64_t CRACK_STATES_BITSLICED(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces){ | |
195 | ||
196 | // Unlike aczid's implementation this doesn't roll back at all when performing bitsliced bruteforce. | |
197 | // We know that the best first byte is already shifted in. Testing with the remaining three bytes of | |
198 | // the nonces is sufficient to eliminate most of them. The small rest is tested with a simple unsliced | |
199 | // brute forcing (including roll back). | |
200 | ||
201 | bitslice_t states[KEYSTREAM_SIZE+STATE_SIZE]; | |
202 | bitslice_t * restrict state_p; | |
203 | uint64_t key = -1; | |
204 | uint64_t bucket_states_tested = 0; | |
205 | uint32_t bucket_size[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1]; | |
206 | uint32_t bitsliced_blocks = 0; | |
207 | uint32_t const *restrict p_even_end = p->states[EVEN_STATE] + p->len[EVEN_STATE]; | |
208 | #if defined (DEBUG_BRUTE_FORCE) | |
209 | uint32_t elimination_step = 0; | |
210 | #define MAX_ELIMINATION_STEP 32 | |
211 | uint64_t keys_eliminated[MAX_ELIMINATION_STEP] = {0}; | |
212 | #endif | |
213 | #ifdef DEBUG_KEY_ELIMINATION | |
214 | bool bucket_contains_test_key[(p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1]; | |
215 | #endif | |
216 | ||
217 | // constant ones/zeroes | |
218 | bitslice_t bs_ones; | |
219 | memset(bs_ones.bytes, 0xff, VECTOR_SIZE); | |
220 | bitslice_t bs_zeroes; | |
221 | memset(bs_zeroes.bytes, 0x00, VECTOR_SIZE); | |
222 | ||
223 | // bitslice all the even states | |
224 | bitslice_t **restrict bitsliced_even_states = (bitslice_t **)malloc(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_t *)); | |
225 | if (bitsliced_even_states == NULL) { | |
226 | printf("Out of memory error in brute_force. Aborting..."); | |
227 | exit(4); | |
228 | } | |
229 | bitslice_value_t *restrict bitsliced_even_feedback = malloc_bitslice(((p->len[EVEN_STATE] - 1)/MAX_BITSLICES + 1) * sizeof(bitslice_value_t)); | |
230 | if (bitsliced_even_feedback == NULL) { | |
231 | printf("Out of memory error in brute_force. Aborting..."); | |
232 | exit(4); | |
233 | } | |
234 | for(uint32_t *restrict p_even = p->states[EVEN_STATE]; p_even < p_even_end; p_even += MAX_BITSLICES){ | |
235 | bitslice_t *restrict lstate_p = malloc_bitslice(STATE_SIZE/2*sizeof(bitslice_t)); | |
236 | if (lstate_p == NULL) { | |
237 | printf("Out of memory error in brute_force. Aborting... \n"); | |
238 | exit(4); | |
239 | } | |
240 | memset(lstate_p, 0x00, STATE_SIZE/2*sizeof(bitslice_t)); // zero even bits | |
241 | // bitslice even half-states | |
242 | const uint32_t max_slices = (p_even_end-p_even) < MAX_BITSLICES ? p_even_end-p_even : MAX_BITSLICES; | |
243 | bucket_size[bitsliced_blocks] = max_slices; | |
244 | #ifdef DEBUG_KEY_ELIMINATION | |
245 | bucket_contains_test_key[bitsliced_blocks] = false; | |
246 | #endif | |
247 | uint32_t slice_idx; | |
248 | for(slice_idx = 0; slice_idx < max_slices; ++slice_idx){ | |
249 | uint32_t e = *(p_even+slice_idx); | |
250 | #ifdef DEBUG_KEY_ELIMINATION | |
251 | if (known_target_key != -1 && e == test_state[EVEN_STATE]) { | |
252 | bucket_contains_test_key[bitsliced_blocks] = true; | |
253 | // printf("bucket %d contains test key even state\n", bitsliced_blocks); | |
254 | // printf("in slice %d\n", slice_idx); | |
255 | } | |
256 | #endif | |
257 | for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){ | |
258 | // set even bits | |
259 | if(e&1){ | |
260 | lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f); | |
261 | } | |
262 | } | |
263 | } | |
264 | // padding with last even state | |
265 | for ( ; slice_idx < MAX_BITSLICES; ++slice_idx) { | |
266 | uint32_t e = *(p_even_end-1); | |
267 | for(uint32_t bit_idx = 0; bit_idx < STATE_SIZE/2; bit_idx++, e >>= 1){ | |
268 | // set even bits | |
269 | if(e&1){ | |
270 | lstate_p[bit_idx].bytes64[slice_idx>>6] |= 1ull << (slice_idx & 0x3f); | |
271 | } | |
272 | } | |
273 | } | |
274 | bitsliced_even_states[bitsliced_blocks] = lstate_p; | |
275 | // bitsliced_even_feedback[bitsliced_blocks] = bs_ones; | |
276 | bitsliced_even_feedback[bitsliced_blocks] = lstate_p[(47- 0)/2].value ^ | |
277 | lstate_p[(47-10)/2].value ^ lstate_p[(47-12)/2].value ^ lstate_p[(47-14)/2].value ^ | |
278 | lstate_p[(47-24)/2].value ^ lstate_p[(47-42)/2].value; | |
279 | bitsliced_blocks++; | |
280 | } | |
281 | // bitslice every odd state to every block of even states | |
282 | for(uint32_t const *restrict p_odd = p->states[ODD_STATE]; p_odd < p->states[ODD_STATE] + p->len[ODD_STATE]; ++p_odd){ | |
283 | // early abort | |
284 | if(*keys_found){ | |
285 | goto out; | |
286 | } | |
287 | ||
288 | // set odd state bits and pre-compute first keystream bit vector. This is the same for all blocks of even states | |
289 | ||
290 | state_p = &states[KEYSTREAM_SIZE]; | |
291 | uint32_t o = *p_odd; | |
292 | ||
293 | // pre-compute the odd feedback bit | |
294 | bool odd_feedback_bit = evenparity32(o&0x29ce5c); | |
295 | const bitslice_value_t odd_feedback = odd_feedback_bit ? bs_ones.value : bs_zeroes.value; | |
296 | ||
297 | // set odd state bits | |
298 | for (uint32_t state_idx = 0; state_idx < STATE_SIZE; o >>= 1, state_idx += 2) { | |
299 | if (o & 1){ | |
300 | state_p[state_idx] = bs_ones; | |
301 | } else { | |
302 | state_p[state_idx] = bs_zeroes; | |
303 | } | |
304 | } | |
305 | ||
306 | bitslice_value_t crypto1_bs_f20b_2[16]; | |
307 | bitslice_value_t crypto1_bs_f20b_3[8]; | |
308 | ||
309 | crypto1_bs_f20b_2[0] = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
310 | crypto1_bs_f20b_3[0] = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value); | |
311 | ||
312 | bitslice_value_t ksb[8]; | |
313 | ksb[0] = f20c(f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value), | |
314 | f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value), | |
315 | crypto1_bs_f20b_2[0], | |
316 | f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value), | |
317 | crypto1_bs_f20b_3[0]); | |
318 | ||
319 | uint32_t *restrict p_even = p->states[EVEN_STATE]; | |
320 | for (uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx, p_even += MAX_BITSLICES) { | |
321 | ||
322 | #ifdef DEBUG_KEY_ELIMINATION | |
323 | // if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) { | |
324 | // printf("Now testing known target key.\n"); | |
325 | // printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks); | |
326 | // } | |
327 | #endif | |
328 | // add the even state bits | |
329 | const bitslice_t const *restrict bitsliced_even_state = bitsliced_even_states[block_idx]; | |
330 | for(uint32_t state_idx = 1; state_idx < STATE_SIZE; state_idx += 2) { | |
331 | state_p[state_idx] = bitsliced_even_state[state_idx/2]; | |
332 | } | |
333 | ||
334 | // pre-compute first feedback bit vector. This is the same for all nonces | |
335 | bitslice_value_t fbb[8]; | |
336 | fbb[0] = odd_feedback ^ bitsliced_even_feedback[block_idx]; | |
337 | ||
338 | // vector to contain test results (1 = passed, 0 = failed) | |
339 | bitslice_t results = bs_ones; | |
340 | ||
341 | // parity_bits | |
342 | bitslice_value_t par[8]; | |
343 | par[0] = bs_zeroes.value; | |
344 | uint32_t next_common_bits = 0; | |
345 | ||
346 | for(uint32_t tests = 0; tests < nonces_to_bruteforce; ++tests){ | |
347 | // common bits with preceding test nonce | |
348 | uint32_t common_bits = next_common_bits; //tests ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests-1]) : 0; | |
349 | next_common_bits = tests < nonces_to_bruteforce - 1 ? trailing_zeros(bf_test_nonce_2nd_byte[tests] ^ bf_test_nonce_2nd_byte[tests+1]) : 0; | |
350 | uint32_t parity_bit_idx = 1; // start checking with the parity of second nonce byte | |
351 | bitslice_value_t fb_bits = fbb[common_bits]; // start with precomputed feedback bits from previous nonce | |
352 | bitslice_value_t ks_bits = ksb[common_bits]; // dito for first keystream bits | |
353 | bitslice_value_t parity_bit_vector = par[common_bits]; // dito for first parity vector | |
354 | // bitslice_value_t fb_bits = fbb[0]; // start with precomputed feedback bits from previous nonce | |
355 | // bitslice_value_t ks_bits = ksb[0]; // dito for first keystream bits | |
356 | // bitslice_value_t parity_bit_vector = par[0]; // dito for first parity vector | |
357 | state_p -= common_bits; // and reuse the already calculated state bits | |
358 | // highest bit is transmitted/received first. We start with Bit 23 (highest bit of second nonce byte), | |
359 | // or the highest bit which differs from the previous nonce | |
360 | for (int32_t ks_idx = KEYSTREAM_SIZE-1-common_bits; ks_idx >= 0; --ks_idx) { | |
361 | ||
362 | // decrypt nonce bits | |
363 | const bitslice_value_t encrypted_nonce_bit_vector = bitsliced_encrypted_nonces[tests][ks_idx].value; | |
364 | const bitslice_value_t decrypted_nonce_bit_vector = encrypted_nonce_bit_vector ^ ks_bits; | |
365 | ||
366 | // compute real parity bits on the fly | |
367 | parity_bit_vector ^= decrypted_nonce_bit_vector; | |
368 | ||
369 | // update state | |
370 | state_p--; | |
371 | state_p[0].value = fb_bits ^ decrypted_nonce_bit_vector; | |
372 | ||
373 | // update crypto1 subfunctions | |
374 | bitslice_value_t f20a_1, f20b_1, f20b_2, f20a_2, f20b_3; | |
375 | f20a_2 = f20a(state_p[47-33].value, state_p[47-35].value, state_p[47-37].value, state_p[47-39].value); | |
376 | f20b_3 = f20b(state_p[47-41].value, state_p[47-43].value, state_p[47-45].value, state_p[47-47].value); | |
377 | if (ks_idx > KEYSTREAM_SIZE - 8) { | |
378 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
379 | f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value); | |
380 | f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
381 | crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2; | |
382 | crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx] = f20b_3; | |
383 | } else if (ks_idx > KEYSTREAM_SIZE - 16) { | |
384 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
385 | f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8]; | |
386 | f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
387 | crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx] = f20b_2; | |
388 | } else if (ks_idx > KEYSTREAM_SIZE - 24){ | |
389 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
390 | f20b_1 = crypto1_bs_f20b_2[KEYSTREAM_SIZE - ks_idx - 8]; | |
391 | f20b_2 = crypto1_bs_f20b_3[KEYSTREAM_SIZE - ks_idx - 16]; | |
392 | } else { | |
393 | f20a_1 = f20a(state_p[47- 9].value, state_p[47-11].value, state_p[47-13].value, state_p[47-15].value); | |
394 | f20b_1 = f20b(state_p[47-17].value, state_p[47-19].value, state_p[47-21].value, state_p[47-23].value); | |
395 | f20b_2 = f20b(state_p[47-25].value, state_p[47-27].value, state_p[47-29].value, state_p[47-31].value); | |
396 | } | |
397 | // update keystream bit | |
398 | ks_bits = f20c(f20a_1, f20b_1, f20b_2, f20a_2, f20b_3); | |
399 | ||
400 | // for each completed byte: | |
401 | if ((ks_idx & 0x07) == 0) { | |
402 | // get encrypted parity bits | |
403 | const bitslice_value_t encrypted_parity_bit_vector = bitsliced_encrypted_parity_bits[tests][parity_bit_idx++].value; | |
404 | ||
405 | // decrypt parity bits | |
406 | const bitslice_value_t decrypted_parity_bit_vector = encrypted_parity_bit_vector ^ ks_bits; | |
407 | ||
408 | // compare actual parity bits with decrypted parity bits and take count in results vector | |
409 | results.value &= ~parity_bit_vector ^ decrypted_parity_bit_vector; | |
410 | ||
411 | // make sure we still have a match in our set | |
412 | // if(memcmp(&results, &bs_zeroes, sizeof(bitslice_t)) == 0){ | |
413 | ||
414 | // this is much faster on my gcc, because somehow a memcmp needlessly spills/fills all the xmm registers to/from the stack - ??? | |
415 | // the short-circuiting also helps | |
416 | if(results.bytes64[0] == 0 | |
417 | #if MAX_BITSLICES > 64 | |
418 | && results.bytes64[1] == 0 | |
419 | #endif | |
420 | #if MAX_BITSLICES > 128 | |
421 | && results.bytes64[2] == 0 | |
422 | && results.bytes64[3] == 0 | |
423 | #endif | |
424 | ) { | |
425 | #if defined (DEBUG_BRUTE_FORCE) | |
426 | if (elimination_step < MAX_ELIMINATION_STEP) { | |
427 | keys_eliminated[elimination_step] += MAX_BITSLICES; | |
428 | } | |
429 | #endif | |
430 | #ifdef DEBUG_KEY_ELIMINATION | |
431 | if (known_target_key != -1 && bucket_contains_test_key[block_idx] && *p_odd == test_state[ODD_STATE]) { | |
432 | printf("Known target key eliminated in brute_force.\n"); | |
433 | printf("block_idx = %d/%d, nonce = %d/%d\n", block_idx, bitsliced_blocks, tests, nonces_to_bruteforce); | |
434 | } | |
435 | #endif | |
436 | goto stop_tests; | |
437 | } | |
438 | // prepare for next nonce byte | |
439 | #if defined (DEBUG_BRUTE_FORCE) | |
440 | elimination_step++; | |
441 | #endif | |
442 | parity_bit_vector = bs_zeroes.value; | |
443 | } | |
444 | // update feedback bit vector | |
445 | if (ks_idx != 0) { | |
446 | fb_bits = | |
447 | (state_p[47- 0].value ^ state_p[47- 5].value ^ state_p[47- 9].value ^ | |
448 | state_p[47-10].value ^ state_p[47-12].value ^ state_p[47-14].value ^ | |
449 | state_p[47-15].value ^ state_p[47-17].value ^ state_p[47-19].value ^ | |
450 | state_p[47-24].value ^ state_p[47-25].value ^ state_p[47-27].value ^ | |
451 | state_p[47-29].value ^ state_p[47-35].value ^ state_p[47-39].value ^ | |
452 | state_p[47-41].value ^ state_p[47-42].value ^ state_p[47-43].value); | |
453 | } | |
454 | // remember feedback and keystream vectors for later use | |
455 | uint8_t bit = KEYSTREAM_SIZE - ks_idx; | |
456 | if (bit <= next_common_bits) { // if needed and not yet stored | |
457 | fbb[bit] = fb_bits; | |
458 | ksb[bit] = ks_bits; | |
459 | par[bit] = parity_bit_vector; | |
460 | } | |
461 | } | |
462 | // prepare for next nonce. Revert to initial state | |
463 | state_p = &states[KEYSTREAM_SIZE]; | |
464 | } | |
465 | ||
466 | // all nonce tests were successful: we've found a possible key in this block! | |
467 | uint32_t *p_even_test = p_even; | |
468 | for (uint32_t results_word = 0; results_word < MAX_BITSLICES / 64; ++results_word) { | |
469 | uint64_t results64 = results.bytes64[results_word]; | |
470 | for (uint32_t results_bit = 0; results_bit < 64; results_bit++) { | |
471 | if (results64 & 0x01) { | |
472 | if (verify_key(cuid, nonces, best_first_bytes, *p_odd, *p_even_test)) { | |
473 | struct Crypto1State pcs; | |
474 | pcs.odd = *p_odd; | |
475 | pcs.even = *p_even_test; | |
476 | lfsr_rollback_byte(&pcs, (cuid >> 24) ^ best_first_bytes[0], true); | |
477 | crypto1_get_lfsr(&pcs, &key); | |
478 | bucket_states_tested += 64 * results_word + results_bit; | |
479 | goto out; | |
480 | } | |
481 | #ifdef DEBUG_KEY_ELIMINATION | |
482 | if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) { | |
483 | printf("Known target key eliminated in brute_force verification.\n"); | |
484 | printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks); | |
485 | } | |
486 | #endif | |
487 | } | |
488 | #ifdef DEBUG_KEY_ELIMINATION | |
489 | if (known_target_key != -1 && *p_even_test == test_state[EVEN_STATE] && *p_odd == test_state[ODD_STATE]) { | |
490 | printf("Known target key eliminated in brute_force (results_bit == 0).\n"); | |
491 | printf("block_idx = %d/%d\n", block_idx, bitsliced_blocks); | |
492 | } | |
493 | #endif | |
494 | results64 >>= 1; | |
495 | p_even_test++; | |
496 | if (p_even_test == p_even_end) { | |
497 | goto stop_tests; | |
498 | } | |
499 | } | |
500 | } | |
501 | stop_tests: | |
502 | #if defined (DEBUG_BRUTE_FORCE) | |
503 | elimination_step = 0; | |
504 | #endif | |
505 | bucket_states_tested += bucket_size[block_idx]; | |
506 | // prepare to set new states | |
507 | state_p = &states[KEYSTREAM_SIZE]; | |
508 | continue; | |
509 | } | |
510 | } | |
511 | out: | |
512 | for(uint32_t block_idx = 0; block_idx < bitsliced_blocks; ++block_idx){ | |
513 | free_bitslice(bitsliced_even_states[block_idx]); | |
514 | } | |
515 | free(bitsliced_even_states); | |
516 | free_bitslice(bitsliced_even_feedback); | |
517 | __sync_fetch_and_add(num_keys_tested, bucket_states_tested); | |
518 | ||
519 | #if defined (DEBUG_BRUTE_FORCE) | |
520 | for (uint32_t i = 0; i < MAX_ELIMINATION_STEP; i++) { | |
521 | printf("Eliminated after %2u test_bytes: %5.2f%%\n", i+1, (float)keys_eliminated[i] / bucket_states_tested * 100); | |
522 | } | |
523 | #endif | |
524 | return key; | |
525 | } | |
526 | #endif | |
527 | ||
528 | ||
529 | #ifndef __MMX__ | |
530 | ||
531 | // pointers to functions: | |
532 | crack_states_bitsliced_t *crack_states_bitsliced_function_p = &crack_states_bitsliced_dispatch; | |
533 | bitslice_test_nonces_t *bitslice_test_nonces_function_p = &bitslice_test_nonces_dispatch; | |
534 | ||
535 | // determine the available instruction set at runtime and call the correct function | |
536 | const uint64_t crack_states_bitsliced_dispatch(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { | |
f950ce1c | 537 | #if (__GNUC__ > 4) |
c48c4d78 | 538 | if (__builtin_cpu_supports("avx512f")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX512; |
539 | else if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; | |
f950ce1c | 540 | #else |
541 | if (__builtin_cpu_supports("avx2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX2; | |
542 | #endif | |
c48c4d78 | 543 | else if (__builtin_cpu_supports("avx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_AVX; |
544 | else if (__builtin_cpu_supports("sse2")) crack_states_bitsliced_function_p = &crack_states_bitsliced_SSE2; | |
545 | else if (__builtin_cpu_supports("mmx")) crack_states_bitsliced_function_p = &crack_states_bitsliced_MMX; | |
546 | else { | |
547 | printf("\nFatal: you need at least a CPU with MMX instruction set support. Aborting...\n"); | |
548 | exit(5); | |
549 | } | |
550 | // call the most optimized function for this CPU | |
551 | return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); | |
552 | } | |
553 | ||
554 | void bitslice_test_nonces_dispatch(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { | |
f950ce1c | 555 | #if (__GNUC__ > 4) |
c48c4d78 | 556 | if (__builtin_cpu_supports("avx512f")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX512; |
557 | else if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; | |
f950ce1c | 558 | #else |
559 | if (__builtin_cpu_supports("avx2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX2; | |
560 | #endif | |
c48c4d78 | 561 | else if (__builtin_cpu_supports("avx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_AVX; |
562 | else if (__builtin_cpu_supports("sse2")) bitslice_test_nonces_function_p = &bitslice_test_nonces_SSE2; | |
563 | else if (__builtin_cpu_supports("mmx")) bitslice_test_nonces_function_p = &bitslice_test_nonces_MMX; | |
564 | else { | |
565 | printf("\nFatal: you need at least a CPU with MMX instruction set support. Aborting...\n"); | |
566 | exit(5); | |
567 | } | |
568 | // call the most optimized function for this CPU | |
569 | (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); | |
570 | } | |
571 | ||
572 | // Entries to dispatched function calls | |
573 | const uint64_t crack_states_bitsliced(uint32_t cuid, uint8_t *best_first_bytes, statelist_t *p, uint32_t *keys_found, uint64_t *num_keys_tested, uint32_t nonces_to_bruteforce, uint8_t *bf_test_nonce_2nd_byte, noncelist_t *nonces) { | |
574 | return (*crack_states_bitsliced_function_p)(cuid, best_first_bytes, p, keys_found, num_keys_tested, nonces_to_bruteforce, bf_test_nonce_2nd_byte, nonces); | |
575 | } | |
576 | ||
577 | void bitslice_test_nonces(uint32_t nonces_to_bruteforce, uint32_t *bf_test_nonce, uint8_t *bf_test_nonce_par) { | |
578 | (*bitslice_test_nonces_function_p)(nonces_to_bruteforce, bf_test_nonce, bf_test_nonce_par); | |
579 | } | |
580 | ||
581 | #endif |