iclass.c: speeding up MAC calculation
[proxmark3-svn] / armsrc / optimized_cipher.c
1 /*****************************************************************************
2 * WARNING
3 *
4 * THIS CODE IS CREATED FOR EXPERIMENTATION AND EDUCATIONAL USE ONLY.
5 *
6 * USAGE OF THIS CODE IN OTHER WAYS MAY INFRINGE UPON THE INTELLECTUAL
7 * PROPERTY OF OTHER PARTIES, SUCH AS INSIDE SECURE AND HID GLOBAL,
8 * AND MAY EXPOSE YOU TO AN INFRINGEMENT ACTION FROM THOSE PARTIES.
9 *
10 * THIS CODE SHOULD NEVER BE USED TO INFRINGE PATENTS OR INTELLECTUAL PROPERTY RIGHTS.
11 *
12 *****************************************************************************
13 *
14 * This file is part of loclass. It is a reconstructon of the cipher engine
15 * used in iClass, and RFID techology.
16 *
17 * The implementation is based on the work performed by
18 * Flavio D. Garcia, Gerhard de Koning Gans, Roel Verdult and
19 * Milosch Meriac in the paper "Dismantling IClass".
20 *
21 * Copyright (C) 2014 Martin Holst Swende
22 *
23 * This is free software: you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License version 2 as published
25 * by the Free Software Foundation, or, at your option, any later version.
26 *
27 * This file is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
31 *
32 * You should have received a copy of the GNU General Public License
33 * along with loclass. If not, see <http://www.gnu.org/licenses/>.
34 *
35 *
36 *
37 ****************************************************************************/
38
39 /**
40
41 This file contains an optimized version of the MAC-calculation algorithm. Some measurements on
42 a std laptop showed it runs in about 1/3 of the time:
43
44 Std: 0.428962
45 Opt: 0.151609
46
47 Additionally, it is self-reliant, not requiring e.g. bitstreams from the cipherutils, thus can
48 be easily dropped into a code base.
49
50 The optimizations have been performed in the following steps:
51 * Parameters passed by reference instead of by value.
52 * Iteration instead of recursion, un-nesting recursive loops into for-loops.
53 * Handling of bytes instead of individual bits, for less shuffling and masking
54 * Less creation of "objects", structs, and instead reuse of alloc:ed memory
55 * Inlining some functions via #define:s
56
57 As a consequence, this implementation is less generic. Also, I haven't bothered documenting this.
58 For a thorough documentation, check out the MAC-calculation within cipher.c instead.
59
60 -- MHS 2015
61 **/
62
63 /**
64
65 The runtime of opt_doTagMAC_2() with the MHS optimized version was 403 microseconds on Proxmark3.
66 This was still to slow for some newer readers which didn't want to wait that long.
67
68 Further optimizations to speedup the MAC calculations:
69 * Optimized opt_Tt logic
70 * Look up table for opt_select
71 * Removing many unnecessary bit maskings (& 0x1)
72 * updating state in place instead of alternating use of a second state structure
73 * remove the necessity to reverse bits of input and output bytes
74
75 opt_doTagMAC_2() now completes in 270 microseconds.
76
77 -- piwi 2019
78 **/
79
80 #include "optimized_cipher.h"
81 #include <stddef.h>
82 #include <stdbool.h>
83 #include <stdint.h>
84 #include "string.h"
85
86 static const uint8_t opt_select_LUT[256] = {
87 00, 03, 02, 01, 02, 03, 00, 01, 04, 07, 07, 04, 06, 07, 05, 04,
88 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
89 06, 05, 04, 07, 04, 05, 06, 07, 06, 05, 05, 06, 04, 05, 07, 06,
90 07, 04, 05, 06, 04, 05, 06, 07, 07, 04, 04, 07, 04, 05, 07, 06,
91 06, 05, 04, 07, 04, 05, 06, 07, 02, 01, 01, 02, 00, 01, 03, 02,
92 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06,
93 00, 03, 02, 01, 02, 03, 00, 01, 00, 03, 03, 00, 02, 03, 01, 00,
94 05, 06, 07, 04, 06, 07, 04, 05, 05, 06, 06, 05, 06, 07, 05, 04,
95 02, 01, 00, 03, 00, 01, 02, 03, 06, 05, 05, 06, 04, 05, 07, 06,
96 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06,
97 02, 01, 00, 03, 00, 01, 02, 03, 02, 01, 01, 02, 00, 01, 03, 02,
98 03, 00, 01, 02, 00, 01, 02, 03, 03, 00, 00, 03, 00, 01, 03, 02,
99 04, 07, 06, 05, 06, 07, 04, 05, 00, 03, 03, 00, 02, 03, 01, 00,
100 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
101 04, 07, 06, 05, 06, 07, 04, 05, 04, 07, 07, 04, 06, 07, 05, 04,
102 01, 02, 03, 00, 02, 03, 00, 01, 01, 02, 02, 01, 02, 03, 01, 00
103 };
104
105 /********************** the table above has been generated with this code: ********
106 #include "util.h"
107 static void init_opt_select_LUT(void) {
108 for (int r = 0; r < 256; r++) {
109 uint8_t r_ls2 = r << 2;
110 uint8_t r_and_ls2 = r & r_ls2;
111 uint8_t r_or_ls2 = r | r_ls2;
112 uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
113 uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r;
114 uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r;
115 opt_select_LUT[r] = (z0 & 4) | (z1 & 2) | (z2 & 1);
116 }
117 print_result("", opt_select_LUT, 256);
118 }
119 ***********************************************************************************/
120
121 #define opt__select(x,y,r) (4 & (((r & (r << 2)) >> 5) ^ ((r & ~(r << 2)) >> 4) ^ ( (r | r << 2) >> 3)))\
122 |(2 & (((r | r << 2) >> 6) ^ ( (r | r << 2) >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1)))\
123 |(1 & (((r & ~(r << 2)) >> 4) ^ ((r & (r << 2)) >> 3) ^ r ^ x))
124
125 /*
126 * Some background on the expression above can be found here...
127 uint8_t xopt__select(bool x, bool y, uint8_t r)
128 {
129
130 //r: r0 r1 r2 r3 r4 r5 r6 r7
131 //r_ls2: r2 r3 r4 r5 r6 r7 0 0
132 // z0
133 // z1
134
135 // uint8_t z0 = (r0 & r2) ^ (r1 & ~r3) ^ (r2 | r4); // <-- original
136 uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
137
138 // uint8_t z1 = (r0 | r2) ^ ( r5 | r7) ^ r1 ^ r6 ^ x ^ y; // <-- original
139 uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1);
140
141 // uint8_t z2 = (r3 & ~r5) ^ (r4 & r6 ) ^ r7 ^ x; // <-- original
142 uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r ^ x;
143
144 return (z0 & 4) | (z1 & 2) | (z2 & 1);
145 }
146 */
147
148 static void opt_successor(const uint8_t *k, State *s, uint8_t y) {
149 // #define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14) ^ (s->t >> 10) ^ (s->t >> 8) ^ (s->t >> 5) ^ (s->t >> 4)^ (s->t >> 1) ^ s->t))
150 // uint8_t Tt = opt_T(s);
151 uint16_t Tt = s->t & 0xc533;
152 Tt = Tt ^ (Tt >> 1);
153 Tt = Tt ^ (Tt >> 4);
154 Tt = Tt ^ (Tt >> 10);
155 Tt = Tt ^ (Tt >> 8);
156
157 s->t = (s->t >> 1);
158 s->t |= (Tt ^ (s->r >> 7) ^ (s->r >> 3)) << 15;
159
160 uint8_t opt_B = s->b;
161 opt_B ^= s->b >> 6;
162 opt_B ^= s->b >> 5;
163 opt_B ^= s->b >> 4;
164
165 s->b = s->b >> 1;
166 s->b |= (opt_B ^ s->r) << 7;
167
168 uint8_t opt_select = opt_select_LUT[s->r] & 0x04;
169 opt_select |= (opt_select_LUT[s->r] ^ ((Tt ^ y) << 1)) & 0x02;
170 opt_select |= (opt_select_LUT[s->r] ^ Tt) & 0x01;
171
172 uint8_t r = s->r;
173 s->r = (k[opt_select] ^ s->b) + s->l ;
174 s->l = s->r + r;
175 }
176
177 static void opt_suc(const uint8_t *k, State *s, uint8_t *in, uint8_t length, bool add32Zeroes) {
178 for (int i = 0; i < length; i++) {
179 uint8_t head;
180 head = in[i];
181 opt_successor(k, s, head);
182
183 head >>= 1;
184 opt_successor(k, s, head);
185
186 head >>= 1;
187 opt_successor(k, s, head);
188
189 head >>= 1;
190 opt_successor(k, s, head);
191
192 head >>= 1;
193 opt_successor(k, s, head);
194
195 head >>= 1;
196 opt_successor(k, s, head);
197
198 head >>= 1;
199 opt_successor(k, s, head);
200
201 head >>= 1;
202 opt_successor(k, s, head);
203 }
204 //For tag MAC, an additional 32 zeroes
205 if (add32Zeroes) {
206 for(int i = 0; i < 16; i++) {
207 opt_successor(k, s, 0);
208 opt_successor(k, s, 0);
209 }
210 }
211 }
212
213 static void opt_output(const uint8_t *k, State *s, uint8_t *buffer) {
214 for (uint8_t times = 0; times < 4; times++) {
215 uint8_t bout = 0;
216 bout |= (s->r & 0x4) >> 2;
217 opt_successor(k, s, 0);
218 bout |= (s->r & 0x4) >> 1;
219 opt_successor(k, s, 0);
220 bout |= (s->r & 0x4);
221 opt_successor(k, s, 0);
222 bout |= (s->r & 0x4) << 1;
223 opt_successor(k, s, 0);
224 bout |= (s->r & 0x4) << 2;
225 opt_successor(k, s, 0);
226 bout |= (s->r & 0x4) << 3;
227 opt_successor(k, s, 0);
228 bout |= (s->r & 0x4) << 4;
229 opt_successor(k, s, 0);
230 bout |= (s->r & 0x4) << 5;
231 opt_successor(k, s, 0);
232 buffer[times] = bout;
233 }
234 }
235
236 static void opt_MAC(uint8_t *k, uint8_t *input, uint8_t *out) {
237 State _init = {
238 ((k[0] ^ 0x4c) + 0xEC) & 0xFF,// l
239 ((k[0] ^ 0x4c) + 0x21) & 0xFF,// r
240 0x4c, // b
241 0xE012 // t
242 };
243
244 opt_suc(k, &_init, input, 12, false);
245 //printf("\noutp ");
246 opt_output(k, &_init, out);
247 }
248
249 void opt_doReaderMAC(uint8_t *cc_nr_p, uint8_t *div_key_p, uint8_t mac[4]) {
250 uint8_t dest[] = {0, 0, 0, 0, 0, 0, 0, 0};
251 opt_MAC(div_key_p, cc_nr_p, dest);
252 memcpy(mac, dest, 4);
253 return;
254 }
255
256 void opt_doTagMAC(uint8_t *cc_p, const uint8_t *div_key_p, uint8_t mac[4]) {
257 State _init = {
258 ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l
259 ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r
260 0x4c, // b
261 0xE012 // t
262 };
263 opt_suc(div_key_p, &_init, cc_p, 12, true);
264 opt_output(div_key_p, &_init, mac);
265 return;
266 }
267
268 /**
269 * The tag MAC can be divided (both can, but no point in dividing the reader mac) into
270 * two functions, since the first 8 bytes are known, we can pre-calculate the state
271 * reached after feeding CC to the cipher.
272 * @param cc_p
273 * @param div_key_p
274 * @return the cipher state
275 */
276 State opt_doTagMAC_1(uint8_t *cc_p, const uint8_t *div_key_p) {
277 State _init = {
278 ((div_key_p[0] ^ 0x4c) + 0xEC) & 0xFF,// l
279 ((div_key_p[0] ^ 0x4c) + 0x21) & 0xFF,// r
280 0x4c, // b
281 0xE012 // t
282 };
283 opt_suc(div_key_p, &_init, cc_p, 8, false);
284 return _init;
285 }
286
287 /**
288 * The second part of the tag MAC calculation, since the CC is already calculated into the state,
289 * this function is fed only the NR, and internally feeds the remaining 32 0-bits to generate the tag
290 * MAC response.
291 * @param _init - precalculated cipher state
292 * @param nr - the reader challenge
293 * @param mac - where to store the MAC
294 * @param div_key_p - the key to use
295 */
296 void opt_doTagMAC_2(State _init, uint8_t *nr, uint8_t mac[4], const uint8_t *div_key_p) {
297 opt_suc(div_key_p, &_init, nr, 4, true);
298 opt_output(div_key_p, &_init, mac);
299 return;
300 }
Impressum, Datenschutz