]>
git.zerfleddert.de Git - proxmark3-svn/blob - armsrc/optimized_cipher.c
1 /*****************************************************************************
4 * THIS CODE IS CREATED FOR EXPERIMENTATION AND EDUCATIONAL USE ONLY.
6 * USAGE OF THIS CODE IN OTHER WAYS MAY INFRINGE UPON THE INTELLECTUAL
7 * PROPERTY OF OTHER PARTIES, SUCH AS INSIDE SECURE AND HID GLOBAL,
8 * AND MAY EXPOSE YOU TO AN INFRINGEMENT ACTION FROM THOSE PARTIES.
10 * THIS CODE SHOULD NEVER BE USED TO INFRINGE PATENTS OR INTELLECTUAL PROPERTY RIGHTS.
12 *****************************************************************************
14 * This file is part of loclass. It is a reconstructon of the cipher engine
15 * used in iClass, and RFID techology.
17 * The implementation is based on the work performed by
18 * Flavio D. Garcia, Gerhard de Koning Gans, Roel Verdult and
19 * Milosch Meriac in the paper "Dismantling IClass".
21 * Copyright (C) 2014 Martin Holst Swende
23 * This is free software: you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License version 2 as published
25 * by the Free Software Foundation, or, at your option, any later version.
27 * This file is distributed in the hope that it will be useful,
28 * but WITHOUT ANY WARRANTY; without even the implied warranty of
29 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30 * GNU General Public License for more details.
32 * You should have received a copy of the GNU General Public License
33 * along with loclass. If not, see <http://www.gnu.org/licenses/>.
37 ****************************************************************************/
41 This file contains an optimized version of the MAC-calculation algorithm. Some measurements on
42 a std laptop showed it runs in about 1/3 of the time:
47 Additionally, it is self-reliant, not requiring e.g. bitstreams from the cipherutils, thus can
48 be easily dropped into a code base.
50 The optimizations have been performed in the following steps:
51 * Parameters passed by reference instead of by value.
52 * Iteration instead of recursion, un-nesting recursive loops into for-loops.
53 * Handling of bytes instead of individual bits, for less shuffling and masking
54 * Less creation of "objects", structs, and instead reuse of alloc:ed memory
55 * Inlining some functions via #define:s
57 As a consequence, this implementation is less generic. Also, I haven't bothered documenting this.
58 For a thorough documentation, check out the MAC-calculation within cipher.c instead.
65 The runtime of opt_doTagMAC_2() with the MHS optimized version was 403 microseconds on Proxmark3.
66 This was still to slow for some newer readers which didn't want to wait that long.
68 Further optimizations to speedup the MAC calculations:
69 * Optimized opt_Tt logic
70 * Look up table for opt_select
71 * Removing many unnecessary bit maskings (& 0x1)
72 * updating state in place instead of alternating use of a second state structure
73 * remove the necessity to reverse bits of input and output bytes
75 opt_doTagMAC_2() now completes in 270 microseconds.
80 #include "optimized_cipher.h"
86 static const uint8_t opt_select_LUT
[256] = {
87 00, 03, 02, 01, 02, 03, 00, 01, 04, 07, 07, 04, 06, 07, 05, 04,
88 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
89 06, 05, 04, 07, 04, 05, 06, 07, 06, 05, 05, 06, 04, 05, 07, 06,
90 07, 04, 05, 06, 04, 05, 06, 07, 07, 04, 04, 07, 04, 05, 07, 06,
91 06, 05, 04, 07, 04, 05, 06, 07, 02, 01, 01, 02, 00, 01, 03, 02,
92 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06,
93 00, 03, 02, 01, 02, 03, 00, 01, 00, 03, 03, 00, 02, 03, 01, 00,
94 05, 06, 07, 04, 06, 07, 04, 05, 05, 06, 06, 05, 06, 07, 05, 04,
95 02, 01, 00, 03, 00, 01, 02, 03, 06, 05, 05, 06, 04, 05, 07, 06,
96 03, 00, 01, 02, 00, 01, 02, 03, 07, 04, 04, 07, 04, 05, 07, 06,
97 02, 01, 00, 03, 00, 01, 02, 03, 02, 01, 01, 02, 00, 01, 03, 02,
98 03, 00, 01, 02, 00, 01, 02, 03, 03, 00, 00, 03, 00, 01, 03, 02,
99 04, 07, 06, 05, 06, 07, 04, 05, 00, 03, 03, 00, 02, 03, 01, 00,
100 01, 02, 03, 00, 02, 03, 00, 01, 05, 06, 06, 05, 06, 07, 05, 04,
101 04, 07, 06, 05, 06, 07, 04, 05, 04, 07, 07, 04, 06, 07, 05, 04,
102 01, 02, 03, 00, 02, 03, 00, 01, 01, 02, 02, 01, 02, 03, 01, 00
105 /********************** the table above has been generated with this code: ********
107 static void init_opt_select_LUT(void) {
108 for (int r = 0; r < 256; r++) {
109 uint8_t r_ls2 = r << 2;
110 uint8_t r_and_ls2 = r & r_ls2;
111 uint8_t r_or_ls2 = r | r_ls2;
112 uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
113 uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r;
114 uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r;
115 opt_select_LUT[r] = (z0 & 4) | (z1 & 2) | (z2 & 1);
117 print_result("", opt_select_LUT, 256);
119 ***********************************************************************************/
121 #define opt__select(x,y,r) (4 & (((r & (r << 2)) >> 5) ^ ((r & ~(r << 2)) >> 4) ^ ( (r | r << 2) >> 3)))\
122 |(2 & (((r | r << 2) >> 6) ^ ( (r | r << 2) >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1)))\
123 |(1 & (((r & ~(r << 2)) >> 4) ^ ((r & (r << 2)) >> 3) ^ r ^ x))
126 * Some background on the expression above can be found here...
127 uint8_t xopt__select(bool x, bool y, uint8_t r)
130 //r: r0 r1 r2 r3 r4 r5 r6 r7
131 //r_ls2: r2 r3 r4 r5 r6 r7 0 0
135 // uint8_t z0 = (r0 & r2) ^ (r1 & ~r3) ^ (r2 | r4); // <-- original
136 uint8_t z0 = (r_and_ls2 >> 5) ^ ((r & ~r_ls2) >> 4) ^ ( r_or_ls2 >> 3);
138 // uint8_t z1 = (r0 | r2) ^ ( r5 | r7) ^ r1 ^ r6 ^ x ^ y; // <-- original
139 uint8_t z1 = (r_or_ls2 >> 6) ^ ( r_or_ls2 >> 1) ^ (r >> 5) ^ r ^ ((x^y) << 1);
141 // uint8_t z2 = (r3 & ~r5) ^ (r4 & r6 ) ^ r7 ^ x; // <-- original
142 uint8_t z2 = ((r & ~r_ls2) >> 4) ^ (r_and_ls2 >> 3) ^ r ^ x;
144 return (z0 & 4) | (z1 & 2) | (z2 & 1);
148 static void opt_successor(const uint8_t *k
, State
*s
, uint8_t y
) {
149 // #define opt_T(s) (0x1 & ((s->t >> 15) ^ (s->t >> 14) ^ (s->t >> 10) ^ (s->t >> 8) ^ (s->t >> 5) ^ (s->t >> 4)^ (s->t >> 1) ^ s->t))
150 // uint8_t Tt = opt_T(s);
151 uint16_t Tt
= s
->t
& 0xc533;
154 Tt
= Tt
^ (Tt
>> 10);
158 s
->t
|= (Tt
^ (s
->r
>> 7) ^ (s
->r
>> 3)) << 15;
160 uint8_t opt_B
= s
->b
;
166 s
->b
|= (opt_B
^ s
->r
) << 7;
168 uint8_t opt_select
= opt_select_LUT
[s
->r
] & 0x04;
169 opt_select
|= (opt_select_LUT
[s
->r
] ^ ((Tt
^ y
) << 1)) & 0x02;
170 opt_select
|= (opt_select_LUT
[s
->r
] ^ Tt
) & 0x01;
173 s
->r
= (k
[opt_select
] ^ s
->b
) + s
->l
;
177 static void opt_suc(const uint8_t *k
, State
*s
, uint8_t *in
, uint8_t length
, bool add32Zeroes
) {
178 for (int i
= 0; i
< length
; i
++) {
181 opt_successor(k
, s
, head
);
184 opt_successor(k
, s
, head
);
187 opt_successor(k
, s
, head
);
190 opt_successor(k
, s
, head
);
193 opt_successor(k
, s
, head
);
196 opt_successor(k
, s
, head
);
199 opt_successor(k
, s
, head
);
202 opt_successor(k
, s
, head
);
204 //For tag MAC, an additional 32 zeroes
206 for(int i
= 0; i
< 16; i
++) {
207 opt_successor(k
, s
, 0);
208 opt_successor(k
, s
, 0);
213 static void opt_output(const uint8_t *k
, State
*s
, uint8_t *buffer
) {
214 for (uint8_t times
= 0; times
< 4; times
++) {
216 bout
|= (s
->r
& 0x4) >> 2;
217 opt_successor(k
, s
, 0);
218 bout
|= (s
->r
& 0x4) >> 1;
219 opt_successor(k
, s
, 0);
220 bout
|= (s
->r
& 0x4);
221 opt_successor(k
, s
, 0);
222 bout
|= (s
->r
& 0x4) << 1;
223 opt_successor(k
, s
, 0);
224 bout
|= (s
->r
& 0x4) << 2;
225 opt_successor(k
, s
, 0);
226 bout
|= (s
->r
& 0x4) << 3;
227 opt_successor(k
, s
, 0);
228 bout
|= (s
->r
& 0x4) << 4;
229 opt_successor(k
, s
, 0);
230 bout
|= (s
->r
& 0x4) << 5;
231 opt_successor(k
, s
, 0);
232 buffer
[times
] = bout
;
236 static void opt_MAC(uint8_t *k
, uint8_t *input
, uint8_t *out
) {
238 ((k
[0] ^ 0x4c) + 0xEC) & 0xFF,// l
239 ((k
[0] ^ 0x4c) + 0x21) & 0xFF,// r
244 opt_suc(k
, &_init
, input
, 12, false);
246 opt_output(k
, &_init
, out
);
249 void opt_doReaderMAC(uint8_t *cc_nr_p
, uint8_t *div_key_p
, uint8_t mac
[4]) {
250 uint8_t dest
[] = {0, 0, 0, 0, 0, 0, 0, 0};
251 opt_MAC(div_key_p
, cc_nr_p
, dest
);
252 memcpy(mac
, dest
, 4);
256 void opt_doTagMAC(uint8_t *cc_p
, const uint8_t *div_key_p
, uint8_t mac
[4]) {
258 ((div_key_p
[0] ^ 0x4c) + 0xEC) & 0xFF,// l
259 ((div_key_p
[0] ^ 0x4c) + 0x21) & 0xFF,// r
263 opt_suc(div_key_p
, &_init
, cc_p
, 12, true);
264 opt_output(div_key_p
, &_init
, mac
);
269 * The tag MAC can be divided (both can, but no point in dividing the reader mac) into
270 * two functions, since the first 8 bytes are known, we can pre-calculate the state
271 * reached after feeding CC to the cipher.
274 * @return the cipher state
276 State
opt_doTagMAC_1(uint8_t *cc_p
, const uint8_t *div_key_p
) {
278 ((div_key_p
[0] ^ 0x4c) + 0xEC) & 0xFF,// l
279 ((div_key_p
[0] ^ 0x4c) + 0x21) & 0xFF,// r
283 opt_suc(div_key_p
, &_init
, cc_p
, 8, false);
288 * The second part of the tag MAC calculation, since the CC is already calculated into the state,
289 * this function is fed only the NR, and internally feeds the remaining 32 0-bits to generate the tag
291 * @param _init - precalculated cipher state
292 * @param nr - the reader challenge
293 * @param mac - where to store the MAC
294 * @param div_key_p - the key to use
296 void opt_doTagMAC_2(State _init
, uint8_t *nr
, uint8_t mac
[4], const uint8_t *div_key_p
) {
297 opt_suc(div_key_p
, &_init
, nr
, 4, true);
298 opt_output(div_key_p
, &_init
, mac
);