]> git.zerfleddert.de Git - proxmark3-svn/blob - client/hardnested/hardnested_bitarray_core.c
320c4a96e7b6001faa7b174c317ca985a02699bf
[proxmark3-svn] / client / hardnested / hardnested_bitarray_core.c
1 //-----------------------------------------------------------------------------
2 // Copyright (C) 2016, 2017 by piwi
3 //
4 // This code is licensed to you under the terms of the GNU GPL, version 2 or,
5 // at your option, any later version. See the LICENSE.txt file for the text of
6 // the license.
7 //-----------------------------------------------------------------------------
8 // Implements a card only attack based on crypto text (encrypted nonces
9 // received during a nested authentication) only. Unlike other card only
10 // attacks this doesn't rely on implementation errors but only on the
11 // inherent weaknesses of the crypto1 cypher. Described in
12 // Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
13 // Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
14 // Computer and Communications Security, 2015
15 //-----------------------------------------------------------------------------
16 // some helper functions which can benefit from SIMD instructions or other special instructions
17 //
18
19 #include "hardnested_bitarray_core.h"
20
21 #include <stdint.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #ifndef __APPLE__
25 #include <malloc.h>
26 #endif
27
// This file is compiled several times, once per instruction set.
// Each build selects one suffix, and every function defined below gets that
// suffix appended so that all variants can coexist under distinct names:
#if defined (__AVX512F__)
#define BITARRAY_SIMD_SUFFIX AVX512
#elif defined (__AVX2__)
#define BITARRAY_SIMD_SUFFIX AVX2
#elif defined (__AVX__)
#define BITARRAY_SIMD_SUFFIX AVX
#elif defined (__SSE2__)
#define BITARRAY_SIMD_SUFFIX SSE2
#elif defined (__MMX__)
#define BITARRAY_SIMD_SUFFIX MMX
#else
#define BITARRAY_SIMD_SUFFIX NOSIMD
#endif

// Two-level paste: the outer macro lets BITARRAY_SIMD_SUFFIX expand to its
// value before the inner macro glues "<base>_<suffix>" together.
#define BITARRAY_PASTE_(base, suffix) base ## _ ## suffix
#define BITARRAY_PASTE(base, suffix) BITARRAY_PASTE_(base, suffix)
#define BITARRAY_VARIANT(base) BITARRAY_PASTE(base, BITARRAY_SIMD_SUFFIX)

#define MALLOC_BITARRAY BITARRAY_VARIANT(malloc_bitarray)
#define FREE_BITARRAY BITARRAY_VARIANT(free_bitarray)
#define BITCOUNT BITARRAY_VARIANT(bitcount)
#define COUNT_STATES BITARRAY_VARIANT(count_states)
#define BITARRAY_AND BITARRAY_VARIANT(bitarray_AND)
#define BITARRAY_LOW20_AND BITARRAY_VARIANT(bitarray_low20_AND)
#define COUNT_BITARRAY_AND BITARRAY_VARIANT(count_bitarray_AND)
#define COUNT_BITARRAY_LOW20_AND BITARRAY_VARIANT(count_bitarray_low20_AND)
#define BITARRAY_AND4 BITARRAY_VARIANT(bitarray_AND4)
#define BITARRAY_OR BITARRAY_VARIANT(bitarray_OR)
#define COUNT_BITARRAY_AND2 BITARRAY_VARIANT(count_bitarray_AND2)
#define COUNT_BITARRAY_AND3 BITARRAY_VARIANT(count_bitarray_AND3)
#define COUNT_BITARRAY_AND4 BITARRAY_VARIANT(count_bitarray_AND4)
115
116
// Function types for every operation in this file, each followed by the
// prototypes of all per-instruction-set variants and of the runtime dispatcher.
typedef uint32_t *malloc_bitarray_t(uint32_t);
malloc_bitarray_t
	malloc_bitarray_AVX512, malloc_bitarray_AVX2, malloc_bitarray_AVX,
	malloc_bitarray_SSE2, malloc_bitarray_MMX, malloc_bitarray_NOSIMD,
	malloc_bitarray_dispatch;

typedef void free_bitarray_t(uint32_t *);
free_bitarray_t
	free_bitarray_AVX512, free_bitarray_AVX2, free_bitarray_AVX,
	free_bitarray_SSE2, free_bitarray_MMX, free_bitarray_NOSIMD,
	free_bitarray_dispatch;

typedef uint32_t bitcount_t(uint32_t);
bitcount_t
	bitcount_AVX512, bitcount_AVX2, bitcount_AVX,
	bitcount_SSE2, bitcount_MMX, bitcount_NOSIMD,
	bitcount_dispatch;

typedef uint32_t count_states_t(uint32_t *);
count_states_t
	count_states_AVX512, count_states_AVX2, count_states_AVX,
	count_states_SSE2, count_states_MMX, count_states_NOSIMD,
	count_states_dispatch;

typedef void bitarray_AND_t(uint32_t *, uint32_t *);
bitarray_AND_t
	bitarray_AND_AVX512, bitarray_AND_AVX2, bitarray_AND_AVX,
	bitarray_AND_SSE2, bitarray_AND_MMX, bitarray_AND_NOSIMD,
	bitarray_AND_dispatch;

typedef void bitarray_low20_AND_t(uint32_t *, uint32_t *);
bitarray_low20_AND_t
	bitarray_low20_AND_AVX512, bitarray_low20_AND_AVX2, bitarray_low20_AND_AVX,
	bitarray_low20_AND_SSE2, bitarray_low20_AND_MMX, bitarray_low20_AND_NOSIMD,
	bitarray_low20_AND_dispatch;

typedef uint32_t count_bitarray_AND_t(uint32_t *, uint32_t *);
count_bitarray_AND_t
	count_bitarray_AND_AVX512, count_bitarray_AND_AVX2, count_bitarray_AND_AVX,
	count_bitarray_AND_SSE2, count_bitarray_AND_MMX, count_bitarray_AND_NOSIMD,
	count_bitarray_AND_dispatch;

typedef uint32_t count_bitarray_low20_AND_t(uint32_t *, uint32_t *);
count_bitarray_low20_AND_t
	count_bitarray_low20_AND_AVX512, count_bitarray_low20_AND_AVX2, count_bitarray_low20_AND_AVX,
	count_bitarray_low20_AND_SSE2, count_bitarray_low20_AND_MMX, count_bitarray_low20_AND_NOSIMD,
	count_bitarray_low20_AND_dispatch;

typedef void bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *);
bitarray_AND4_t
	bitarray_AND4_AVX512, bitarray_AND4_AVX2, bitarray_AND4_AVX,
	bitarray_AND4_SSE2, bitarray_AND4_MMX, bitarray_AND4_NOSIMD,
	bitarray_AND4_dispatch;

typedef void bitarray_OR_t(uint32_t *, uint32_t *);
bitarray_OR_t
	bitarray_OR_AVX512, bitarray_OR_AVX2, bitarray_OR_AVX,
	bitarray_OR_SSE2, bitarray_OR_MMX, bitarray_OR_NOSIMD,
	bitarray_OR_dispatch;

typedef uint32_t count_bitarray_AND2_t(uint32_t *, uint32_t *);
count_bitarray_AND2_t
	count_bitarray_AND2_AVX512, count_bitarray_AND2_AVX2, count_bitarray_AND2_AVX,
	count_bitarray_AND2_SSE2, count_bitarray_AND2_MMX, count_bitarray_AND2_NOSIMD,
	count_bitarray_AND2_dispatch;

typedef uint32_t count_bitarray_AND3_t(uint32_t *, uint32_t *, uint32_t *);
count_bitarray_AND3_t
	count_bitarray_AND3_AVX512, count_bitarray_AND3_AVX2, count_bitarray_AND3_AVX,
	count_bitarray_AND3_SSE2, count_bitarray_AND3_MMX, count_bitarray_AND3_NOSIMD,
	count_bitarray_AND3_dispatch;

typedef uint32_t count_bitarray_AND4_t(uint32_t *, uint32_t *, uint32_t *, uint32_t *);
count_bitarray_AND4_t
	count_bitarray_AND4_AVX512, count_bitarray_AND4_AVX2, count_bitarray_AND4_AVX,
	count_bitarray_AND4_SSE2, count_bitarray_AND4_MMX, count_bitarray_AND4_NOSIMD,
	count_bitarray_AND4_dispatch;
144
145
// Allocate x bytes aligned to __BIGGEST_ALIGNMENT__ using the platform's
// aligned allocator (_aligned_malloc on Windows, posix_memalign on Apple,
// memalign elsewhere). Returns NULL when the allocation fails.
// The result must be released with FREE_BITARRAY, not plain free(), because
// the Windows allocator needs its matching _aligned_free.
inline uint32_t *MALLOC_BITARRAY(uint32_t x)
{
#if defined (_WIN32)
	return __builtin_assume_aligned(_aligned_malloc(x, __BIGGEST_ALIGNMENT__), __BIGGEST_ALIGNMENT__);
#elif defined (__APPLE__)
	// posix_memalign stores a void*; receive it in a void* object instead of
	// casting the address of a uint32_t* to void** (incompatible pointer types).
	void *allocated_memory = NULL;
	if (posix_memalign(&allocated_memory, __BIGGEST_ALIGNMENT__, x) != 0) {
		return NULL;
	}
	return __builtin_assume_aligned(allocated_memory, __BIGGEST_ALIGNMENT__);
#else
	return __builtin_assume_aligned(memalign(__BIGGEST_ALIGNMENT__, x), __BIGGEST_ALIGNMENT__);
#endif
}
161
162
// Release a buffer obtained from MALLOC_BITARRAY.
// Windows needs _aligned_free to match _aligned_malloc; every other platform
// uses plain free().
inline void FREE_BITARRAY(uint32_t *x)
{
#if !defined (_WIN32)
	free(x);
#else
	_aligned_free(x);
#endif
}
171
172
// Return the number of set bits in a.
// The compiler builtin takes an unsigned long, so a is widened on the call.
inline uint32_t BITCOUNT(uint32_t a)
{
	const uint32_t set_bits = __builtin_popcountl(a);
	return set_bits;
}
177
178
// Sum the set bits of the first 1<<19 32-bit words of A and return the total.
inline uint32_t COUNT_STATES(uint32_t *A)
{
	const uint32_t num_words = 1<<19;
	uint32_t total = 0;
	for (uint32_t w = 0; w < num_words; ++w) {
		const uint32_t word = A[w];
		total += BITCOUNT(word);
	}
	return total;
}
187
188
// In-place bitwise AND: A[w] &= B[w] for the first 1<<19 words.
// Both pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned
// and non-overlapping (restrict), so callers must pass such buffers.
inline void BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
	const uint32_t num_words = 1<<19;
	uint32_t *restrict dst = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict src = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	for (uint32_t w = 0; w < num_words; ++w) {
		dst[w] = dst[w] & src[w];
	}
}
197
198
// Treat both arrays as 1<<20 16-bit units and clear every unit of A whose
// counterpart in B is entirely zero; all other units of A are left untouched.
// Both pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned.
// NOTE(review): the uint32_t buffers are accessed through uint16_t lvalues;
// confirm this is acceptable for the build with respect to strict aliasing.
inline void BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
	const uint32_t num_halfwords = 1<<20;
	uint16_t *dst = (uint16_t *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	uint16_t *src = (uint16_t *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

	for (uint32_t h = 0; h < num_halfwords; ++h) {
		if (src[h] == 0) {
			dst[h] = 0;
		}
	}
}
210
211
// In-place bitwise AND of the first 1<<19 words (A[w] &= B[w]) that also
// returns the number of bits left set in A afterwards.
// Both pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned.
inline uint32_t COUNT_BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
	const uint32_t num_words = 1<<19;
	uint32_t *restrict dst = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict src = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	uint32_t total = 0;
	for (uint32_t w = 0; w < num_words; ++w) {
		dst[w] = dst[w] & src[w];
		total += BITCOUNT(dst[w]);
	}
	return total;
}
223
224
// Treat both arrays as 1<<20 16-bit units, clear every unit of A whose
// counterpart in B is entirely zero, and return the number of bits that
// remain set in A afterwards.
// Both pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned.
// NOTE(review): the uint32_t buffers are accessed through uint16_t lvalues;
// confirm this is acceptable for the build with respect to strict aliasing.
inline uint32_t COUNT_BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
	const uint32_t num_halfwords = 1<<20;
	uint16_t *dst = (uint16_t *)__builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	uint16_t *src = (uint16_t *)__builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	uint32_t total = 0;

	for (uint32_t h = 0; h < num_halfwords; ++h) {
		if (src[h] == 0) {
			dst[h] = 0;
		}
		total += BITCOUNT(dst[h]);
	}
	return total;
}
239
240
// Store the bitwise AND of B, C and D into A for the first 1<<19 words.
// A's previous contents are overwritten, not combined.
// All four pointers are promised to the compiler as
// __BIGGEST_ALIGNMENT__-aligned and non-overlapping (restrict).
inline void BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
	const uint32_t num_words = 1<<19;
	uint32_t *restrict out = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in1 = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in2 = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in3 = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);
	for (uint32_t w = 0; w < num_words; ++w) {
		out[w] = in1[w] & in2[w] & in3[w];
	}
}
251
252
// In-place bitwise OR: A[w] |= B[w] for the first 1<<19 words.
// Both pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned
// and non-overlapping (restrict).
inline void BITARRAY_OR(uint32_t *restrict A, uint32_t *restrict B)
{
	const uint32_t num_words = 1<<19;
	uint32_t *restrict dst = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict src = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	for (uint32_t w = 0; w < num_words; ++w) {
		dst[w] = dst[w] | src[w];
	}
}
261
262
// Return the number of bits set in A & B over the first 1<<19 words.
// Neither array is modified.
// Both pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned.
inline uint32_t COUNT_BITARRAY_AND2(uint32_t *restrict A, uint32_t *restrict B)
{
	const uint32_t num_words = 1<<19;
	const uint32_t *restrict in1 = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in2 = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	uint32_t total = 0;
	for (uint32_t w = 0; w < num_words; ++w) {
		const uint32_t common = in1[w] & in2[w];
		total += BITCOUNT(common);
	}
	return total;
}
273
274
// Return the number of bits set in A & B & C over the first 1<<19 words.
// None of the arrays is modified.
// All pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned.
inline uint32_t COUNT_BITARRAY_AND3(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C)
{
	const uint32_t num_words = 1<<19;
	const uint32_t *restrict in1 = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in2 = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in3 = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
	uint32_t total = 0;
	for (uint32_t w = 0; w < num_words; ++w) {
		const uint32_t common = in1[w] & in2[w] & in3[w];
		total += BITCOUNT(common);
	}
	return total;
}
286
287
// Return the number of bits set in A & B & C & D over the first 1<<19 words.
// None of the arrays is modified.
// All pointers are promised to the compiler as __BIGGEST_ALIGNMENT__-aligned.
inline uint32_t COUNT_BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
	const uint32_t num_words = 1<<19;
	const uint32_t *restrict in1 = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in2 = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in3 = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
	const uint32_t *restrict in4 = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);
	uint32_t total = 0;
	for (uint32_t w = 0; w < num_words; ++w) {
		const uint32_t common = in1[w] & in2[w] & in3[w] & in4[w];
		total += BITCOUNT(common);
	}
	return total;
}
300
301
302 #ifndef __MMX__
303
304 // pointers to functions:
305 malloc_bitarray_t *malloc_bitarray_function_p = &malloc_bitarray_dispatch;
306 free_bitarray_t *free_bitarray_function_p = &free_bitarray_dispatch;
307 bitcount_t *bitcount_function_p = &bitcount_dispatch;
308 count_states_t *count_states_function_p = &count_states_dispatch;
309 bitarray_AND_t *bitarray_AND_function_p = &bitarray_AND_dispatch;
310 bitarray_low20_AND_t *bitarray_low20_AND_function_p = &bitarray_low20_AND_dispatch;
311 count_bitarray_AND_t *count_bitarray_AND_function_p = &count_bitarray_AND_dispatch;
312 count_bitarray_low20_AND_t *count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_dispatch;
313 bitarray_AND4_t *bitarray_AND4_function_p = &bitarray_AND4_dispatch;
314 bitarray_OR_t *bitarray_OR_function_p = &bitarray_OR_dispatch;
315 count_bitarray_AND2_t *count_bitarray_AND2_function_p = &count_bitarray_AND2_dispatch;
316 count_bitarray_AND3_t *count_bitarray_AND3_function_p = &count_bitarray_AND3_dispatch;
317 count_bitarray_AND4_t *count_bitarray_AND4_function_p = &count_bitarray_AND4_dispatch;
318
319 // determine the available instruction set at runtime and call the correct function
320 uint32_t *malloc_bitarray_dispatch(uint32_t x) {
321 #if defined (__i386__) || defined (__x86_64__)
322 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
323 if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512;
324 else if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
325 #else
326 if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
327 #endif
328 else if (__builtin_cpu_supports("avx")) malloc_bitarray_function_p = &malloc_bitarray_AVX;
329 else if (__builtin_cpu_supports("sse2")) malloc_bitarray_function_p = &malloc_bitarray_SSE2;
330 else if (__builtin_cpu_supports("mmx")) malloc_bitarray_function_p = &malloc_bitarray_MMX;
331 else
332 #endif
333 malloc_bitarray_function_p = &malloc_bitarray_NOSIMD;
334
335 // call the most optimized function for this CPU
336 return (*malloc_bitarray_function_p)(x);
337 }
338
339 void free_bitarray_dispatch(uint32_t *x) {
340 #if defined (__i386__) || defined (__x86_64__)
341 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
342 if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512;
343 else if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
344 #else
345 if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
346 #endif
347 else if (__builtin_cpu_supports("avx")) free_bitarray_function_p = &free_bitarray_AVX;
348 else if (__builtin_cpu_supports("sse2")) free_bitarray_function_p = &free_bitarray_SSE2;
349 else if (__builtin_cpu_supports("mmx")) free_bitarray_function_p = &free_bitarray_MMX;
350 else
351 #endif
352 free_bitarray_function_p = &free_bitarray_NOSIMD;
353
354 // call the most optimized function for this CPU
355 (*free_bitarray_function_p)(x);
356 }
357
358 uint32_t bitcount_dispatch(uint32_t a) {
359 #if defined (__i386__) || defined (__x86_64__)
360 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
361 if (__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512;
362 else if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
363 #else
364 if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
365 #endif
366 else if (__builtin_cpu_supports("avx")) bitcount_function_p = &bitcount_AVX;
367 else if (__builtin_cpu_supports("sse2")) bitcount_function_p = &bitcount_SSE2;
368 else if (__builtin_cpu_supports("mmx")) bitcount_function_p = &bitcount_MMX;
369 else
370 #endif
371 bitcount_function_p = &bitcount_NOSIMD;
372
373 // call the most optimized function for this CPU
374 return (*bitcount_function_p)(a);
375 }
376
377 uint32_t count_states_dispatch(uint32_t *bitarray) {
378 #if defined (__i386__) || defined (__x86_64__)
379 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
380 if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512;
381 else if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
382 #else
383 if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
384 #endif
385 else if (__builtin_cpu_supports("avx")) count_states_function_p = &count_states_AVX;
386 else if (__builtin_cpu_supports("sse2")) count_states_function_p = &count_states_SSE2;
387 else if (__builtin_cpu_supports("mmx")) count_states_function_p = &count_states_MMX;
388 else
389 #endif
390 count_states_function_p = &count_states_NOSIMD;
391
392 // call the most optimized function for this CPU
393 return (*count_states_function_p)(bitarray);
394 }
395
396 void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
397 #if defined (__i386__) || defined (__x86_64__)
398 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
399 if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512;
400 else if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
401 #else
402 if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
403 #endif
404 else if (__builtin_cpu_supports("avx")) bitarray_AND_function_p = &bitarray_AND_AVX;
405 else if (__builtin_cpu_supports("sse2")) bitarray_AND_function_p = &bitarray_AND_SSE2;
406 else if (__builtin_cpu_supports("mmx")) bitarray_AND_function_p = &bitarray_AND_MMX;
407 else
408 #endif
409 bitarray_AND_function_p = &bitarray_AND_NOSIMD;
410
411 // call the most optimized function for this CPU
412 (*bitarray_AND_function_p)(A,B);
413 }
414
415 void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
416 #if defined (__i386__) || defined (__x86_64__)
417 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
418 if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512;
419 else if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
420 #else
421 if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
422 #endif
423 else if (__builtin_cpu_supports("avx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX;
424 else if (__builtin_cpu_supports("sse2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_SSE2;
425 else if (__builtin_cpu_supports("mmx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_MMX;
426 else
427 #endif
428 bitarray_low20_AND_function_p = &bitarray_low20_AND_NOSIMD;
429
430 // call the most optimized function for this CPU
431 (*bitarray_low20_AND_function_p)(A, B);
432 }
433
434 uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
435 #if defined (__i386__) || defined (__x86_64__)
436 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
437 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512;
438 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
439 #else
440 if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
441 #endif
442 else if (__builtin_cpu_supports("avx")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX;
443 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND_function_p = &count_bitarray_AND_SSE2;
444 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND_function_p = &count_bitarray_AND_MMX;
445 else
446 #endif
447 count_bitarray_AND_function_p = &count_bitarray_AND_NOSIMD;
448
449 // call the most optimized function for this CPU
450 return (*count_bitarray_AND_function_p)(A, B);
451 }
452
453 uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
454 #if defined (__i386__) || defined (__x86_64__)
455 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
456 if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512;
457 else if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
458 #else
459 if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
460 #endif
461 else if (__builtin_cpu_supports("avx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX;
462 else if (__builtin_cpu_supports("sse2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2;
463 else if (__builtin_cpu_supports("mmx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_MMX;
464 else
465 #endif
466 count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NOSIMD;
467
468 // call the most optimized function for this CPU
469 return (*count_bitarray_low20_AND_function_p)(A, B);
470 }
471
472 void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
473 #if defined (__i386__) || defined (__x86_64__)
474 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
475 if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512;
476 else if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
477 #else
478 if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
479 #endif
480 else if (__builtin_cpu_supports("avx")) bitarray_AND4_function_p = &bitarray_AND4_AVX;
481 else if (__builtin_cpu_supports("sse2")) bitarray_AND4_function_p = &bitarray_AND4_SSE2;
482 else if (__builtin_cpu_supports("mmx")) bitarray_AND4_function_p = &bitarray_AND4_MMX;
483 else
484 #endif
485 bitarray_AND4_function_p = &bitarray_AND4_NOSIMD;
486
487 // call the most optimized function for this CPU
488 (*bitarray_AND4_function_p)(A, B, C, D);
489 }
490
491 void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) {
492 #if defined (__i386__) || defined (__x86_64__)
493 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
494 if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512;
495 else if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
496 #else
497 if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
498 #endif
499 else if (__builtin_cpu_supports("avx")) bitarray_OR_function_p = &bitarray_OR_AVX;
500 else if (__builtin_cpu_supports("sse2")) bitarray_OR_function_p = &bitarray_OR_SSE2;
501 else if (__builtin_cpu_supports("mmx")) bitarray_OR_function_p = &bitarray_OR_MMX;
502 else
503 #endif
504 bitarray_OR_function_p = &bitarray_OR_NOSIMD;
505
506 // call the most optimized function for this CPU
507 (*bitarray_OR_function_p)(A,B);
508 }
509
510 uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
511 #if defined (__i386__) || defined (__x86_64__)
512 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
513 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
514 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
515 #else
516 if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
517 #endif
518 else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
519 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
520 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_MMX;
521 else
522 #endif
523 count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
524
525 // call the most optimized function for this CPU
526 return (*count_bitarray_AND2_function_p)(A, B);
527 }
528
529 uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
530 #if defined (__i386__) || defined (__x86_64__)
531 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
532 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
533 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
534 #else
535 if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
536 #endif
537 else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
538 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
539 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_MMX;
540 else
541 #endif
542 count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
543
544 // call the most optimized function for this CPU
545 return (*count_bitarray_AND3_function_p)(A, B, C);
546 }
547
548 uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
549 #if defined (__i386__) || defined (__x86_64__)
550 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
551 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
552 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
553 #else
554 if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
555 #endif
556 else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
557 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
558 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_MMX;
559 else
560 #endif
561 count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;
562
563 // call the most optimized function for this CPU
564 return (*count_bitarray_AND4_function_p)(A, B, C, D);
565 }
566
567
568 /////////////////////////////////////////////////
569 // Entries to dispatched function calls
570
// Public entry point: forwards to whatever malloc_bitarray_function_p
// currently points at and returns its result.
uint32_t *malloc_bitarray(uint32_t x) {
	return malloc_bitarray_function_p(x);
}
574
// Public entry point: forwards to whatever free_bitarray_function_p
// currently points at.
void free_bitarray(uint32_t *x) {
	free_bitarray_function_p(x);
}
578
// Public entry point: forwards to whatever bitcount_function_p currently
// points at and returns its result.
uint32_t bitcount(uint32_t a) {
	return bitcount_function_p(a);
}
582
// Public entry point: forwards to whatever count_states_function_p currently
// points at and returns its result.
uint32_t count_states(uint32_t *bitarray) {
	return count_states_function_p(bitarray);
}
586
// Public entry point: forwards to whatever bitarray_AND_function_p currently
// points at.
void bitarray_AND(uint32_t *A, uint32_t *B) {
	bitarray_AND_function_p(A, B);
}
590
// Public entry point: forwards to whatever bitarray_low20_AND_function_p
// currently points at.
void bitarray_low20_AND(uint32_t *A, uint32_t *B) {
	bitarray_low20_AND_function_p(A, B);
}
594
// Public entry point: forwards to whatever count_bitarray_AND_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND(uint32_t *A, uint32_t *B) {
	return count_bitarray_AND_function_p(A, B);
}
598
// Public entry point: forwards to whatever count_bitarray_low20_AND_function_p
// currently points at and returns its result.
uint32_t count_bitarray_low20_AND(uint32_t *A, uint32_t *B) {
	return count_bitarray_low20_AND_function_p(A, B);
}
602
// Public entry point: forwards to whatever bitarray_AND4_function_p currently
// points at.
void bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
	bitarray_AND4_function_p(A, B, C, D);
}
606
// Public entry point: forwards to whatever bitarray_OR_function_p currently
// points at.
void bitarray_OR(uint32_t *A, uint32_t *B) {
	bitarray_OR_function_p(A, B);
}
610
// Public entry point: forwards to whatever count_bitarray_AND2_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND2(uint32_t *A, uint32_t *B) {
	return count_bitarray_AND2_function_p(A, B);
}
614
// Public entry point: forwards to whatever count_bitarray_AND3_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND3(uint32_t *A, uint32_t *B, uint32_t *C) {
	return count_bitarray_AND3_function_p(A, B, C);
}
618
// Public entry point: forwards to whatever count_bitarray_AND4_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
	return count_bitarray_AND4_function_p(A, B, C, D);
}
622
623 #endif
624
Impressum, Datenschutz