]> git.zerfleddert.de Git - proxmark3-svn/blame - client/hardnested/hardnested_bitarray_core.c
Merge pull request #311 from marshmellow42/master
[proxmark3-svn] / client / hardnested / hardnested_bitarray_core.c
CommitLineData
c48c4d78 1//-----------------------------------------------------------------------------
2// Copyright (C) 2016, 2017 by piwi
3//
4// This code is licensed to you under the terms of the GNU GPL, version 2 or,
5// at your option, any later version. See the LICENSE.txt file for the text of
6// the license.
7//-----------------------------------------------------------------------------
8// Implements a card only attack based on crypto text (encrypted nonces
9// received during a nested authentication) only. Unlike other card only
10// attacks this doesn't rely on implementation errors but only on the
11// inherent weaknesses of the crypto1 cypher. Described in
12// Carlo Meijer, Roel Verdult, "Ciphertext-only Cryptanalysis on Hardened
13// Mifare Classic Cards" in Proceedings of the 22nd ACM SIGSAC Conference on
14// Computer and Communications Security, 2015
15//-----------------------------------------------------------------------------
16// some helper functions which can benefit from SIMD instructions or other special instructions
17//
18
19#include "hardnested_bitarray_core.h"
20
21#include <stdint.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <malloc.h>
25
// This file is compiled once per instruction set. Each build appends an
// instruction-set suffix to every function it defines, so the builds export
// distinct names (malloc_bitarray_AVX2, malloc_bitarray_SSE2, ...).

// Suffix of the most capable instruction set enabled for this compilation.
#if defined (__AVX512F__)
#define BITARRAY_ISA_SUFFIX AVX512
#elif defined (__AVX2__)
#define BITARRAY_ISA_SUFFIX AVX2
#elif defined (__AVX__)
#define BITARRAY_ISA_SUFFIX AVX
#elif defined (__SSE2__)
#define BITARRAY_ISA_SUFFIX SSE2
#elif defined (__MMX__)
#define BITARRAY_ISA_SUFFIX MMX
#else
#define BITARRAY_ISA_SUFFIX NOSIMD
#endif

// Pasting goes through an extra macro level so that BITARRAY_ISA_SUFFIX is
// expanded to its value before '##' glues it onto the function name prefix.
#define BITARRAY_CONCAT_(prefix, suffix) prefix ## suffix
#define BITARRAY_CONCAT(prefix, suffix) BITARRAY_CONCAT_(prefix, suffix)
#define BITARRAY_ISA_NAME(prefix) BITARRAY_CONCAT(prefix, BITARRAY_ISA_SUFFIX)

// Dedicated function names for the instruction set selected above:
#define MALLOC_BITARRAY          BITARRAY_ISA_NAME(malloc_bitarray_)
#define FREE_BITARRAY            BITARRAY_ISA_NAME(free_bitarray_)
#define BITCOUNT                 BITARRAY_ISA_NAME(bitcount_)
#define COUNT_STATES             BITARRAY_ISA_NAME(count_states_)
#define BITARRAY_AND             BITARRAY_ISA_NAME(bitarray_AND_)
#define BITARRAY_LOW20_AND       BITARRAY_ISA_NAME(bitarray_low20_AND_)
#define COUNT_BITARRAY_AND       BITARRAY_ISA_NAME(count_bitarray_AND_)
#define COUNT_BITARRAY_LOW20_AND BITARRAY_ISA_NAME(count_bitarray_low20_AND_)
#define BITARRAY_AND4            BITARRAY_ISA_NAME(bitarray_AND4_)
#define BITARRAY_OR              BITARRAY_ISA_NAME(bitarray_OR_)
#define COUNT_BITARRAY_AND2      BITARRAY_ISA_NAME(count_bitarray_AND2_)
#define COUNT_BITARRAY_AND3      BITARRAY_ISA_NAME(count_bitarray_AND3_)
#define COUNT_BITARRAY_AND4      BITARRAY_ISA_NAME(count_bitarray_AND4_)
113
114
// Function types of the bitarray helpers. For every helper this also
// declares one function per instruction-set build plus the runtime
// dispatcher. These plain (non-inline) declarations are what make the
// 'inline' definitions below external definitions.
#define DECLARE_ISA_VARIANTS(type, name) \
    type name ## _AVX512, name ## _AVX2, name ## _AVX, name ## _SSE2, \
         name ## _MMX, name ## _NOSIMD, name ## _dispatch

typedef uint32_t* malloc_bitarray_t(uint32_t);
DECLARE_ISA_VARIANTS(malloc_bitarray_t, malloc_bitarray);

typedef void free_bitarray_t(uint32_t*);
DECLARE_ISA_VARIANTS(free_bitarray_t, free_bitarray);

typedef uint32_t bitcount_t(uint32_t);
DECLARE_ISA_VARIANTS(bitcount_t, bitcount);

typedef uint32_t count_states_t(uint32_t*);
DECLARE_ISA_VARIANTS(count_states_t, count_states);

typedef void bitarray_AND_t(uint32_t[], uint32_t[]);
DECLARE_ISA_VARIANTS(bitarray_AND_t, bitarray_AND);

typedef void bitarray_low20_AND_t(uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(bitarray_low20_AND_t, bitarray_low20_AND);

typedef uint32_t count_bitarray_AND_t(uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(count_bitarray_AND_t, count_bitarray_AND);

typedef uint32_t count_bitarray_low20_AND_t(uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(count_bitarray_low20_AND_t, count_bitarray_low20_AND);

typedef void bitarray_AND4_t(uint32_t*, uint32_t*, uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(bitarray_AND4_t, bitarray_AND4);

typedef void bitarray_OR_t(uint32_t[], uint32_t[]);
DECLARE_ISA_VARIANTS(bitarray_OR_t, bitarray_OR);

typedef uint32_t count_bitarray_AND2_t(uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(count_bitarray_AND2_t, count_bitarray_AND2);

typedef uint32_t count_bitarray_AND3_t(uint32_t*, uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(count_bitarray_AND3_t, count_bitarray_AND3);

typedef uint32_t count_bitarray_AND4_t(uint32_t*, uint32_t*, uint32_t*, uint32_t*);
DECLARE_ISA_VARIANTS(count_bitarray_AND4_t, count_bitarray_AND4);

#undef DECLARE_ISA_VARIANTS
c48c4d78 142
143
// Allocate a block of x bytes aligned to __BIGGEST_ALIGNMENT__.
// The allocator's return value is handed back unchecked, so a failed
// allocation is reported to the caller as whatever the allocator returned.
// Release the block with the matching FREE_BITARRAY().
inline uint32_t *MALLOC_BITARRAY(uint32_t x)
{
    void *block;

#ifdef _WIN32
    block = _aligned_malloc(x, __BIGGEST_ALIGNMENT__);
#else
    block = memalign(__BIGGEST_ALIGNMENT__, x);
#endif

    // tell the optimizer about the alignment we have just requested
    return __builtin_assume_aligned(block, __BIGGEST_ALIGNMENT__);
}
152
153
// Release a block obtained from MALLOC_BITARRAY(), using the deallocator
// that matches the platform's aligned allocator.
inline void FREE_BITARRAY(uint32_t *x)
{
#ifndef _WIN32
    free(x);
#else
    // memory from _aligned_malloc() must go back through _aligned_free()
    _aligned_free(x);
#endif
}
162
163
// Return the number of 1-bits in a.
inline uint32_t BITCOUNT(uint32_t a)
{
    const uint32_t set_bits = __builtin_popcountl(a);
    return set_bits;
}
168
169
// Return the total number of 1-bits in the bitarray A.
// Exactly 1<<19 32-bit words (2^24 bits) are read.
// Presumably each set bit marks one candidate state - confirm with the caller.
inline uint32_t COUNT_STATES(uint32_t *A)
{
    const uint32_t num_words = 1<<19;
    uint32_t total = 0;

    for (uint32_t word = 0; word < num_words; word++) {
        total += BITCOUNT(A[word]);
    }

    return total;
}
178
179
// In-place bitwise AND of two bitarrays: A = A & B.
// Both arrays are processed as 1<<19 32-bit words. The code assumes that
// A and B are aligned to __BIGGEST_ALIGNMENT__ and (restrict) do not overlap.
inline void BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    const uint32_t num_words = 1<<19;

    // alignment promise for the optimizer
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        A[word] = A[word] & B[word];
    }
}
188
189
// Clear every 16-bit unit of A whose corresponding 16-bit unit in B is zero;
// all other units of A are left untouched.
// Both arrays are processed as 1<<19 32-bit words (= 1<<20 16-bit units) and
// are assumed to be aligned to __BIGGEST_ALIGNMENT__ and non-overlapping.
//
// The units are handled as the two halves of each 32-bit word rather than
// through a uint16_t pointer: reading uint32_t storage through a uint16_t
// lvalue violates the aliasing rules (C11 6.5p7). Because both halves of a
// word are treated the same way, the result does not depend on byte order.
// NOTE(review): what "low20" refers to is not visible here - see the caller.
inline void BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    for (uint32_t i = 0; i < (1u<<19); i++) {
        const uint32_t b = B[i];
        // keep a half of A[i] only if the same half of B[i] is non-zero
        const uint32_t keep_lo = (b & 0x0000FFFFu) ? 0x0000FFFFu : 0u;
        const uint32_t keep_hi = (b & 0xFFFF0000u) ? 0xFFFF0000u : 0u;
        A[i] &= (keep_lo | keep_hi);
    }
}
201
202
// In-place bitwise AND of two bitarrays (A = A & B) that also returns the
// number of 1-bits left in A afterwards.
// Both arrays are processed as 1<<19 32-bit words. The code assumes that
// A and B are aligned to __BIGGEST_ALIGNMENT__ and (restrict) do not overlap.
inline uint32_t COUNT_BITARRAY_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    const uint32_t num_words = 1<<19;
    uint32_t remaining = 0;

    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        A[word] = A[word] & B[word];
        remaining += BITCOUNT(A[word]);
    }

    return remaining;
}
214
215
// Clear every 16-bit unit of A whose corresponding 16-bit unit in B is zero
// and return the number of 1-bits left in A afterwards.
// Both arrays are processed as 1<<19 32-bit words (= 1<<20 16-bit units) and
// are assumed to be aligned to __BIGGEST_ALIGNMENT__ and non-overlapping.
//
// The units are handled as the two halves of each 32-bit word rather than
// through a uint16_t pointer: reading uint32_t storage through a uint16_t
// lvalue violates the aliasing rules (C11 6.5p7). Because both halves of a
// word are treated the same way, neither the resulting array nor the count
// depends on byte order.
// NOTE(review): what "low20" refers to is not visible here - see the caller.
inline uint32_t COUNT_BITARRAY_LOW20_AND(uint32_t *restrict A, uint32_t *restrict B)
{
    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    uint32_t count = 0;

    for (uint32_t i = 0; i < (1u<<19); i++) {
        const uint32_t b = B[i];
        // keep a half of A[i] only if the same half of B[i] is non-zero
        const uint32_t keep_lo = (b & 0x0000FFFFu) ? 0x0000FFFFu : 0u;
        const uint32_t keep_hi = (b & 0xFFFF0000u) ? 0xFFFF0000u : 0u;
        const uint32_t kept = A[i] & (keep_lo | keep_hi);
        A[i] = kept;
        // popcount of the word == sum of the popcounts of its two halves
        count += BITCOUNT(kept);
    }
    return count;
}
230
231
// Store the bitwise AND of three bitarrays into a fourth: A = B & C & D.
// The previous contents of A are overwritten, B, C and D are only read.
// All arrays are processed as 1<<19 32-bit words. The code assumes that
// they are aligned to __BIGGEST_ALIGNMENT__ and (restrict) do not overlap.
inline void BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
    const uint32_t num_words = 1<<19;

    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
    D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        const uint32_t common = B[word] & C[word] & D[word];
        A[word] = common;
    }
}
242
243
// In-place bitwise OR of two bitarrays: A = A | B.
// Both arrays are processed as 1<<19 32-bit words. The code assumes that
// A and B are aligned to __BIGGEST_ALIGNMENT__ and (restrict) do not overlap.
inline void BITARRAY_OR(uint32_t *restrict A, uint32_t *restrict B)
{
    const uint32_t num_words = 1<<19;

    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        A[word] = A[word] | B[word];
    }
}
252
253
// Return the number of 1-bits in (A & B) without modifying either array.
// Both arrays are processed as 1<<19 32-bit words and are assumed to be
// aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND2(uint32_t *restrict A, uint32_t *restrict B)
{
    const uint32_t num_words = 1<<19;
    uint32_t total = 0;

    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        const uint32_t common = A[word] & B[word];
        total += BITCOUNT(common);
    }

    return total;
}
264
265
// Return the number of 1-bits in (A & B & C) without modifying any array.
// All arrays are processed as 1<<19 32-bit words and are assumed to be
// aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND3(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C)
{
    const uint32_t num_words = 1<<19;
    uint32_t total = 0;

    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        const uint32_t common = A[word] & B[word] & C[word];
        total += BITCOUNT(common);
    }

    return total;
}
277
278
// Return the number of 1-bits in (A & B & C & D) without modifying any array.
// All arrays are processed as 1<<19 32-bit words and are assumed to be
// aligned to __BIGGEST_ALIGNMENT__.
inline uint32_t COUNT_BITARRAY_AND4(uint32_t *restrict A, uint32_t *restrict B, uint32_t *restrict C, uint32_t *restrict D)
{
    const uint32_t num_words = 1<<19;
    uint32_t total = 0;

    A = __builtin_assume_aligned(A, __BIGGEST_ALIGNMENT__);
    B = __builtin_assume_aligned(B, __BIGGEST_ALIGNMENT__);
    C = __builtin_assume_aligned(C, __BIGGEST_ALIGNMENT__);
    D = __builtin_assume_aligned(D, __BIGGEST_ALIGNMENT__);

    for (uint32_t word = 0; word < num_words; word++) {
        const uint32_t common = A[word] & B[word] & C[word] & D[word];
        total += BITCOUNT(common);
    }

    return total;
}
291
af7a1f70 292
c48c4d78 293#ifndef __MMX__
294
295// pointers to functions:
296malloc_bitarray_t *malloc_bitarray_function_p = &malloc_bitarray_dispatch;
297free_bitarray_t *free_bitarray_function_p = &free_bitarray_dispatch;
298bitcount_t *bitcount_function_p = &bitcount_dispatch;
299count_states_t *count_states_function_p = &count_states_dispatch;
300bitarray_AND_t *bitarray_AND_function_p = &bitarray_AND_dispatch;
301bitarray_low20_AND_t *bitarray_low20_AND_function_p = &bitarray_low20_AND_dispatch;
302count_bitarray_AND_t *count_bitarray_AND_function_p = &count_bitarray_AND_dispatch;
303count_bitarray_low20_AND_t *count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_dispatch;
304bitarray_AND4_t *bitarray_AND4_function_p = &bitarray_AND4_dispatch;
305bitarray_OR_t *bitarray_OR_function_p = &bitarray_OR_dispatch;
306count_bitarray_AND2_t *count_bitarray_AND2_function_p = &count_bitarray_AND2_dispatch;
307count_bitarray_AND3_t *count_bitarray_AND3_function_p = &count_bitarray_AND3_dispatch;
308count_bitarray_AND4_t *count_bitarray_AND4_function_p = &count_bitarray_AND4_dispatch;
309
310// determine the available instruction set at runtime and call the correct function
311uint32_t *malloc_bitarray_dispatch(uint32_t x) {
af7a1f70 312#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 313 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 314 if (__builtin_cpu_supports("avx512f")) malloc_bitarray_function_p = &malloc_bitarray_AVX512;
315 else if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
f950ce1c 316 #else
317 if (__builtin_cpu_supports("avx2")) malloc_bitarray_function_p = &malloc_bitarray_AVX2;
318 #endif
c48c4d78 319 else if (__builtin_cpu_supports("avx")) malloc_bitarray_function_p = &malloc_bitarray_AVX;
320 else if (__builtin_cpu_supports("sse2")) malloc_bitarray_function_p = &malloc_bitarray_SSE2;
321 else if (__builtin_cpu_supports("mmx")) malloc_bitarray_function_p = &malloc_bitarray_MMX;
af7a1f70 322 else
323#endif
324 malloc_bitarray_function_p = &malloc_bitarray_NOSIMD;
325
c48c4d78 326 // call the most optimized function for this CPU
327 return (*malloc_bitarray_function_p)(x);
328}
329
330void free_bitarray_dispatch(uint32_t *x) {
af7a1f70 331#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 332 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 333 if (__builtin_cpu_supports("avx512f")) free_bitarray_function_p = &free_bitarray_AVX512;
334 else if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
f950ce1c 335 #else
336 if (__builtin_cpu_supports("avx2")) free_bitarray_function_p = &free_bitarray_AVX2;
337 #endif
c48c4d78 338 else if (__builtin_cpu_supports("avx")) free_bitarray_function_p = &free_bitarray_AVX;
339 else if (__builtin_cpu_supports("sse2")) free_bitarray_function_p = &free_bitarray_SSE2;
340 else if (__builtin_cpu_supports("mmx")) free_bitarray_function_p = &free_bitarray_MMX;
af7a1f70 341 else
342#endif
343 free_bitarray_function_p = &free_bitarray_NOSIMD;
344
c48c4d78 345 // call the most optimized function for this CPU
346 (*free_bitarray_function_p)(x);
347}
348
349uint32_t bitcount_dispatch(uint32_t a) {
af7a1f70 350#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 351 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 352 if (__builtin_cpu_supports("avx512f")) bitcount_function_p = &bitcount_AVX512;
353 else if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
f950ce1c 354 #else
355 if (__builtin_cpu_supports("avx2")) bitcount_function_p = &bitcount_AVX2;
356 #endif
c48c4d78 357 else if (__builtin_cpu_supports("avx")) bitcount_function_p = &bitcount_AVX;
358 else if (__builtin_cpu_supports("sse2")) bitcount_function_p = &bitcount_SSE2;
359 else if (__builtin_cpu_supports("mmx")) bitcount_function_p = &bitcount_MMX;
af7a1f70 360 else
361#endif
362 bitcount_function_p = &bitcount_NOSIMD;
363
c48c4d78 364 // call the most optimized function for this CPU
365 return (*bitcount_function_p)(a);
366}
367
368uint32_t count_states_dispatch(uint32_t *bitarray) {
af7a1f70 369#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 370 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 371 if (__builtin_cpu_supports("avx512f")) count_states_function_p = &count_states_AVX512;
372 else if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
f950ce1c 373 #else
374 if (__builtin_cpu_supports("avx2")) count_states_function_p = &count_states_AVX2;
375 #endif
c48c4d78 376 else if (__builtin_cpu_supports("avx")) count_states_function_p = &count_states_AVX;
377 else if (__builtin_cpu_supports("sse2")) count_states_function_p = &count_states_SSE2;
378 else if (__builtin_cpu_supports("mmx")) count_states_function_p = &count_states_MMX;
af7a1f70 379 else
380#endif
381 count_states_function_p = &count_states_NOSIMD;
382
c48c4d78 383 // call the most optimized function for this CPU
384 return (*count_states_function_p)(bitarray);
385}
386
387void bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
af7a1f70 388#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 389 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 390 if (__builtin_cpu_supports("avx512f")) bitarray_AND_function_p = &bitarray_AND_AVX512;
391 else if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
f950ce1c 392 #else
393 if (__builtin_cpu_supports("avx2")) bitarray_AND_function_p = &bitarray_AND_AVX2;
394 #endif
c48c4d78 395 else if (__builtin_cpu_supports("avx")) bitarray_AND_function_p = &bitarray_AND_AVX;
396 else if (__builtin_cpu_supports("sse2")) bitarray_AND_function_p = &bitarray_AND_SSE2;
397 else if (__builtin_cpu_supports("mmx")) bitarray_AND_function_p = &bitarray_AND_MMX;
af7a1f70 398 else
399#endif
400 bitarray_AND_function_p = &bitarray_AND_NOSIMD;
401
c48c4d78 402 // call the most optimized function for this CPU
403 (*bitarray_AND_function_p)(A,B);
404}
405
406void bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
af7a1f70 407#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 408 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 409 if (__builtin_cpu_supports("avx512f")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX512;
410 else if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
f950ce1c 411 #else
412 if (__builtin_cpu_supports("avx2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX2;
413 #endif
c48c4d78 414 else if (__builtin_cpu_supports("avx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_AVX;
415 else if (__builtin_cpu_supports("sse2")) bitarray_low20_AND_function_p = &bitarray_low20_AND_SSE2;
416 else if (__builtin_cpu_supports("mmx")) bitarray_low20_AND_function_p = &bitarray_low20_AND_MMX;
af7a1f70 417 else
418#endif
419 bitarray_low20_AND_function_p = &bitarray_low20_AND_NOSIMD;
420
c48c4d78 421 // call the most optimized function for this CPU
422 (*bitarray_low20_AND_function_p)(A, B);
423}
424
425uint32_t count_bitarray_AND_dispatch(uint32_t *A, uint32_t *B) {
af7a1f70 426#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 427 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 428 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX512;
429 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
f950ce1c 430 #else
431 if (__builtin_cpu_supports("avx2")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX2;
432 #endif
c48c4d78 433 else if (__builtin_cpu_supports("avx")) count_bitarray_AND_function_p = &count_bitarray_AND_AVX;
434 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND_function_p = &count_bitarray_AND_SSE2;
435 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND_function_p = &count_bitarray_AND_MMX;
af7a1f70 436 else
437#endif
438 count_bitarray_AND_function_p = &count_bitarray_AND_NOSIMD;
439
c48c4d78 440 // call the most optimized function for this CPU
441 return (*count_bitarray_AND_function_p)(A, B);
442}
443
444uint32_t count_bitarray_low20_AND_dispatch(uint32_t *A, uint32_t *B) {
af7a1f70 445#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 446 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 447 if (__builtin_cpu_supports("avx512f")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX512;
448 else if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
f950ce1c 449 #else
450 if (__builtin_cpu_supports("avx2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX2;
451 #endif
c48c4d78 452 else if (__builtin_cpu_supports("avx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_AVX;
453 else if (__builtin_cpu_supports("sse2")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_SSE2;
454 else if (__builtin_cpu_supports("mmx")) count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_MMX;
af7a1f70 455 else
456#endif
457 count_bitarray_low20_AND_function_p = &count_bitarray_low20_AND_NOSIMD;
458
c48c4d78 459 // call the most optimized function for this CPU
460 return (*count_bitarray_low20_AND_function_p)(A, B);
461}
462
463void bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
af7a1f70 464#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 465 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 466 if (__builtin_cpu_supports("avx512f")) bitarray_AND4_function_p = &bitarray_AND4_AVX512;
467 else if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
f950ce1c 468 #else
469 if (__builtin_cpu_supports("avx2")) bitarray_AND4_function_p = &bitarray_AND4_AVX2;
470 #endif
c48c4d78 471 else if (__builtin_cpu_supports("avx")) bitarray_AND4_function_p = &bitarray_AND4_AVX;
472 else if (__builtin_cpu_supports("sse2")) bitarray_AND4_function_p = &bitarray_AND4_SSE2;
473 else if (__builtin_cpu_supports("mmx")) bitarray_AND4_function_p = &bitarray_AND4_MMX;
af7a1f70 474 else
475#endif
476 bitarray_AND4_function_p = &bitarray_AND4_NOSIMD;
477
c48c4d78 478 // call the most optimized function for this CPU
479 (*bitarray_AND4_function_p)(A, B, C, D);
480}
481
482void bitarray_OR_dispatch(uint32_t *A, uint32_t *B) {
af7a1f70 483#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 484 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 485 if (__builtin_cpu_supports("avx512f")) bitarray_OR_function_p = &bitarray_OR_AVX512;
486 else if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
f950ce1c 487 #else
488 if (__builtin_cpu_supports("avx2")) bitarray_OR_function_p = &bitarray_OR_AVX2;
489 #endif
c48c4d78 490 else if (__builtin_cpu_supports("avx")) bitarray_OR_function_p = &bitarray_OR_AVX;
491 else if (__builtin_cpu_supports("sse2")) bitarray_OR_function_p = &bitarray_OR_SSE2;
492 else if (__builtin_cpu_supports("mmx")) bitarray_OR_function_p = &bitarray_OR_MMX;
af7a1f70 493 else
494#endif
495 bitarray_OR_function_p = &bitarray_OR_NOSIMD;
496
c48c4d78 497 // call the most optimized function for this CPU
498 (*bitarray_OR_function_p)(A,B);
499}
500
501uint32_t count_bitarray_AND2_dispatch(uint32_t *A, uint32_t *B) {
af7a1f70 502#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 503 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 504 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX512;
505 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
f950ce1c 506 #else
507 if (__builtin_cpu_supports("avx2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX2;
508 #endif
c48c4d78 509 else if (__builtin_cpu_supports("avx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_AVX;
510 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND2_function_p = &count_bitarray_AND2_SSE2;
511 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND2_function_p = &count_bitarray_AND2_MMX;
af7a1f70 512 else
513#endif
514 count_bitarray_AND2_function_p = &count_bitarray_AND2_NOSIMD;
515
c48c4d78 516 // call the most optimized function for this CPU
517 return (*count_bitarray_AND2_function_p)(A, B);
518}
519
520uint32_t count_bitarray_AND3_dispatch(uint32_t *A, uint32_t *B, uint32_t *C) {
af7a1f70 521#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 522 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 523 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX512;
524 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
f950ce1c 525 #else
526 if (__builtin_cpu_supports("avx2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX2;
527 #endif
c48c4d78 528 else if (__builtin_cpu_supports("avx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_AVX;
529 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND3_function_p = &count_bitarray_AND3_SSE2;
530 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND3_function_p = &count_bitarray_AND3_MMX;
af7a1f70 531 else
532#endif
533 count_bitarray_AND3_function_p = &count_bitarray_AND3_NOSIMD;
534
c48c4d78 535 // call the most optimized function for this CPU
536 return (*count_bitarray_AND3_function_p)(A, B, C);
537}
538
539uint32_t count_bitarray_AND4_dispatch(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
af7a1f70 540#if defined (__i386__) || defined (__x86_64__)
e5baf1ef 541 #if (__GNUC__ >= 5) && (__GNUC__ > 5 || __GNUC_MINOR__ > 2)
c48c4d78 542 if (__builtin_cpu_supports("avx512f")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX512;
543 else if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
f950ce1c 544 #else
545 if (__builtin_cpu_supports("avx2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX2;
546 #endif
c48c4d78 547 else if (__builtin_cpu_supports("avx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_AVX;
548 else if (__builtin_cpu_supports("sse2")) count_bitarray_AND4_function_p = &count_bitarray_AND4_SSE2;
549 else if (__builtin_cpu_supports("mmx")) count_bitarray_AND4_function_p = &count_bitarray_AND4_MMX;
af7a1f70 550 else
551#endif
552 count_bitarray_AND4_function_p = &count_bitarray_AND4_NOSIMD;
553
c48c4d78 554 // call the most optimized function for this CPU
555 return (*count_bitarray_AND4_function_p)(A, B, C, D);
556}
557
558
559/////////////////////////////////////////////////
560// Entries to dispatched function calls
561
// Public entry point: forwards to whatever malloc_bitarray_function_p
// currently points at and returns its result.
uint32_t *malloc_bitarray(uint32_t x) {
    return malloc_bitarray_function_p(x);
}
565
// Public entry point: forwards to whatever free_bitarray_function_p
// currently points at.
void free_bitarray(uint32_t *x) {
    free_bitarray_function_p(x);
}
569
// Public entry point: forwards to whatever bitcount_function_p currently
// points at and returns its result.
uint32_t bitcount(uint32_t a) {
    return bitcount_function_p(a);
}
573
// Public entry point: forwards to whatever count_states_function_p
// currently points at and returns its result.
uint32_t count_states(uint32_t *bitarray) {
    return count_states_function_p(bitarray);
}
577
// Public entry point: forwards to whatever bitarray_AND_function_p
// currently points at.
void bitarray_AND(uint32_t *A, uint32_t *B) {
    bitarray_AND_function_p(A, B);
}
581
// Public entry point: forwards to whatever bitarray_low20_AND_function_p
// currently points at.
void bitarray_low20_AND(uint32_t *A, uint32_t *B) {
    bitarray_low20_AND_function_p(A, B);
}
585
// Public entry point: forwards to whatever count_bitarray_AND_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND(uint32_t *A, uint32_t *B) {
    return count_bitarray_AND_function_p(A, B);
}
589
// Public entry point: forwards to whatever
// count_bitarray_low20_AND_function_p currently points at and returns its
// result.
uint32_t count_bitarray_low20_AND(uint32_t *A, uint32_t *B) {
    return count_bitarray_low20_AND_function_p(A, B);
}
593
// Public entry point: forwards to whatever bitarray_AND4_function_p
// currently points at.
void bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
    bitarray_AND4_function_p(A, B, C, D);
}
597
// Public entry point: forwards to whatever bitarray_OR_function_p
// currently points at.
void bitarray_OR(uint32_t *A, uint32_t *B) {
    bitarray_OR_function_p(A, B);
}
601
// Public entry point: forwards to whatever count_bitarray_AND2_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND2(uint32_t *A, uint32_t *B) {
    return count_bitarray_AND2_function_p(A, B);
}
605
// Public entry point: forwards to whatever count_bitarray_AND3_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND3(uint32_t *A, uint32_t *B, uint32_t *C) {
    return count_bitarray_AND3_function_p(A, B, C);
}
609
// Public entry point: forwards to whatever count_bitarray_AND4_function_p
// currently points at and returns its result.
uint32_t count_bitarray_AND4(uint32_t *A, uint32_t *B, uint32_t *C, uint32_t *D) {
    return count_bitarray_AND4_function_p(A, B, C, D);
}
613
614#endif
615
Impressum, Datenschutz