git.zerfleddert.de Git - proxmark3-svn/blob - client/jansson/lookup3.h

   1 /*
   2 -------------------------------------------------------------------------------
   3 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
   4
   5 These are functions for producing 32-bit hashes for hash table lookup.
   6 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
   7 are externally useful functions.  Routines to test the hash are included
   8 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
   9 the public domain.  It has no warranty.
  10
  11 You probably want to use hashlittle().  hashlittle() and hashbig()
  12 hash byte arrays.  hashlittle() is is faster than hashbig() on
  13 little-endian machines.  Intel and AMD are little-endian machines.
  14 On second thought, you probably want hashlittle2(), which is identical to
  15 hashlittle() except it returns two 32-bit hashes for the price of one.
  16 You could implement hashbig2() if you wanted but I haven't bothered here.
  17
  18 If you want to find a hash of, say, exactly 7 integers, do
  19   a = i1;  b = i2;  c = i3;
  20   mix(a,b,c);
  21   a += i4; b += i5; c += i6;
  22   mix(a,b,c);
  23   a += i7;
  24   final(a,b,c);
  25 then use c as the hash value.  If you have a variable length array of
  26 4-byte integers to hash, use hashword().  If you have a byte array (like
  27 a character string), use hashlittle().  If you have several byte arrays, or
  28 a mix of things, see the comments above hashlittle().
  29
  30 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
  31 then mix those integers.  This is fast (you can do a lot more thorough
  32 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
  33 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
  34 -------------------------------------------------------------------------------
  35 */
  36
  37 #include <stdlib.h>
  38
  39 #ifdef HAVE_CONFIG_H
  40 #include <jansson_private_config.h>
  41 #endif
  42
  43 #ifdef HAVE_STDINT_H
  44 #include <stdint.h>     /* defines uint32_t etc */
  45 #endif
  46
  47 #ifdef HAVE_SYS_PARAM_H
  48 #include <sys/param.h>  /* attempt to define endianness */
  49 #endif
  50
  51 #ifdef HAVE_ENDIAN_H
  52 # include <endian.h>    /* attempt to define endianness */
  53 #endif
  54
  55 /*
  56  * My best guess at if you are big-endian or little-endian.  This may
  57  * need adjustment.
  58  */
  59 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
  60      __BYTE_ORDER == __LITTLE_ENDIAN) || \
  61     (defined(i386) || defined(__i386__) || defined(__i486__) || \
  62      defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
  63 # define HASH_LITTLE_ENDIAN 1
  64 # define HASH_BIG_ENDIAN 0
  65 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
  66        __BYTE_ORDER == __BIG_ENDIAN) || \
  67       (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
  68 # define HASH_LITTLE_ENDIAN 0
  69 # define HASH_BIG_ENDIAN 1
  70 #else
  71 # define HASH_LITTLE_ENDIAN 0
  72 # define HASH_BIG_ENDIAN 0
  73 #endif
  74
  75 #define hashsize(n) ((uint32_t)1<<(n))
  76 #define hashmask(n) (hashsize(n)-1)
  77 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
  78
  79 /*
  80 -------------------------------------------------------------------------------
  81 mix -- mix 3 32-bit values reversibly.
  82
  83 This is reversible, so any information in (a,b,c) before mix() is
  84 still in (a,b,c) after mix().
  85
  86 If four pairs of (a,b,c) inputs are run through mix(), or through
  87 mix() in reverse, there are at least 32 bits of the output that
  88 are sometimes the same for one pair and different for another pair.
  89 This was tested for:
  90 * pairs that differed by one bit, by two bits, in any combination
  91   of top bits of (a,b,c), or in any combination of bottom bits of
  92   (a,b,c).
  93 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  94   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  95   is commonly produced by subtraction) look like a single 1-bit
  96   difference.
  97 * the base values were pseudorandom, all zero but one bit set, or
  98   all zero plus a counter that starts at zero.
  99
 100 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
 101 satisfy this are
 102     4  6  8 16 19  4
 103     9 15  3 18 27 15
 104    14  9  3  7 17  3
 105 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
 106 for "differ" defined as + with a one-bit base and a two-bit delta.  I
 107 used http://burtleburtle.net/bob/hash/avalanche.html to choose
 108 the operations, constants, and arrangements of the variables.
 109
 110 This does not achieve avalanche.  There are input bits of (a,b,c)
 111 that fail to affect some output bits of (a,b,c), especially of a.  The
 112 most thoroughly mixed value is c, but it doesn't really even achieve
 113 avalanche in c.
 114
 115 This allows some parallelism.  Read-after-writes are good at doubling
 116 the number of bits affected, so the goal of mixing pulls in the opposite
 117 direction as the goal of parallelism.  I did what I could.  Rotates
 118 seem to cost as much as shifts on every machine I could lay my hands
 119 on, and rotates are much kinder to the top and bottom bits, so I used
 120 rotates.
 121 -------------------------------------------------------------------------------
 122 */
 123 #define mix(a,b,c) \
 124 { \
 125   a -= c;  a ^= rot(c, 4);  c += b; \
 126   b -= a;  b ^= rot(a, 6);  a += c; \
 127   c -= b;  c ^= rot(b, 8);  b += a; \
 128   a -= c;  a ^= rot(c,16);  c += b; \
 129   b -= a;  b ^= rot(a,19);  a += c; \
 130   c -= b;  c ^= rot(b, 4);  b += a; \
 131 }
 132
 133 /*
 134 -------------------------------------------------------------------------------
 135 final -- final mixing of 3 32-bit values (a,b,c) into c
 136
 137 Pairs of (a,b,c) values differing in only a few bits will usually
 138 produce values of c that look totally different.  This was tested for
 139 * pairs that differed by one bit, by two bits, in any combination
 140   of top bits of (a,b,c), or in any combination of bottom bits of
 141   (a,b,c).
 142 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 143   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 144   is commonly produced by subtraction) look like a single 1-bit
 145   difference.
 146 * the base values were pseudorandom, all zero but one bit set, or
 147   all zero plus a counter that starts at zero.
 148
 149 These constants passed:
 150  14 11 25 16 4 14 24
 151  12 14 25 16 4 14 24
 152 and these came close:
 153   4  8 15 26 3 22 24
 154  10  8 15 26 3 22 24
 155  11  8 15 26 3 22 24
 156 -------------------------------------------------------------------------------
 157 */
 158 #define final(a,b,c) \
 159 { \
 160   c ^= b; c -= rot(b,14); \
 161   a ^= c; a -= rot(c,11); \
 162   b ^= a; b -= rot(a,25); \
 163   c ^= b; c -= rot(b,16); \
 164   a ^= c; a -= rot(c,4);  \
 165   b ^= a; b -= rot(a,14); \
 166   c ^= b; c -= rot(b,24); \
 167 }
 168
 169 /*
 170 -------------------------------------------------------------------------------
 171 hashlittle() -- hash a variable-length key into a 32-bit value
 172   k       : the key (the unaligned variable-length array of bytes)
 173   length  : the length of the key, counting by bytes
 174   initval : can be any 4-byte value
 175 Returns a 32-bit value.  Every bit of the key affects every bit of
 176 the return value.  Two keys differing by one or two bits will have
 177 totally different hash values.
 178
 179 The best hash table sizes are powers of 2.  There is no need to do
 180 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
 181 use a bitmask.  For example, if you need only 10 bits, do
 182   h = (h & hashmask(10));
 183 In which case, the hash table should have hashsize(10) elements.
 184
 185 If you are hashing n strings (uint8_t **)k, do it like this:
 186   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
 187
 188 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
 189 code any way you wish, private, educational, or commercial.  It's free.
 190
 191 Use for hash table lookup, or anything where one collision in 2^^32 is
 192 acceptable.  Do NOT use for cryptographic purposes.
 193 -------------------------------------------------------------------------------
 194 */
 195
 196 static uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
 197 {
 198   uint32_t a,b,c;                                          /* internal state */
 199   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
 200
 201   /* Set up the internal state */
 202   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
 203
 204   u.ptr = key;
 205   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
 206     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
 207
 208 /* Detect Valgrind or AddressSanitizer */
 209 #ifdef VALGRIND
 210 # define NO_MASKING_TRICK 1
 211 #else
 212 # if defined(__has_feature)  /* Clang */
 213 #  if __has_feature(address_sanitizer)  /* is ASAN enabled? */
 214 #   define NO_MASKING_TRICK 1
 215 #  endif
 216 # else
 217 #  if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8.x, is ASAN enabled? */
 218 #   define NO_MASKING_TRICK 1
 219 #  endif
 220 # endif
 221 #endif
 222
 223 #ifdef NO_MASKING_TRICK
 224     const uint8_t  *k8;
 225 #endif
 226
 227     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
 228     while (length > 12)
 229     {
 230       a += k[0];
 231       b += k[1];
 232       c += k[2];
 233       mix(a,b,c);
 234       length -= 12;
 235       k += 3;
 236     }
 237
 238     /*----------------------------- handle the last (probably partial) block */
 239     /*
 240      * "k[2]&0xffffff" actually reads beyond the end of the string, but
 241      * then masks off the part it's not allowed to read.  Because the
 242      * string is aligned, the masked-off tail is in the same word as the
 243      * rest of the string.  Every machine with memory protection I've seen
 244      * does it on word boundaries, so is OK with this.  But VALGRIND will
 245      * still catch it and complain.  The masking trick does make the hash
 246      * noticably faster for short strings (like English words).
 247      */
 248 #ifndef NO_MASKING_TRICK
 249
 250     switch(length)
 251     {
 252     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 253     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
 254     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
 255     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
 256     case 8 : b+=k[1]; a+=k[0]; break;
 257     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
 258     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
 259     case 5 : b+=k[1]&0xff; a+=k[0]; break;
 260     case 4 : a+=k[0]; break;
 261     case 3 : a+=k[0]&0xffffff; break;
 262     case 2 : a+=k[0]&0xffff; break;
 263     case 1 : a+=k[0]&0xff; break;
 264     case 0 : return c;              /* zero length strings require no mixing */
 265     }
 266
 267 #else /* make valgrind happy */
 268
 269     k8 = (const uint8_t *)k;
 270     switch(length)
 271     {
 272     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
 273     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
 274     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
 275     case 9 : c+=k8[8];                   /* fall through */
 276     case 8 : b+=k[1]; a+=k[0]; break;
 277     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
 278     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
 279     case 5 : b+=k8[4];                   /* fall through */
 280     case 4 : a+=k[0]; break;
 281     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
 282     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
 283     case 1 : a+=k8[0]; break;
 284     case 0 : return c;
 285     }
 286
 287 #endif /* !valgrind */
 288
 289   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
 290     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
 291     const uint8_t  *k8;
 292
 293     /*--------------- all but last block: aligned reads and different mixing */
 294     while (length > 12)
 295     {
 296       a += k[0] + (((uint32_t)k[1])<<16);
 297       b += k[2] + (((uint32_t)k[3])<<16);
 298       c += k[4] + (((uint32_t)k[5])<<16);
 299       mix(a,b,c);
 300       length -= 12;
 301       k += 6;
 302     }
 303
 304     /*----------------------------- handle the last (probably partial) block */
 305     k8 = (const uint8_t *)k;
 306     switch(length)
 307     {
 308     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
 309              b+=k[2]+(((uint32_t)k[3])<<16);
 310              a+=k[0]+(((uint32_t)k[1])<<16);
 311              break;
 312     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
 313     case 10: c+=k[4];
 314              b+=k[2]+(((uint32_t)k[3])<<16);
 315              a+=k[0]+(((uint32_t)k[1])<<16);
 316              break;
 317     case 9 : c+=k8[8];                      /* fall through */
 318     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
 319              a+=k[0]+(((uint32_t)k[1])<<16);
 320              break;
 321     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
 322     case 6 : b+=k[2];
 323              a+=k[0]+(((uint32_t)k[1])<<16);
 324              break;
 325     case 5 : b+=k8[4];                      /* fall through */
 326     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
 327              break;
 328     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
 329     case 2 : a+=k[0];
 330              break;
 331     case 1 : a+=k8[0];
 332              break;
 333     case 0 : return c;                     /* zero length requires no mixing */
 334     }
 335
 336   } else {                        /* need to read the key one byte at a time */
 337     const uint8_t *k = (const uint8_t *)key;
 338
 339     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
 340     while (length > 12)
 341     {
 342       a += k[0];
 343       a += ((uint32_t)k[1])<<8;
 344       a += ((uint32_t)k[2])<<16;
 345       a += ((uint32_t)k[3])<<24;
 346       b += k[4];
 347       b += ((uint32_t)k[5])<<8;
 348       b += ((uint32_t)k[6])<<16;
 349       b += ((uint32_t)k[7])<<24;
 350       c += k[8];
 351       c += ((uint32_t)k[9])<<8;
 352       c += ((uint32_t)k[10])<<16;
 353       c += ((uint32_t)k[11])<<24;
 354       mix(a,b,c);
 355       length -= 12;
 356       k += 12;
 357     }
 358
 359     /*-------------------------------- last block: affect all 32 bits of (c) */
 360     switch(length)                   /* all the case statements fall through */
 361     {
 362     case 12: c+=((uint32_t)k[11])<<24; /* fall through */
 363     case 11: c+=((uint32_t)k[10])<<16; /* fall through */
 364     case 10: c+=((uint32_t)k[9])<<8; /* fall through */
 365     case 9 : c+=k[8]; /* fall through */
 366     case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
 367     case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
 368     case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
 369     case 5 : b+=k[4]; /* fall through */
 370     case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
 371     case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
 372     case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
 373     case 1 : a+=k[0];
 374              break;
 375     case 0 : return c;
 376     }
 377   }
 378
 379   final(a,b,c);
 380   return c;
 381 }