Back to index

enigmail  1.4.3
HashFunctions.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
00002  * vim: set ts=8 sw=4 et tw=99 ft=cpp:
00003  *
00004  * This Source Code Form is subject to the terms of the Mozilla Public
00005  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
00006  * You can obtain one at http://mozilla.org/MPL/2.0/. */
00007 
00008 /* Utilities for hashing */
00009 
00010 /*
00011  * This file exports functions for hashing data down to a 32-bit value,
00012  * including:
00013  *
00014  *  - HashString    Hash a char* or uint16_t/wchar_t* of known or unknown
00015  *                  length.
00016  *
00017  *  - HashBytes     Hash a byte array of known length.
00018  *
00019  *  - HashGeneric   Hash one or more values.  Currently, we support uint32_t,
00020  *                  types which can be implicitly cast to uint32_t, data
00021  *                  pointers, and function pointers.
00022  *
00023  *  - AddToHash     Add one or more values to the given hash.  This supports the
00024  *                  same list of types as HashGeneric.
00025  *
00026  *
00027  * You can chain these functions together to hash complex objects.  For example:
00028  *
00029  *  class ComplexObject {
00030  *    char* str;
00031  *    uint32_t uint1, uint2;
00032  *    void (*callbackFn)();
00033  *
00034  *    uint32_t Hash() {
00035  *      uint32_t hash = HashString(str);
00036  *      hash = AddToHash(hash, uint1, uint2);
00037  *      return AddToHash(hash, callbackFn);
00038  *    }
00039  *  };
00040  *
00041  * If you want to hash an nsAString or nsACString, use the HashString functions
00042  * in nsHashKey.h.
00043  */
00044 
00045 #ifndef mozilla_HashFunctions_h_
00046 #define mozilla_HashFunctions_h_
00047 
00048 #include "mozilla/Assertions.h"
00049 #include "mozilla/Attributes.h"
00050 #include "mozilla/StandardInteger.h"
00051 
00052 #ifdef __cplusplus
00053 namespace mozilla {
00054 
00058 static const uint32_t GoldenRatioU32 = 0x9E3779B9U;
00059 
00060 inline uint32_t
00061 RotateLeft32(uint32_t value, uint8_t bits)
00062 {
00063   MOZ_ASSERT(bits < 32);
00064   return (value << bits) | (value >> (32 - bits));
00065 }
00066 
00067 namespace detail {
00068 
00069 inline uint32_t
00070 AddU32ToHash(uint32_t hash, uint32_t value)
00071 {
00072   /*
00073    * This is the meat of all our hash routines.  This hash function is not
00074    * particularly sophisticated, but it seems to work well for our mostly
00075    * plain-text inputs.  Implementation notes follow.
00076    *
00077    * Our use of the golden ratio here is arbitrary; we could pick almost any
00078    * number which:
00079    *
00080    *  * is odd (because otherwise, all our hash values will be even)
00081    *
00082    *  * has a reasonably-even mix of 1's and 0's (consider the extreme case
00083    *    where we multiply by 0x3 or 0xeffffff -- this will not produce good
00084    *    mixing across all bits of the hash).
00085    *
00086    * The rotation length of 5 is also arbitrary, although an odd number is again
00087    * preferable so our hash explores the whole universe of possible rotations.
00088    *
00089    * Finally, we multiply by the golden ratio *after* xor'ing, not before.
00090    * Otherwise, if |hash| is 0 (as it often is for the beginning of a message),
00091    * the expression
00092    *
00093    *   (GoldenRatioU32 * RotateLeft(hash, 5)) |xor| value
00094    *
00095    * evaluates to |value|.
00096    *
00097    * (Number-theoretic aside: Because any odd number |m| is relatively prime to
00098    * our modulus (2^32), the list
00099    *
00100    *    [x * m (mod 2^32) for 0 <= x < 2^32]
00101    *
00102    * has no duplicate elements.  This means that multiplying by |m| does not
00103    * cause us to skip any possible hash values.
00104    *
00105    * It's also nice if |m| has large-ish order mod 2^32 -- that is, if the
00106    * smallest k such that m^k == 1 (mod 2^32) is large -- so we can safely
00107    * multiply our hash value by |m| a few times without negating the
00108    * multiplicative effect.  Our golden ratio constant has order 2^29, which is
00109    * more than enough for our purposes.)
00110    */
00111   return GoldenRatioU32 * (RotateLeft32(hash, 5) ^ value);
00112 }
00113 
00117 template<size_t PtrSize>
00118 inline uint32_t
00119 AddUintptrToHash(uint32_t hash, uintptr_t value);
00120 
00121 template<>
00122 inline uint32_t
00123 AddUintptrToHash<4>(uint32_t hash, uintptr_t value)
00124 {
00125   return AddU32ToHash(hash, static_cast<uint32_t>(value));
00126 }
00127 
00128 template<>
00129 inline uint32_t
00130 AddUintptrToHash<8>(uint32_t hash, uintptr_t value)
00131 {
00132   /*
00133    * The static cast to uint64_t below is necessary because this function
00134    * sometimes gets compiled on 32-bit platforms (yes, even though it's a
00135    * template and we never call this particular override in a 32-bit build).  If
00136    * we do value >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
00137    * right 32 bits, and the compiler throws an error.
00138    */
00139   uint32_t v1 = static_cast<uint32_t>(value);
00140   uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
00141   return AddU32ToHash(AddU32ToHash(hash, v1), v2);
00142 }
00143 
00144 } /* namespace detail */
00145 
00153 template<typename A>
00154 MOZ_WARN_UNUSED_RESULT
00155 inline uint32_t
00156 AddToHash(uint32_t hash, A a)
00157 {
00158   /*
00159    * Try to convert |A| to uint32_t implicitly.  If this works, great.  If not,
00160    * we'll error out.
00161    */
00162   return detail::AddU32ToHash(hash, a);
00163 }
00164 
00165 template<typename A>
00166 MOZ_WARN_UNUSED_RESULT
00167 inline uint32_t
00168 AddToHash(uint32_t hash, A* a)
00169 {
00170   /*
00171    * You might think this function should just take a void*.  But then we'd only
00172    * catch data pointers and couldn't handle function pointers.
00173    */
00174 
00175   MOZ_STATIC_ASSERT(sizeof(a) == sizeof(uintptr_t),
00176                     "Strange pointer!");
00177 
00178   return detail::AddUintptrToHash<sizeof(uintptr_t)>(hash, uintptr_t(a));
00179 }
00180 
00181 template<typename A, typename B>
00182 MOZ_WARN_UNUSED_RESULT
00183 uint32_t
00184 AddToHash(uint32_t hash, A a, B b)
00185 {
00186   return AddToHash(AddToHash(hash, a), b);
00187 }
00188 
00189 template<typename A, typename B, typename C>
00190 MOZ_WARN_UNUSED_RESULT
00191 uint32_t
00192 AddToHash(uint32_t hash, A a, B b, C c)
00193 {
00194   return AddToHash(AddToHash(hash, a, b), c);
00195 }
00196 
00197 template<typename A, typename B, typename C, typename D>
00198 MOZ_WARN_UNUSED_RESULT
00199 uint32_t
00200 AddToHash(uint32_t hash, A a, B b, C c, D d)
00201 {
00202   return AddToHash(AddToHash(hash, a, b, c), d);
00203 }
00204 
00205 template<typename A, typename B, typename C, typename D, typename E>
00206 MOZ_WARN_UNUSED_RESULT
00207 uint32_t
00208 AddToHash(uint32_t hash, A a, B b, C c, D d, E e)
00209 {
00210   return AddToHash(AddToHash(hash, a, b, c, d), e);
00211 }
00212 
00220 template<typename A>
00221 MOZ_WARN_UNUSED_RESULT
00222 inline uint32_t
00223 HashGeneric(A a)
00224 {
00225   return AddToHash(0, a);
00226 }
00227 
00228 template<typename A, typename B>
00229 MOZ_WARN_UNUSED_RESULT
00230 inline uint32_t
00231 HashGeneric(A a, B b)
00232 {
00233   return AddToHash(0, a, b);
00234 }
00235 
00236 template<typename A, typename B, typename C>
00237 MOZ_WARN_UNUSED_RESULT
00238 inline uint32_t
00239 HashGeneric(A a, B b, C c)
00240 {
00241   return AddToHash(0, a, b, c);
00242 }
00243 
00244 template<typename A, typename B, typename C, typename D>
00245 MOZ_WARN_UNUSED_RESULT
00246 inline uint32_t
00247 HashGeneric(A a, B b, C c, D d)
00248 {
00249   return AddToHash(0, a, b, c, d);
00250 }
00251 
00252 template<typename A, typename B, typename C, typename D, typename E>
00253 MOZ_WARN_UNUSED_RESULT
00254 inline uint32_t
00255 HashGeneric(A a, B b, C c, D d, E e)
00256 {
00257   return AddToHash(0, a, b, c, d, e);
00258 }
00259 
00260 namespace detail {
00261 
00262 template<typename T>
00263 uint32_t
00264 HashUntilZero(const T* str)
00265 {
00266   uint32_t hash = 0;
00267   for (T c; (c = *str); str++)
00268     hash = AddToHash(hash, c);
00269   return hash;
00270 }
00271 
00272 template<typename T>
00273 uint32_t
00274 HashKnownLength(const T* str, size_t length)
00275 {
00276   uint32_t hash = 0;
00277   for (size_t i = 0; i < length; i++)
00278     hash = AddToHash(hash, str[i]);
00279   return hash;
00280 }
00281 
00282 } /* namespace detail */
00283 
00290 MOZ_WARN_UNUSED_RESULT
00291 inline uint32_t
00292 HashString(const char* str)
00293 {
00294   return detail::HashUntilZero(str);
00295 }
00296 
00297 MOZ_WARN_UNUSED_RESULT
00298 inline uint32_t
00299 HashString(const char* str, size_t length)
00300 {
00301   return detail::HashKnownLength(str, length);
00302 }
00303 
00304 MOZ_WARN_UNUSED_RESULT
00305 inline uint32_t
00306 HashString(const uint16_t* str)
00307 {
00308   return detail::HashUntilZero(str);
00309 }
00310 
00311 MOZ_WARN_UNUSED_RESULT
00312 inline uint32_t
00313 HashString(const uint16_t* str, size_t length)
00314 {
00315   return detail::HashKnownLength(str, length);
00316 }
00317 
00318 /*
00319  * On Windows, wchar_t (PRUnichar) is not the same as uint16_t, even though it's
00320  * the same width!
00321  */
00322 #ifdef WIN32
00323 MOZ_WARN_UNUSED_RESULT
00324 inline uint32_t
00325 HashString(const wchar_t* str)
00326 {
00327   return detail::HashUntilZero(str);
00328 }
00329 
00330 MOZ_WARN_UNUSED_RESULT
00331 inline uint32_t
00332 HashString(const wchar_t* str, size_t length)
00333 {
00334   return detail::HashKnownLength(str, length);
00335 }
00336 #endif
00337 
00344 MOZ_WARN_UNUSED_RESULT
00345 extern MFBT_API(uint32_t)
00346 HashBytes(const void* bytes, size_t length);
00347 
00348 } /* namespace mozilla */
00349 #endif /* __cplusplus */
00350 #endif /* mozilla_HashFunctions_h_ */