diff --git a/src/coreclr/inc/dn_xxhash.h b/src/coreclr/inc/dn_xxhash.h new file mode 100644 index 00000000000000..8a5f6a3dde5fc9 --- /dev/null +++ b/src/coreclr/inc/dn_xxhash.h @@ -0,0 +1,237 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// --------------------------------------------------------------------------- +// xxHash32 primitives and helpers +// +// Based on the xxHash32 logic implemented in System.HashCode, which is in +// turn based on the code published by Yann Collet: +// https://raw.githubusercontent.com/Cyan4973/xxHash/5c174cfa4e45a42f94082dc0d4539b39696afea1/xxhash.c +// +// xxHash - Fast Hash algorithm +// Copyright (C) 2012-2016, Yann Collet +// +// BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// You can contact the author at : +// - xxHash homepage: http://www.xxhash.com +// - xxHash source repository : https://github.com/Cyan4973/xxHash +// --------------------------------------------------------------------------- + +#pragma once +#include +#include "clrtypes.h" + +inline static UINT32 XXHash32_MixEmptyState() +{ + // Unlike System.HashCode, these hash values are required to be stable, so don't + // mix in a random process specific value + return 374761393U; // Prime5 +} + +inline static UINT32 XXHash32_MixState(UINT32 v1, UINT32 v2, UINT32 v3, UINT32 v4) +{ + return (UINT32)_rotl(v1, 1) + (UINT32)_rotl(v2, 7) + (UINT32)_rotl(v3, 12) + (UINT32)_rotl(v4, 18); +} + +inline static UINT32 XXHash32_QueueRound(UINT32 hash, UINT32 queuedValue) +{ + return ((UINT32)_rotl((int)(hash + queuedValue * 3266489917U/*Prime3*/), 17)) * 668265263U/*Prime4*/; +} + +inline static UINT32 XXHash32_Round(UINT32 hash, UINT32 input) +{ + return ((UINT32)_rotl((int)(hash + input * 2246822519U/*Prime2*/), 13)) * 2654435761U/*Prime1*/; +} + +inline static UINT32 XXHash32_MixFinal(UINT32 hash) +{ + hash ^= hash >> 15; + hash *= 2246822519U/*Prime2*/; + hash ^= hash >> 13; + hash *= 3266489917U/*Prime3*/; + hash ^= hash >> 16; + return hash; +} + +inline static UINT32 MixOneValueIntoHash(UINT32 value1) +{ + // This matches the behavior of System.HashCode.Combine(value1) as of the time of authoring + + // Provide a way of diffusing bits from 
something with a limited + // input hash space. For example, many enums only have a few + // possible hashes, only using the bottom few bits of the code. Some + // collections are built on the assumption that hashes are spread + // over a larger space, so diffusing the bits may help the + // collection work more efficiently. + + DWORD hash = XXHash32_MixEmptyState(); + hash += 4; + hash = XXHash32_QueueRound(hash, value1); + hash = XXHash32_MixFinal(hash); + return hash; +} + +inline static UINT32 CombineTwoValuesIntoHash(UINT32 value1, UINT32 value2) +{ + // This matches the behavior of System.HashCode.Combine(value1, value2) as of the time of authoring + DWORD hash = XXHash32_MixEmptyState(); + hash += 8; + hash = XXHash32_QueueRound(hash, value1); + hash = XXHash32_QueueRound(hash, value2); + hash = XXHash32_MixFinal(hash); + return hash; +} + +inline static UINT32 MixPointerIntoHash(void* ptr) +{ +#ifdef HOST_64BIT + return CombineTwoValuesIntoHash((UINT32)(UINT_PTR)ptr, (UINT32)(((UINT64)(UINT_PTR)ptr) >> 32)); +#else + return MixOneValueIntoHash((UINT32)ptr); +#endif +} + +inline static UINT32 CombineThreeValuesIntoHash(UINT32 value1, UINT32 value2, UINT32 value3) +{ + // This matches the behavior of System.HashCode.Combine(value1, value2, value3) as of the time of authoring + DWORD hash = XXHash32_MixEmptyState(); + hash += 12; + hash = XXHash32_QueueRound(hash, value1); + hash = XXHash32_QueueRound(hash, value2); + hash = XXHash32_QueueRound(hash, value3); + hash = XXHash32_MixFinal(hash); + return hash; +} + +// This is a port of the System.HashCode logic for computing a hashcode using the xxHash algorithm. +// However, as this is intended to provide a stable hash, the seed value is always 0. 
+class xxHash +{ + const uint32_t seed = 0; + const uint32_t Prime1 = 2654435761U; + const uint32_t Prime2 = 2246822519U; + const uint32_t Prime3 = 3266489917U; + const uint32_t Prime4 = 668265263U; + const uint32_t Prime5 = 374761393U; + + uint32_t _v1 = seed + Prime1 + Prime2; + uint32_t _v2 = seed + Prime2; + uint32_t _v3 = seed; + uint32_t _v4 = seed - Prime1; + uint32_t _queue1 = 0; + uint32_t _queue2 = 0; + uint32_t _queue3 = 0; + uint32_t _length = 0; + +public: + void Add(uint32_t val) + { + // The original xxHash works as follows: + // 0. Initialize immediately. This class does so through its default + // member initializers. + // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators. + // 2. Accumulate remaining blocks of length 4 (1 uint) into the + // hash. + // 3. Accumulate remaining blocks of length 1 into the hash. + + // There is no need for #3 as this type only accepts ints. _queue1, + // _queue2 and _queue3 are basically a buffer so that when + // ToHashCode is called we can execute #2 correctly. + + // Storing the value of _length locally shaves off quite a few bytes + // in the resulting machine code. + uint32_t previousLength = _length++; + uint32_t position = previousLength % 4; + + // Switch can't be inlined. + + if (position == 0) + _queue1 = val; + else if (position == 1) + _queue2 = val; + else if (position == 2) + _queue3 = val; + else // position == 3 + { + _v1 = XXHash32_Round(_v1, _queue1); + _v2 = XXHash32_Round(_v2, _queue2); + _v3 = XXHash32_Round(_v3, _queue3); + _v4 = XXHash32_Round(_v4, val); + } + } + + void AddPointer(void* ptr) + { +#ifdef HOST_64BIT + Add((uint32_t)(UINT_PTR)ptr); + Add((uint32_t)(((UINT_PTR)ptr) >> 32)); +#else + Add((uint32_t)(UINT_PTR)ptr); +#endif + } + + uint32_t ToHashCode() + { + // Storing the value of _length locally shaves off quite a few bytes + // in the resulting machine code. 
+ uint32_t length = _length; + + // position refers to the *next* queue position in this method, so + // position == 1 means that _queue1 is populated; _queue2 would have + // been populated on the next call to Add. + uint32_t position = length % 4; + + // If the length is less than 4, _v1 to _v4 don't contain anything + // yet. xxHash32 treats this differently. + + uint32_t hash = length < 4 ? XXHash32_MixEmptyState() : XXHash32_MixState(_v1, _v2, _v3, _v4); + + // _length is incremented once per Add(uint32_t) and is therefore 4 + // times too small (xxHash length is in bytes, not ints). + + hash += length * 4; + + // Mix what remains in the queue + + // Switch can't be inlined right now, so use as few branches as + // possible by manually excluding impossible scenarios (position > 1 + // is always false if position is not > 0). + if (position > 0) + { + hash = XXHash32_QueueRound(hash, _queue1); + if (position > 1) + { + hash = XXHash32_QueueRound(hash, _queue2); + if (position > 2) + hash = XXHash32_QueueRound(hash, _queue3); + } + } + + hash = XXHash32_MixFinal(hash); + return (int)hash; + } +}; diff --git a/src/coreclr/inc/shash.h b/src/coreclr/inc/shash.h index ae6892c1625abc..8a9e13d26ec809 100644 --- a/src/coreclr/inc/shash.h +++ b/src/coreclr/inc/shash.h @@ -729,51 +729,6 @@ struct CaseSensitiveStringCompareHash } }; -// Provides case-insensitive comparison and hashing functionality through static -// and functor object methods. Can be instantiated with CHAR or WCHAR. 
-template -struct CaseInsensitiveStringCompareHash -{ -private: - typedef CharT const * str_t; - - static size_t _strcmp(str_t left, str_t right) - { - return ::SString::_tstricmp(left, right); - } - - static size_t _hash(CHAR const *str) - { - return HashiStringA(str); - } - - static size_t _hash(WCHAR const *str) - { - return HashiString(str); - } - -public: - static size_t compare(str_t left, str_t right) - { - return _strcmp(left, right); - } - - size_t operator()(str_t left, str_t right) - { - return compare(left, right); - } - - static size_t hash(str_t str) - { - return _hash(str); - } - - size_t operator()(str_t str) - { - return hash(str); - } -}; - // StringSHashTraits is a traits class useful for string-keyed // pointer hash tables. diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index b60d6a75255449..1151f1d3950451 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -28,6 +28,7 @@ using std::nothrow; #include "volatile.h" #include #include "clrhost.h" +#include "dn_xxhash.h" #include "debugmacros.h" #include "corhlprpriv.h" #include "check.h" @@ -1955,15 +1956,29 @@ inline COUNT_T HashPtr(COUNT_T currentHash, PTR_VOID ptr) inline ULONG HashBytes(BYTE const *pbData, size_t iSize) { LIMITED_METHOD_CONTRACT; - ULONG hash = 5381; - BYTE const *pbDataEnd = pbData + iSize; + ULONG hash = XXHash32_MixEmptyState(); + hash += (ULONG)iSize; - for (/**/ ; pbData < pbDataEnd; pbData++) + // Process 4 bytes at a time. + while (iSize >= sizeof(uint32_t)) { - hash = ((hash << 5) + hash) ^ *pbData; + uint32_t val; + memcpy(&val, pbData, sizeof(val)); + hash = XXHash32_QueueRound(hash, val); + pbData += sizeof(val); + iSize -= sizeof(val); } - return hash; + + // Process remaining bytes. + if (iSize > 0) + { + uint32_t val = 0; + memcpy(&val, pbData, iSize); + hash = XXHash32_QueueRound(hash, val); + } + + return XXHash32_MixFinal(hash); } // Helper function for hashing a string char by char. 
@@ -1995,39 +2010,6 @@ inline ULONG HashString(LPCWSTR szStr) return hash; } -inline ULONG HashStringN(LPCWSTR szStr, SIZE_T cchStr) -{ - LIMITED_METHOD_CONTRACT; - ULONG hash = 5381; - - // hash the string two characters at a time - ULONG *ptr = (ULONG *)szStr; - - // we assume that szStr is null-terminated - _ASSERTE(cchStr <= u16_strlen(szStr)); - SIZE_T cDwordCount = (cchStr + 1) / 2; - - for (SIZE_T i = 0; i < cDwordCount; i++) - { - hash = ((hash << 5) + hash) ^ ptr[i]; - } - - return hash; -} - -// Case-insensitive string hash function. -inline ULONG HashiStringA(LPCSTR szStr) -{ - LIMITED_METHOD_CONTRACT; - ULONG hash = 5381; - while (*szStr != 0) - { - hash = ((hash << 5) + hash) ^ toupper(*szStr); - szStr++; - } - return hash; -} - // Case-insensitive string hash function. inline ULONG HashiString(LPCWSTR szStr) { @@ -2041,19 +2023,6 @@ inline ULONG HashiString(LPCWSTR szStr) return hash; } -// Case-insensitive string hash function. -inline ULONG HashiStringN(LPCWSTR szStr, DWORD count) -{ - LIMITED_METHOD_CONTRACT; - ULONG hash = 5381; - while (*szStr != 0 && count--) - { - hash = ((hash << 5) + hash) ^ towupper(*szStr); - szStr++; - } - return hash; -} - // Case-insensitive string hash function when all of the // characters in the string are known to be below 0x80. // Knowing this is much more efficient than calling diff --git a/src/coreclr/vm/ceeload.cpp b/src/coreclr/vm/ceeload.cpp index ab5a28c591051e..c4d9ae09851f62 100644 --- a/src/coreclr/vm/ceeload.cpp +++ b/src/coreclr/vm/ceeload.cpp @@ -2245,12 +2245,7 @@ void ModuleBase::InitializeStringData(DWORD token, EEStringData *pstrData, CQuic pstrData->SetStringBuffer(pSwapped); #endif // !!BIGENDIAN - // MD and String look at this bit in opposite ways. Here's where we'll do the conversion. - // MD sets the bit to true if the string contains characters greater than 80. - // String sets the bit to true if the string doesn't contain characters greater than 80. 
- pstrData->SetCharCount(dwCharCount); - pstrData->SetIsOnlyLowChars(!fIs80Plus); } @@ -3739,7 +3734,7 @@ void SaveManagedCommandLine(LPCWSTR pwzAssemblyPath, int argc, LPCWSTR *argv) LPCWSTR exePath = GetExePath(); SIZE_T commandLineLen = (u16_strlen(exePath) + 1); - // Append assembly path to approximate the command line for generic hosts like `dotnet`. + // Append assembly path to approximate the command line for generic hosts like `dotnet`. // This isn't quite correct for apphost, as the app name will be duplicated. commandLineLen += (u16_strlen(pwzAssemblyPath) + 1); diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index b91382bff21dd8..90ee69828ca602 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -1348,7 +1348,7 @@ STRINGREF* LCGMethodResolver::GetOrInternString(STRINGREF *pProtectedStringRef) // Get the global string literal interning map GlobalStringLiteralMap* pStringLiteralMap = SystemDomain::GetGlobalStringLiteralMap(); - // Calculating the hash: EEUnicodeHashTableHelper::GetHash + // Calculating the hash. EEStringData StringData = EEStringData((*pProtectedStringRef)->GetStringLength(), (*pProtectedStringRef)->GetBuffer()); DWORD dwHash = pStringLiteralMap->GetHash(&StringData); diff --git a/src/coreclr/vm/eehash.cpp b/src/coreclr/vm/eehash.cpp index 0b0669f53bcf2a..f21a43c5fbccb2 100644 --- a/src/coreclr/vm/eehash.cpp +++ b/src/coreclr/vm/eehash.cpp @@ -60,91 +60,7 @@ void FreeEEHashBuckets(EEHashEntry_t** pBuckets) } // ============================================================================ -// Unicode string hash table helper. 
-// ============================================================================ -EEHashEntry_t * EEUnicodeHashTableHelper::AllocateEntry(EEStringData *pKey, BOOL bDeepCopy, void *pHeap) -{ - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - INJECT_FAULT(return NULL;); - } - CONTRACTL_END - - EEHashEntry_t *pEntry; - - if (bDeepCopy) - { - pEntry = (EEHashEntry_t *) new (nothrow) BYTE[SIZEOF_EEHASH_ENTRY + sizeof(EEStringData) + ((pKey->GetCharCount() + 1) * sizeof(WCHAR))]; - if (pEntry) { - EEStringData *pEntryKey = (EEStringData *)(&pEntry->Key); - pEntryKey->SetIsOnlyLowChars (pKey->GetIsOnlyLowChars()); - pEntryKey->SetCharCount (pKey->GetCharCount()); - pEntryKey->SetStringBuffer ((LPWSTR) ((LPBYTE)pEntry->Key + sizeof(EEStringData))); - memcpy((LPWSTR)pEntryKey->GetStringBuffer(), pKey->GetStringBuffer(), pKey->GetCharCount() * sizeof(WCHAR)); - } - } - else - { - pEntry = (EEHashEntry_t *) new (nothrow) BYTE[SIZEOF_EEHASH_ENTRY + sizeof(EEStringData)]; - if (pEntry) { - EEStringData *pEntryKey = (EEStringData *) pEntry->Key; - pEntryKey->SetIsOnlyLowChars (pKey->GetIsOnlyLowChars()); - pEntryKey->SetCharCount (pKey->GetCharCount()); - pEntryKey->SetStringBuffer (pKey->GetStringBuffer()); - } - } - - return pEntry; -} - - -void EEUnicodeHashTableHelper::DeleteEntry(EEHashEntry_t *pEntry, void *pHeap) -{ - LIMITED_METHOD_CONTRACT; - - delete [] (BYTE*)pEntry; -} - - -BOOL EEUnicodeHashTableHelper::CompareKeys(EEHashEntry_t *pEntry, EEStringData *pKey) -{ - LIMITED_METHOD_CONTRACT; - - EEStringData *pEntryKey = (EEStringData*) pEntry->Key; - - // Same buffer, same string. - if (pEntryKey->GetStringBuffer() == pKey->GetStringBuffer()) - return TRUE; - - // Length not the same, never a match. - if (pEntryKey->GetCharCount() != pKey->GetCharCount()) - return FALSE; - - // Compare the entire thing. 
- // We'll deliberately ignore the bOnlyLowChars field since this derived from the characters - return !memcmp(pEntryKey->GetStringBuffer(), pKey->GetStringBuffer(), pEntryKey->GetCharCount() * sizeof(WCHAR)); -} - - -DWORD EEUnicodeHashTableHelper::Hash(EEStringData *pKey) -{ - LIMITED_METHOD_CONTRACT; - - return (HashBytes((const BYTE *) pKey->GetStringBuffer(), pKey->GetCharCount()*sizeof(WCHAR))); -} - - -EEStringData *EEUnicodeHashTableHelper::GetKey(EEHashEntry_t *pEntry) -{ - LIMITED_METHOD_CONTRACT; - - return (EEStringData*)pEntry->Key; -} - -// ============================================================================ -// Unicode stringliteral hash table helper. +// Unicode string literal hash table helper. // ============================================================================ EEHashEntry_t * EEUnicodeStringLiteralHashTableHelper::AllocateEntry(EEStringData *pKey, BOOL bDeepCopy, void *pHeap) { diff --git a/src/coreclr/vm/eehash.h b/src/coreclr/vm/eehash.h index 3d43f8c1bff108..bfb3ed51bea904 100644 --- a/src/coreclr/vm/eehash.h +++ b/src/coreclr/vm/eehash.h @@ -362,7 +362,6 @@ class EEStringData LPCWSTR szString; // The string data. DWORD cch; // Characters in the string. #ifdef _DEBUG - BOOL bDebugOnlyLowChars; // Does the string contain only characters less than 0x80? 
DWORD dwDebugCch; #endif // _DEBUG @@ -374,7 +373,6 @@ class EEStringData SetStringBuffer(NULL); SetCharCount(0); - SetIsOnlyLowChars(FALSE); }; EEStringData(DWORD cchString, LPCWSTR str) : cch(0) { @@ -382,15 +380,6 @@ class EEStringData SetStringBuffer(str); SetCharCount(cchString); - SetIsOnlyLowChars(FALSE); - }; - EEStringData(DWORD cchString, LPCWSTR str, BOOL onlyLow) : cch(0) - { - LIMITED_METHOD_CONTRACT; - - SetStringBuffer(str); - SetCharCount(cchString); - SetIsOnlyLowChars(onlyLow); }; inline ULONG GetCharCount() const { @@ -420,37 +409,8 @@ class EEStringData szString = _szString; } - inline BOOL GetIsOnlyLowChars() const - { - LIMITED_METHOD_CONTRACT; - - _ASSERTE(bDebugOnlyLowChars == ((cch & ONLY_LOW_CHARS_MASK) ? TRUE : FALSE)); - return ((cch & ONLY_LOW_CHARS_MASK) ? TRUE : FALSE); - } - inline void SetIsOnlyLowChars(BOOL bIsOnlyLowChars) - { - LIMITED_METHOD_CONTRACT; - -#ifdef _DEBUG - bDebugOnlyLowChars = bIsOnlyLowChars; -#endif // _DEBUG - bIsOnlyLowChars ? (cch |= ONLY_LOW_CHARS_MASK) : (cch &= ~ONLY_LOW_CHARS_MASK); - } }; -class EEUnicodeHashTableHelper -{ -public: - static EEHashEntry_t * AllocateEntry(EEStringData *pKey, BOOL bDeepCopy, AllocationHeap Heap); - static void DeleteEntry(EEHashEntry_t *pEntry, AllocationHeap Heap); - static BOOL CompareKeys(EEHashEntry_t *pEntry, EEStringData *pKey); - static DWORD Hash(EEStringData *pKey); - static EEStringData * GetKey(EEHashEntry_t *pEntry); -}; - -typedef EEHashTable EEUnicodeStringHashTable; - - class EEUnicodeStringLiteralHashTableHelper { public: diff --git a/src/coreclr/vm/object.cpp b/src/coreclr/vm/object.cpp index dea387ab644625..c0a50aa2bd73b4 100644 --- a/src/coreclr/vm/object.cpp +++ b/src/coreclr/vm/object.cpp @@ -819,7 +819,7 @@ STRINGREF* StringObject::InitEmptyStringRefPtr() { GCX_COOP(); - EEStringData data(0, W(""), TRUE); + EEStringData data(0, W("")); void* pinnedStr = nullptr; EmptyStringRefPtr = 
SystemDomain::System()->DefaultDomain()->GetLoaderAllocator()->GetStringObjRefPtrFromUnicodeString(&data, &pinnedStr); EmptyStringIsFrozen = pinnedStr != nullptr; diff --git a/src/coreclr/vm/typehashingalgorithms.h b/src/coreclr/vm/typehashingalgorithms.h index 2a6b5d1115404b..5fc4b53d2bfed1 100644 --- a/src/coreclr/vm/typehashingalgorithms.h +++ b/src/coreclr/vm/typehashingalgorithms.h @@ -7,6 +7,7 @@ #pragma once #include +#include // // Returns the hashcode value of the 'src' string @@ -124,49 +125,6 @@ inline static int ComputeGenericInstanceHashCode(int definitionHashcode, int ari return (hashcode + _rotl(hashcode, 15)); } -/* - -The below hash combining function is based on the xxHash32 logic implemented -in System.HashCode. In particular it is a port of the 2 element hash -combining routines, which are in turn based on xxHash32 logic. - -The xxHash32 implementation is based on the code published by Yann Collet: -https://raw.githubusercontent.com/Cyan4973/xxHash/5c174cfa4e45a42f94082dc0d4539b39696afea1/xxhash.c - - xxHash - Fast Hash algorithm - Copyright (C) 2012-2016, Yann Collet - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash homepage: http://www.xxhash.com - - xxHash source repository : https://github.com/Cyan4973/xxHash - -*/ - inline static UINT32 HashMDToken(mdToken token) { // Hash function to generate a value usable for reasonable hashes from a single 32bit value @@ -181,194 +139,3 @@ inline static UINT32 HashMDToken(mdToken token) a ^= (a>>15); return a; } - -inline static UINT32 XXHash32_MixEmptyState() -{ - // Unlike System.HashCode, these hash values are required to be stable, so don't - // mixin a random process specific value - return 374761393U; // Prime5 -} - -inline static UINT32 XXHash32_MixState(UINT32 v1, UINT32 v2, UINT32 v3, UINT32 v4) -{ - return (UINT32)_rotl(v1, 1) + (UINT32)_rotl(v2, 7) + (UINT32)_rotl(v3, 12) + (UINT32)_rotl(v4, 18); -} - -inline static UINT32 XXHash32_QueueRound(UINT32 hash, UINT32 queuedValue) -{ - return ((UINT32)_rotl((int)(hash + queuedValue * 3266489917U/*Prime3*/), 17)) * 668265263U/*Prime4*/; -} - -inline static UINT32 XXHash32_Round(UINT32 hash, UINT32 input) -{ - return ((UINT32)_rotl((int)(hash + input * 2246822519U/*Prime2*/), 13)) * 2654435761U/*Prime1*/; -} - -inline static UINT32 XXHash32_MixFinal(UINT32 hash) -{ - hash ^= hash >> 15; - hash *= 2246822519U/*Prime2*/; - hash ^= hash >> 13; - hash *= 3266489917U/*Prime3*/; - hash ^= hash >> 16; - return hash; -} - -inline static UINT32 MixOneValueIntoHash(UINT32 value1) -{ - // This matches the behavior of 
System.HashCode.Combine(value1) as of the time of authoring - - // Provide a way of diffusing bits from something with a limited - // input hash space. For example, many enums only have a few - // possible hashes, only using the bottom few bits of the code. Some - // collections are built on the assumption that hashes are spread - // over a larger space, so diffusing the bits may help the - // collection work more efficiently. - - DWORD hash = XXHash32_MixEmptyState(); - hash += 4; - hash = XXHash32_QueueRound(hash, value1); - hash = XXHash32_MixFinal(hash); - return hash; -} - -inline static UINT32 CombineTwoValuesIntoHash(UINT32 value1, UINT32 value2) -{ - // This matches the behavior of System.HashCode.Combine(value1, value2) as of the time of authoring - DWORD hash = XXHash32_MixEmptyState(); - hash += 8; - hash = XXHash32_QueueRound(hash, value1); - hash = XXHash32_QueueRound(hash, value2); - hash = XXHash32_MixFinal(hash); - return hash; -} - -inline static UINT32 MixPointerIntoHash(void* ptr) -{ -#ifdef HOST_64BIT - return CombineTwoValuesIntoHash((UINT32)(UINT_PTR)ptr, (UINT32)(((UINT64)ptr) >> 32)); -#else - return MixOneValueIntoHash((UINT32)ptr); -#endif -} - - -inline static UINT32 CombineThreeValuesIntoHash(UINT32 value1, UINT32 value2, UINT32 value3) -{ - // This matches the behavior of System.HashCode.Combine(value1, value2, value3) as of the time of authoring - DWORD hash = XXHash32_MixEmptyState(); - hash += 12; - hash = XXHash32_QueueRound(hash, value1); - hash = XXHash32_QueueRound(hash, value2); - hash = XXHash32_QueueRound(hash, value3); - hash = XXHash32_MixFinal(hash); - return hash; -} - -// This is a port of the System.HashCode logic for computing a hashcode using the xxHash algorithm -// However, as this is intended to provide a stable hash, the seed value is always 0. 
-class xxHash -{ - const uint32_t seed = 0; - const uint32_t Prime1 = 2654435761U; - const uint32_t Prime2 = 2246822519U; - const uint32_t Prime3 = 3266489917U; - const uint32_t Prime4 = 668265263U; - const uint32_t Prime5 = 374761393U; - - uint32_t _v1 = seed + Prime1 + Prime2; - uint32_t _v2 = seed + Prime2; - uint32_t _v3 = seed; - uint32_t _v4 = seed - Prime1; - uint32_t _queue1 = 0; - uint32_t _queue2 = 0; - uint32_t _queue3 = 0; - uint32_t _length = 0; - -public: - void AddPointer(void* ptr) - { -#ifdef HOST_64BIT - Add((uint32_t)(UINT_PTR)ptr); - Add((uint32_t)(((UINT64)ptr) >> 32)); -#else - Add((uint32_t)(UINT_PTR)ptr); -#endif - } - void Add(uint32_t val) - { - // The original xxHash works as follows: - // 0. Initialize immediately. We can't do this in a struct (no - // default ctor). - // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators. - // 2. Accumulate remaining blocks of length 4 (1 uint) into the - // hash. - // 3. Accumulate remaining blocks of length 1 into the hash. - - // There is no need for #3 as this type only accepts ints. _queue1, - // _queue2 and _queue3 are basically a buffer so that when - // ToHashCode is called we can execute #2 correctly. - - // Storing the value of _length locally shaves of quite a few bytes - // in the resulting machine code. - uint32_t previousLength = _length++; - uint32_t position = previousLength % 4; - - // Switch can't be inlined. - - if (position == 0) - _queue1 = val; - else if (position == 1) - _queue2 = val; - else if (position == 2) - _queue3 = val; - else // position == 3 - { - _v1 = XXHash32_Round(_v1, _queue1); - _v2 = XXHash32_Round(_v2, _queue2); - _v3 = XXHash32_Round(_v3, _queue3); - _v4 = XXHash32_Round(_v4, val); - } - } - - uint32_t ToHashCode() - { - // Storing the value of _length locally shaves of quite a few bytes - // in the resulting machine code. 
- uint32_t length = _length; - - // position refers to the *next* queue position in this method, so - // position == 1 means that _queue1 is populated; _queue2 would have - // been populated on the next call to Add. - uint32_t position = length % 4; - - // If the length is less than 4, _v1 to _v4 don't contain anything - // yet. xxHash32 treats this differently. - - uint32_t hash = length < 4 ? XXHash32_MixEmptyState() : XXHash32_MixState(_v1, _v2, _v3, _v4); - - // _length is incremented once per Add(Int32) and is therefore 4 - // times too small (xxHash length is in bytes, not ints). - - hash += length * 4; - - // Mix what remains in the queue - - // Switch can't be inlined right now, so use as few branches as - // possible by manually excluding impossible scenarios (position > 1 - // is always false if position is not > 0). - if (position > 0) - { - hash = XXHash32_QueueRound(hash, _queue1); - if (position > 1) - { - hash = XXHash32_QueueRound(hash, _queue2); - if (position > 2) - hash = XXHash32_QueueRound(hash, _queue3); - } - } - - hash = XXHash32_MixFinal(hash); - return (int)hash; - } -};