Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 237 additions & 0 deletions src/coreclr/inc/dn_xxhash.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// ---------------------------------------------------------------------------
// xxHash32 primitives and helpers
//
// Based on the xxHash32 logic implemented in System.HashCode, which is in
// turn based on the code published by Yann Collet:
// https://raw.githubusercontent.com/Cyan4973/xxHash/5c174cfa4e45a42f94082dc0d4539b39696afea1/xxhash.c
//
// xxHash - Fast Hash algorithm
// Copyright (C) 2012-2016, Yann Collet
//
// BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// You can contact the author at :
// - xxHash homepage: http://www.xxhash.com
// - xxHash source repository : https://github.com/Cyan4973/xxHash
// ---------------------------------------------------------------------------

#pragma once
#include <stdlib.h>
#include "clrtypes.h"

// Returns the fixed starting hash value (Prime5) used when the four-lane
// xxHash32 state has not been populated. No per-process seed is added, so the
// value is the same on every call.
inline static UINT32 XXHash32_MixEmptyState()
{
// Unlike System.HashCode, these hash values are required to be stable, so don't

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not clear why these hash values are required to be stable now that this is standalone and no longer part of typehashingalgorithms.h

// mix in a random process specific value
return 374761393U; // Prime5
}

// Folds the four xxHash32 lane accumulators into one 32-bit value by rotating
// each lane left by its own amount (1, 7, 12, 18) and summing the results.
inline static UINT32 XXHash32_MixState(UINT32 v1, UINT32 v2, UINT32 v3, UINT32 v4)
{
    const UINT32 lane1 = (UINT32)_rotl(v1, 1);
    const UINT32 lane2 = (UINT32)_rotl(v2, 7);
    const UINT32 lane3 = (UINT32)_rotl(v3, 12);
    const UINT32 lane4 = (UINT32)_rotl(v4, 18);
    return lane1 + lane2 + lane3 + lane4;
}

// Mixes one queued 32-bit value into a running hash:
// rotl(hash + queuedValue * Prime3, 17) * Prime4.
inline static UINT32 XXHash32_QueueRound(UINT32 hash, UINT32 queuedValue)
{
    const UINT32 accumulated = hash + queuedValue * 3266489917U; // Prime3
    const UINT32 rotated = (UINT32)_rotl((int)accumulated, 17);
    return rotated * 668265263U; // Prime4
}

// Advances one lane accumulator with a 32-bit input:
// rotl(hash + input * Prime2, 13) * Prime1.
inline static UINT32 XXHash32_Round(UINT32 hash, UINT32 input)
{
    const UINT32 accumulated = hash + input * 2246822519U; // Prime2
    const UINT32 rotated = (UINT32)_rotl((int)accumulated, 13);
    return rotated * 2654435761U; // Prime1
}

// Final avalanche step: alternates xor-with-right-shift (15, 13, 16) with
// multiplications by Prime2 and Prime3 and returns the resulting value.
inline static UINT32 XXHash32_MixFinal(UINT32 hash)
{
    UINT32 mixed = hash;
    mixed = (mixed ^ (mixed >> 15)) * 2246822519U; // Prime2
    mixed = (mixed ^ (mixed >> 13)) * 3266489917U; // Prime3
    return mixed ^ (mixed >> 16);
}

// Diffuses the bits of a single 32-bit value across the whole hash word.
//
// Intended to match System.HashCode.Combine(value1) as of the time of
// authoring. Useful when the input occupies a small hash space (for example
// an enum that only uses its low bits) but the consuming collection expects
// hashes spread over a larger range.
inline static UINT32 MixOneValueIntoHash(UINT32 value1)
{
    // Empty-state start value plus the input length in bytes (one 4-byte value).
    const UINT32 start = XXHash32_MixEmptyState() + 4;
    const UINT32 queued = XXHash32_QueueRound(start, value1);
    return XXHash32_MixFinal(queued);
}

// Combines two 32-bit values into one hash. Intended to match
// System.HashCode.Combine(value1, value2) as of the time of authoring.
inline static UINT32 CombineTwoValuesIntoHash(UINT32 value1, UINT32 value2)
{
    // Empty-state start value plus the input length in bytes (two 4-byte values).
    UINT32 running = XXHash32_MixEmptyState() + 8;
    running = XXHash32_QueueRound(running, value1);
    running = XXHash32_QueueRound(running, value2);
    return XXHash32_MixFinal(running);
}

// Hashes a pointer value. On 64-bit hosts the low and high 32-bit halves of
// the address are combined; otherwise the address is mixed as a single value.
inline static UINT32 MixPointerIntoHash(void* ptr)
{
#ifdef HOST_64BIT
    const UINT64 address = (UINT64)(UINT_PTR)ptr;
    const UINT32 lowHalf = (UINT32)address;
    const UINT32 highHalf = (UINT32)(address >> 32);
    return CombineTwoValuesIntoHash(lowHalf, highHalf);
#else
    return MixOneValueIntoHash((UINT32)ptr);
#endif
}

// Combines three 32-bit values into one hash. Intended to match
// System.HashCode.Combine(value1, value2, value3) as of the time of authoring.
inline static UINT32 CombineThreeValuesIntoHash(UINT32 value1, UINT32 value2, UINT32 value3)
{
    // Empty-state start value plus the input length in bytes (three 4-byte values).
    UINT32 running = XXHash32_MixEmptyState() + 12;
    running = XXHash32_QueueRound(running, value1);
    running = XXHash32_QueueRound(running, value2);
    running = XXHash32_QueueRound(running, value3);
    return XXHash32_MixFinal(running);
}

// Incremental xxHash32 accumulator over a sequence of 32-bit values.
//
// This is a port of the System.HashCode logic for computing a hashcode using
// the xxHash algorithm. However, as this is intended to provide a stable hash,
// the seed value is always 0.
//
// Usage: call Add / AddPointer for each value in order, then ToHashCode.
class xxHash
{
    // Algorithm constants. They are static constexpr so that they occupy no
    // per-instance storage and do not make the class non-assignable (which
    // non-static const data members would).
    static constexpr uint32_t seed = 0;
    static constexpr uint32_t Prime1 = 2654435761U;
    static constexpr uint32_t Prime2 = 2246822519U;
    static constexpr uint32_t Prime3 = 3266489917U;
    static constexpr uint32_t Prime4 = 668265263U;
    static constexpr uint32_t Prime5 = 374761393U;

    // The four lane accumulators, initialized up front from the seed.
    uint32_t _v1 = seed + Prime1 + Prime2;
    uint32_t _v2 = seed + Prime2;
    uint32_t _v3 = seed;
    uint32_t _v4 = seed - Prime1;

    // Holds up to three values that do not yet form a complete group of four.
    uint32_t _queue1 = 0;
    uint32_t _queue2 = 0;
    uint32_t _queue3 = 0;

    // Number of values passed to Add so far (counted in values, not bytes).
    uint32_t _length = 0;

public:
    // Adds one 32-bit value to the hash.
    void Add(uint32_t val)
    {
        // The original xxHash works as follows:
        // 0. Initialize the accumulators (done here by the default member
        //    initializers).
        // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators.
        // 2. Accumulate remaining blocks of length 4 (1 uint) into the
        //    hash.
        // 3. Accumulate remaining blocks of length 1 into the hash.

        // There is no need for #3 as this type only accepts 32-bit values.
        // _queue1, _queue2 and _queue3 are basically a buffer so that when
        // ToHashCode is called we can execute #2 correctly.

        uint32_t previousLength = _length++;
        uint32_t position = previousLength % 4;

        if (position == 0)
            _queue1 = val;
        else if (position == 1)
            _queue2 = val;
        else if (position == 2)
            _queue3 = val;
        else // position == 3: a full group of four is available
        {
            _v1 = XXHash32_Round(_v1, _queue1);
            _v2 = XXHash32_Round(_v2, _queue2);
            _v3 = XXHash32_Round(_v3, _queue3);
            _v4 = XXHash32_Round(_v4, val);
        }
    }

    // Adds a pointer value to the hash: as two 32-bit halves (low half first)
    // on 64-bit hosts, as a single 32-bit value otherwise.
    void AddPointer(void* ptr)
    {
#ifdef HOST_64BIT
        Add((uint32_t)(UINT_PTR)ptr);
        Add((uint32_t)(((UINT_PTR)ptr) >> 32));
#else
        Add((uint32_t)(UINT_PTR)ptr);
#endif
    }

    // Returns the hash of everything added so far. Does not modify the
    // accumulator, so more values may be added afterwards.
    uint32_t ToHashCode() const
    {
        uint32_t length = _length;

        // position refers to the *next* queue position in this method, so
        // position == 1 means that _queue1 is populated; _queue2 would have
        // been populated on the next call to Add.
        uint32_t position = length % 4;

        // If the length is less than 4, _v1 to _v4 don't contain anything
        // yet. xxHash32 treats this differently.
        uint32_t hash = length < 4 ? XXHash32_MixEmptyState() : XXHash32_MixState(_v1, _v2, _v3, _v4);

        // _length is incremented once per Add and is therefore 4 times too
        // small (xxHash length is in bytes, not 32-bit values).
        hash += length * 4;

        // Mix what remains in the queue. position > 1 is always false if
        // position is not > 0, hence the nesting.
        if (position > 0)
        {
            hash = XXHash32_QueueRound(hash, _queue1);
            if (position > 1)
            {
                hash = XXHash32_QueueRound(hash, _queue2);
                if (position > 2)
                    hash = XXHash32_QueueRound(hash, _queue3);
            }
        }

        return XXHash32_MixFinal(hash);
    }
};
45 changes: 0 additions & 45 deletions src/coreclr/inc/shash.h
Original file line number Diff line number Diff line change
Expand Up @@ -729,51 +729,6 @@ struct CaseSensitiveStringCompareHash
}
};

// Provides case-insensitive comparison and hashing functionality through static
// and functor object methods. Can be instantiated with CHAR or WCHAR.
template <typename CharT>
struct CaseInsensitiveStringCompareHash
{
private:
typedef CharT const * str_t;

static size_t _strcmp(str_t left, str_t right)
{
return ::SString::_tstricmp(left, right);
}

static size_t _hash(CHAR const *str)
{
return HashiStringA(str);
}

static size_t _hash(WCHAR const *str)
{
return HashiString(str);
}

public:
static size_t compare(str_t left, str_t right)
{
return _strcmp(left, right);
}

size_t operator()(str_t left, str_t right)
{
return compare(left, right);
}

static size_t hash(str_t str)
{
return _hash(str);
}

size_t operator()(str_t str)
{
return hash(str);
}
};

// StringSHashTraits is a traits class useful for string-keyed
// pointer hash tables.

Expand Down
71 changes: 20 additions & 51 deletions src/coreclr/inc/utilcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ using std::nothrow;
#include "volatile.h"
#include <daccess.h>
#include "clrhost.h"
#include "dn_xxhash.h"
#include "debugmacros.h"
#include "corhlprpriv.h"
#include "check.h"
Expand Down Expand Up @@ -1955,15 +1956,29 @@ inline COUNT_T HashPtr(COUNT_T currentHash, PTR_VOID ptr)
// Computes a 32-bit hash of the iSize bytes starting at pbData using the
// xxHash32 primitives.
//
// The running hash starts from the empty state plus the byte count. The input
// is then consumed four bytes at a time, and any remaining 1-3 bytes are
// zero-padded into one final 32-bit value before the final mix.
inline ULONG HashBytes(BYTE const *pbData, size_t iSize)
{
    LIMITED_METHOD_CONTRACT;

    ULONG hash = XXHash32_MixEmptyState();
    hash += (ULONG)iSize;

    // Process 4 bytes at a time. memcpy is used for the load so that pbData
    // does not need to be 4-byte aligned.
    while (iSize >= sizeof(uint32_t))
    {
        uint32_t val;
        memcpy(&val, pbData, sizeof(val));
        hash = XXHash32_QueueRound(hash, val);
        pbData += sizeof(val);
        iSize -= sizeof(val);
    }

    // Process remaining bytes; the unused high bytes of val stay zero.
    if (iSize > 0)
    {
        uint32_t val = 0;
        memcpy(&val, pbData, iSize);
        hash = XXHash32_QueueRound(hash, val);
    }

    return XXHash32_MixFinal(hash);
}

// Helper function for hashing a string char by char.
Expand Down Expand Up @@ -1995,39 +2010,6 @@ inline ULONG HashString(LPCWSTR szStr)
return hash;
}

// Hashes the first cchStr characters of szStr, consuming the string in
// ULONG-sized chunks (two characters at a time).
//
// szStr must be null-terminated and at least cchStr characters long. When
// cchStr is odd, the final chunk also covers the character that follows the
// last requested one, which is why the terminator must be readable.
inline ULONG HashStringN(LPCWSTR szStr, SIZE_T cchStr)
{
    LIMITED_METHOD_CONTRACT;
    ULONG hash = 5381;

    // we assume that szStr is null-terminated
    _ASSERTE(cchStr <= u16_strlen(szStr));
    SIZE_T cDwordCount = (cchStr + 1) / 2;

    // Read each chunk with memcpy instead of through a (ULONG *) cast: the
    // cast dropped const and accessed WCHAR storage through an incompatible,
    // possibly misaligned pointer type. The bytes read are unchanged.
    const char *pbChunks = (const char *)szStr;

    for (SIZE_T i = 0; i < cDwordCount; i++)
    {
        ULONG chunk;
        memcpy(&chunk, pbChunks + i * sizeof(chunk), sizeof(chunk));
        hash = ((hash << 5) + hash) ^ chunk;
    }

    return hash;
}

// Case-insensitive string hash function for null-terminated narrow strings.
// Each character is upper-cased with toupper before being folded into the
// hash (hash * 33 ^ c, starting from 5381).
inline ULONG HashiStringA(LPCSTR szStr)
{
    LIMITED_METHOD_CONTRACT;
    ULONG hash = 5381;
    while (*szStr != 0)
    {
        // toupper requires an argument representable as unsigned char (or
        // EOF). Plain char may be signed, so convert first to avoid passing a
        // negative value for characters at or above 0x80.
        hash = ((hash << 5) + hash) ^ toupper((unsigned char)*szStr);
        szStr++;
    }
    return hash;
}

// Case-insensitive string hash function.
inline ULONG HashiString(LPCWSTR szStr)
{
Expand All @@ -2041,19 +2023,6 @@ inline ULONG HashiString(LPCWSTR szStr)
return hash;
}

// Case-insensitive hash of at most count characters of szStr; hashing stops
// early at the first null character. Each character is upper-cased with
// towupper before being folded into the hash.
inline ULONG HashiStringN(LPCWSTR szStr, DWORD count)
{
    LIMITED_METHOD_CONTRACT;
    ULONG result = 5381;
    // The terminator test comes first, matching the order in which the
    // string and the remaining count are examined.
    for (DWORD remaining = count; *szStr != 0 && remaining != 0; --remaining, ++szStr)
    {
        result = ((result << 5) + result) ^ towupper(*szStr);
    }
    return result;
}

// Case-insensitive string hash function when all of the
// characters in the string are known to be below 0x80.
// Knowing this is much more efficient than calling
Expand Down
Loading