Skip to content

Commit

Permalink
Hashing.h - utilities for hashing various data types.
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150890 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
viridia committed Feb 18, 2012
1 parent b155c23 commit 1a4b19e
Show file tree
Hide file tree
Showing 5 changed files with 285 additions and 0 deletions.
180 changes: 180 additions & 0 deletions include/llvm/ADT/Hashing.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines utilities for computing hash values for various data types.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ADT_HASHING_H
#define LLVM_ADT_HASHING_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"

namespace llvm {

/// Class to compute a hash value from multiple data fields of arbitrary
/// types. Note that if you are hashing a single data type, such as a
/// string, it may be cheaper to use a hash algorithm that is tailored
/// for that specific data type.
/// Typical Usage:
/// GeneralHash Hash;
/// Hash.add(someValue);
/// Hash.add(someOtherValue);
/// return Hash.finish();
/// Adapted from MurmurHash2 by Austin Appleby
class GeneralHash {
private:
enum {
M = 0x5bd1e995
};
unsigned Hash;
unsigned Count;
public:
GeneralHash(unsigned Seed = 0) : Hash(Seed), Count(0) {}

/// Add a pointer value.
/// Note: this adds pointers to the hash using sizes and endianness that
/// depend on the host. It doesn't matter however, because hashing on
/// pointer values is inherently unstable.
template<typename T>
GeneralHash& add(const T *PtrVal) {
addBits(&PtrVal, &PtrVal + 1);
return *this;
}

/// Add an ArrayRef of arbitrary data.
template<typename T>
GeneralHash& add(ArrayRef<T> ArrayVal) {
addBits(ArrayVal.begin(), ArrayVal.end());
return *this;
}

/// Add a string
GeneralHash& add(StringRef StrVal) {
addBits(StrVal.begin(), StrVal.end());
return *this;
}

/// Add an signed 32-bit integer.
GeneralHash& add(int32_t Data) {
addInt(uint32_t(Data));
return *this;
}

/// Add an unsigned 32-bit integer.
GeneralHash& add(uint32_t Data) {
addInt(Data);
return *this;
}

/// Add an signed 64-bit integer.
GeneralHash& add(int64_t Data) {
addInt(uint64_t(Data));
return *this;
}

/// Add an unsigned 64-bit integer.
GeneralHash& add(uint64_t Data) {
addInt(Data);
return *this;
}

/// Add a float
GeneralHash& add(float Data) {
union {
float D; uint32_t I;
};
D = Data;
addInt(I);
return *this;
}

/// Add a double
GeneralHash& add(double Data) {
union {
double D; uint64_t I;
};
D = Data;
addInt(I);
return *this;
}

// Do a few final mixes of the hash to ensure the last few
// bytes are well-incorporated.
unsigned finish() {
mix(Count);
Hash ^= Hash >> 13;
Hash *= M;
Hash ^= Hash >> 15;
return Hash;
}

private:
void mix(uint32_t Data) {
++Count;
Data *= M;
Data ^= Data >> 24;
Data *= M;
Hash *= M;
Hash ^= Data;
}

// Add a single uint32 value
void addInt(uint32_t Val) {
mix(Val);
}

// Add a uint64 value
void addInt(uint64_t Val) {
mix(uint32_t(Val >> 32));
mix(uint32_t(Val));
}

template<typename T, bool isAligned>
struct addBitsImpl {
static void add(GeneralHash &Hash, const T *I, const T *E) {
Hash.addUnaligned(
reinterpret_cast<const uint8_t *>(I),
reinterpret_cast<const uint8_t *>(E));
}
};

template<typename T>
struct addBitsImpl<T, true> {
static void add(GeneralHash &Hash, const T *I, const T *E) {
Hash.addAligned(
reinterpret_cast<const uint32_t *>(I),
reinterpret_cast<const uint32_t *>(E));
}
};

// Add a range of bits from I to E.
template<typename T>
void addBits(const T *I, const T *E) {
addBitsImpl<T, AlignOf<T>::Alignment_GreaterEqual_4Bytes>::add(*this, I, E);
}

// Add a range of uint32s
void addAligned(const uint32_t *I, const uint32_t *E) {
while (I < E) {
mix(*I++);
}
}

// Add a possibly unaligned sequence of bytes.
void addUnaligned(const uint8_t *I, const uint8_t *E);
};

} // end namespace llvm

#endif
1 change: 1 addition & 0 deletions lib/Support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_llvm_library(LLVMSupport
FoldingSet.cpp
FormattedStream.cpp
GraphWriter.cpp
Hashing.cpp
IntEqClasses.cpp
IntervalMap.cpp
IntrusiveRefCntPtr.cpp
Expand Down
46 changes: 46 additions & 0 deletions lib/Support/Hashing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===-- llvm/ADT/Hashing.cpp - Utilities for hashing ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Hashing.h"

namespace llvm {

// Add a possibly unaligned sequence of bytes.
void GeneralHash::addUnaligned(const uint8_t *I, const uint8_t *E) {
ptrdiff_t Length = E - I;
if (uintptr_t(I) & 3 == 0) {
while (Length > 3) {
mix(*reinterpret_cast<const uint32_t *>(I));
I += 4;
Length -= 4;
}
} else {
while (Length > 3) {
mix(
uint32_t(I[0]) +
(uint32_t(I[1]) << 8) +
(uint32_t(I[2]) << 16) +
(uint32_t(I[3]) << 24));
I += 4;
Length -= 4;
}
}

if (Length & 3) {
uint32_t Data = 0;
switch (Length & 3) {
case 3: Data |= uint32_t(I[2]) << 16; // fall through
case 2: Data |= uint32_t(I[1]) << 8; // fall through
case 1: Data |= uint32_t(I[0]); break;
}
mix(Data);
}
}

}
57 changes: 57 additions & 0 deletions unittests/ADT/HashingTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Hashing.h unit tests.
//
//===----------------------------------------------------------------------===//

#include "gtest/gtest.h"
#include "llvm/ADT/Hashing.h"

using namespace llvm;

namespace {

TEST(HashingTest, EmptyHashTest) {
GeneralHash Hash;
ASSERT_EQ(0u, Hash.finish());
}

TEST(HashingTest, IntegerHashTest) {
ASSERT_TRUE(GeneralHash().add(1).finish() == GeneralHash().add(1).finish());
ASSERT_TRUE(GeneralHash().add(1).finish() != GeneralHash().add(2).finish());
}

TEST(HashingTest, StringHashTest) {
ASSERT_TRUE(
GeneralHash().add("abc").finish() == GeneralHash().add("abc").finish());
ASSERT_TRUE(
GeneralHash().add("abc").finish() != GeneralHash().add("abcd").finish());
}

TEST(HashingTest, FloatHashTest) {
ASSERT_TRUE(
GeneralHash().add(1.0f).finish() == GeneralHash().add(1.0f).finish());
ASSERT_TRUE(
GeneralHash().add(1.0f).finish() != GeneralHash().add(2.0f).finish());
}

TEST(HashingTest, DoubleHashTest) {
ASSERT_TRUE(GeneralHash().add(1.).finish() == GeneralHash().add(1.).finish());
ASSERT_TRUE(GeneralHash().add(1.).finish() != GeneralHash().add(2.).finish());
}

TEST(HashingTest, IntegerArrayHashTest) {
int a[] = { 1, 2 };
int b[] = { 1, 3 };
ASSERT_TRUE(GeneralHash().add(a).finish() == GeneralHash().add(a).finish());
ASSERT_TRUE(GeneralHash().add(a).finish() != GeneralHash().add(b).finish());
}

}
1 change: 1 addition & 0 deletions unittests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ add_llvm_unittest(ADT
ADT/DenseMapTest.cpp
ADT/DenseSetTest.cpp
ADT/FoldingSet.cpp
ADT/HashingTest.cpp
ADT/ilistTest.cpp
ADT/ImmutableSetTest.cpp
ADT/IntEqClassesTest.cpp
Expand Down

0 comments on commit 1a4b19e

Please sign in to comment.