From 23d7b3611759c7fb3a853dfce3ee3d43ef5ca67d Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 29 Oct 2006 23:42:03 +0000 Subject: [PATCH] add a highly efficient hash table that is specialized for mapping C strings to some other type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@31286 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/CStringMap.h | 144 ++++++++++++++++++++++++++++++++++ include/llvm/ADT/StringMap.h | 144 ++++++++++++++++++++++++++++++++++ lib/Support/CStringMap.cpp | 134 +++++++++++++++++++++++++++++++ lib/Support/StringMap.cpp | 134 +++++++++++++++++++++++++++++++ 4 files changed, 556 insertions(+) create mode 100644 include/llvm/ADT/CStringMap.h create mode 100644 include/llvm/ADT/StringMap.h create mode 100644 lib/Support/CStringMap.cpp create mode 100644 lib/Support/StringMap.cpp diff --git a/include/llvm/ADT/CStringMap.h b/include/llvm/ADT/CStringMap.h new file mode 100644 index 00000000000..e3886876a1f --- /dev/null +++ b/include/llvm/ADT/CStringMap.h @@ -0,0 +1,144 @@ +//===--- CStringMap.h - CString Hash table map interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CStringMap class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_CSTRINGMAP_H +#define LLVM_ADT_CSTRINGMAP_H + +#include "llvm/Support/Allocator.h" +#include + +namespace llvm { + +/// CStringMapVisitor - Subclasses of this class may be implemented to walk all +/// of the items in a CStringMap. +class CStringMapVisitor { +public: + virtual ~CStringMapVisitor(); + virtual void Visit(const char *Key, void *Value) const = 0; +}; + +/// CStringMapImpl - This is the base class of CStringMap that is shared among +/// all of its instantiations. +class CStringMapImpl { +protected: + /// ItemBucket - The hash table consists of an array of these. If Item is + /// non-null, this is an extant entry, otherwise, it is a hole. + struct ItemBucket { + /// FullHashValue - This remembers the full hash value of the key for + /// easy scanning. + unsigned FullHashValue; + + /// Item - This is a pointer to the actual item object. + void *Item; + }; + + ItemBucket *TheTable; + unsigned NumBuckets; + unsigned NumItems; + unsigned ItemSize; +protected: + CStringMapImpl(unsigned InitSize, unsigned ItemSize); + void RehashTable(); + + /// LookupBucketFor - Look up the bucket that the specified string should end + /// up in. If it already exists as a key in the map, the Item pointer for the + /// specified bucket will be non-null. Otherwise, it will be null. In either + /// case, the FullHashValue field of the bucket will be set to the hash value + /// of the string. + unsigned LookupBucketFor(const char *KeyStart, const char *KeyEnd); + +public: + unsigned getNumBuckets() const { return NumBuckets; } + unsigned getNumItems() const { return NumItems; } + + void VisitEntries(const CStringMapVisitor &Visitor) const; +}; + + +/// CStringMap - This is an unconventional map that is specialized for handling +/// keys that are "C strings", that is, null-terminated strings. This does some +/// funky memory allocation and hashing things to make it extremely efficient, +/// storing the string data *after* the value in the map. +template +class CStringMap : public CStringMapImpl { + AllocatorTy Allocator; +public: + CStringMap(unsigned InitialSize = 0) + : CStringMapImpl(InitialSize, sizeof(ValueTy)) {} + + AllocatorTy &getAllocator() { return Allocator; } + const AllocatorTy &getAllocator() const { return Allocator; } + + /// FindValue - Look up the specified key in the map. If it exists, return a + /// pointer to the element, otherwise return null. + ValueTy *FindValue(const char *KeyStart, const char *KeyEnd) { + unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd); + return static_cast(TheTable[BucketNo].Item); + } + + /// GetOrCreateValue - Look up the specified key in the table. If a value + /// exists, return it. Otherwise, default construct a value, insert it, and + /// return. + ValueTy &GetOrCreateValue(const char *KeyStart, const char *KeyEnd) { + unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd); + ItemBucket &Bucket = TheTable[BucketNo]; + if (Bucket.Item) + return *static_cast(Bucket.Item); + + unsigned KeyLength = KeyEnd-KeyStart; + + // Okay, the item doesn't already exist, and Bucket is the bucket to fill + // in. Allocate a new item with space for the null-terminated string at the + // end. + unsigned AllocSize = sizeof(ValueTy)+KeyLength+1; + +#ifdef __GNUC__ + unsigned Alignment = __alignof__(ValueTy); +#else + // FIXME: ugly. + unsigned Alignment = 8; +#endif + ValueTy *NewItem = (ValueTy*)Allocator.Allocate(AllocSize, Alignment); + new (NewItem) ValueTy(); + ++NumItems; + + // Copy the string information. + char *StrBuffer = (char*)(NewItem+1); + memcpy(StrBuffer, KeyStart, KeyLength); + StrBuffer[KeyLength] = 0; // Null terminate string. + + // Fill in the bucket for the hash table. The FullHashValue was already + // filled in by LookupBucketFor. + Bucket.Item = NewItem; + + // If the hash table is now more than 3/4 full, rehash into a larger table. + if (NumItems > NumBuckets*3/4) + RehashTable(); + return *NewItem; + } + + ~CStringMap() { + for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) { + if (ValueTy *Id = static_cast(I->Item)) { + // Free memory referenced by the item. + Id->~ValueTy(); + Allocator.Deallocate(Id); + } + } + delete [] TheTable; + } +}; + +} + +#endif \ No newline at end of file diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h new file mode 100644 index 00000000000..e3886876a1f --- /dev/null +++ b/include/llvm/ADT/StringMap.h @@ -0,0 +1,144 @@ +//===--- CStringMap.h - CString Hash table map interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CStringMap class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_CSTRINGMAP_H +#define LLVM_ADT_CSTRINGMAP_H + +#include "llvm/Support/Allocator.h" +#include + +namespace llvm { + +/// CStringMapVisitor - Subclasses of this class may be implemented to walk all +/// of the items in a CStringMap. +class CStringMapVisitor { +public: + virtual ~CStringMapVisitor(); + virtual void Visit(const char *Key, void *Value) const = 0; +}; + +/// CStringMapImpl - This is the base class of CStringMap that is shared among +/// all of its instantiations. +class CStringMapImpl { +protected: + /// ItemBucket - The hash table consists of an array of these. If Item is + /// non-null, this is an extant entry, otherwise, it is a hole. + struct ItemBucket { + /// FullHashValue - This remembers the full hash value of the key for + /// easy scanning. + unsigned FullHashValue; + + /// Item - This is a pointer to the actual item object. + void *Item; + }; + + ItemBucket *TheTable; + unsigned NumBuckets; + unsigned NumItems; + unsigned ItemSize; +protected: + CStringMapImpl(unsigned InitSize, unsigned ItemSize); + void RehashTable(); + + /// LookupBucketFor - Look up the bucket that the specified string should end + /// up in. If it already exists as a key in the map, the Item pointer for the + /// specified bucket will be non-null. Otherwise, it will be null. In either + /// case, the FullHashValue field of the bucket will be set to the hash value + /// of the string. + unsigned LookupBucketFor(const char *KeyStart, const char *KeyEnd); + +public: + unsigned getNumBuckets() const { return NumBuckets; } + unsigned getNumItems() const { return NumItems; } + + void VisitEntries(const CStringMapVisitor &Visitor) const; +}; + + +/// CStringMap - This is an unconventional map that is specialized for handling +/// keys that are "C strings", that is, null-terminated strings. This does some +/// funky memory allocation and hashing things to make it extremely efficient, +/// storing the string data *after* the value in the map. +template +class CStringMap : public CStringMapImpl { + AllocatorTy Allocator; +public: + CStringMap(unsigned InitialSize = 0) + : CStringMapImpl(InitialSize, sizeof(ValueTy)) {} + + AllocatorTy &getAllocator() { return Allocator; } + const AllocatorTy &getAllocator() const { return Allocator; } + + /// FindValue - Look up the specified key in the map. If it exists, return a + /// pointer to the element, otherwise return null. + ValueTy *FindValue(const char *KeyStart, const char *KeyEnd) { + unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd); + return static_cast(TheTable[BucketNo].Item); + } + + /// GetOrCreateValue - Look up the specified key in the table. If a value + /// exists, return it. Otherwise, default construct a value, insert it, and + /// return. + ValueTy &GetOrCreateValue(const char *KeyStart, const char *KeyEnd) { + unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd); + ItemBucket &Bucket = TheTable[BucketNo]; + if (Bucket.Item) + return *static_cast(Bucket.Item); + + unsigned KeyLength = KeyEnd-KeyStart; + + // Okay, the item doesn't already exist, and Bucket is the bucket to fill + // in. Allocate a new item with space for the null-terminated string at the + // end. + unsigned AllocSize = sizeof(ValueTy)+KeyLength+1; + +#ifdef __GNUC__ + unsigned Alignment = __alignof__(ValueTy); +#else + // FIXME: ugly. + unsigned Alignment = 8; +#endif + ValueTy *NewItem = (ValueTy*)Allocator.Allocate(AllocSize, Alignment); + new (NewItem) ValueTy(); + ++NumItems; + + // Copy the string information. + char *StrBuffer = (char*)(NewItem+1); + memcpy(StrBuffer, KeyStart, KeyLength); + StrBuffer[KeyLength] = 0; // Null terminate string. + + // Fill in the bucket for the hash table. The FullHashValue was already + // filled in by LookupBucketFor. + Bucket.Item = NewItem; + + // If the hash table is now more than 3/4 full, rehash into a larger table. + if (NumItems > NumBuckets*3/4) + RehashTable(); + return *NewItem; + } + + ~CStringMap() { + for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) { + if (ValueTy *Id = static_cast(I->Item)) { + // Free memory referenced by the item. + Id->~ValueTy(); + Allocator.Deallocate(Id); + } + } + delete [] TheTable; + } +}; + +} + +#endif \ No newline at end of file diff --git a/lib/Support/CStringMap.cpp b/lib/Support/CStringMap.cpp new file mode 100644 index 00000000000..3229bfe50d8 --- /dev/null +++ b/lib/Support/CStringMap.cpp @@ -0,0 +1,134 @@ +//===--- CStringMap.cpp - CString Hash table map implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CStringMap class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/CStringMap.h" +#include +using namespace llvm; + +CStringMapVisitor::~CStringMapVisitor() { +} + +CStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) { + assert((InitSize & (InitSize-1)) == 0 && + "Init Size must be a power of 2 or zero!"); + NumBuckets = InitSize ? InitSize : 512; + ItemSize = itemSize; + NumItems = 0; + + TheTable = new ItemBucket[NumBuckets](); + memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); +} + + +/// HashString - Compute a hash code for the specified string. +/// +static unsigned HashString(const char *Start, const char *End) { + unsigned int Result = 0; + // Perl hash function. + while (Start != End) + Result = Result * 33 + *Start++; + Result = Result + (Result >> 5); + return Result; +} + +/// LookupBucketFor - Look up the bucket that the specified string should end +/// up in. If it already exists as a key in the map, the Item pointer for the +/// specified bucket will be non-null. Otherwise, it will be null. In either +/// case, the FullHashValue field of the bucket will be set to the hash value +/// of the string. +unsigned CStringMapImpl::LookupBucketFor(const char *NameStart, + const char *NameEnd) { + unsigned HTSize = NumBuckets; + unsigned FullHashValue = HashString(NameStart, NameEnd); + unsigned BucketNo = FullHashValue & (HTSize-1); + + unsigned ProbeAmt = 1; + while (1) { + ItemBucket &Bucket = TheTable[BucketNo]; + void *BucketItem = Bucket.Item; + // If we found an empty bucket, this key isn't in the table yet, return it. + if (BucketItem == 0) { + Bucket.FullHashValue = FullHashValue; + return BucketNo; + } + + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + if (Bucket.FullHashValue == FullHashValue) { + // Do the comparison like this because NameStart isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (strlen(ItemStr) == unsigned(NameEnd-NameStart) && + memcmp(ItemStr, NameStart, (NameEnd-NameStart)) == 0) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + +/// RehashTable - Grow the table, redistributing values into the buckets with +/// the appropriate mod-of-hashtable-size. +void CStringMapImpl::RehashTable() { + unsigned NewSize = NumBuckets*2; + ItemBucket *NewTableArray = new ItemBucket[NewSize](); + memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); + + // Rehash all the items into their new buckets. Luckily :) we already have + // the hash values available, so we don't have to rehash any strings. + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (IB->Item) { + // Fast case, bucket available. + unsigned FullHash = IB->FullHashValue; + unsigned NewBucket = FullHash & (NewSize-1); + if (NewTableArray[NewBucket].Item == 0) { + NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; + NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; + continue; + } + + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); + } while (NewTableArray[NewBucket].Item); + + // Finally found a slot. Fill it in. + NewTableArray[NewBucket].Item = IB->Item; + NewTableArray[NewBucket].FullHashValue = FullHash; + } + } + + delete[] TheTable; + + TheTable = NewTableArray; + NumBuckets = NewSize; +} + + +/// VisitEntries - This method walks through all of the items, +/// invoking Visitor.Visit for each of them. +void CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const { + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (void *Id = IB->Item) + Visitor.Visit((char*)Id + ItemSize, Id); + } +} diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp new file mode 100644 index 00000000000..3229bfe50d8 --- /dev/null +++ b/lib/Support/StringMap.cpp @@ -0,0 +1,134 @@ +//===--- CStringMap.cpp - CString Hash table map implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CStringMap class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/CStringMap.h" +#include +using namespace llvm; + +CStringMapVisitor::~CStringMapVisitor() { +} + +CStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) { + assert((InitSize & (InitSize-1)) == 0 && + "Init Size must be a power of 2 or zero!"); + NumBuckets = InitSize ? InitSize : 512; + ItemSize = itemSize; + NumItems = 0; + + TheTable = new ItemBucket[NumBuckets](); + memset(TheTable, 0, NumBuckets*sizeof(ItemBucket)); +} + + +/// HashString - Compute a hash code for the specified string. +/// +static unsigned HashString(const char *Start, const char *End) { + unsigned int Result = 0; + // Perl hash function. + while (Start != End) + Result = Result * 33 + *Start++; + Result = Result + (Result >> 5); + return Result; +} + +/// LookupBucketFor - Look up the bucket that the specified string should end +/// up in. If it already exists as a key in the map, the Item pointer for the +/// specified bucket will be non-null. Otherwise, it will be null. In either +/// case, the FullHashValue field of the bucket will be set to the hash value +/// of the string. +unsigned CStringMapImpl::LookupBucketFor(const char *NameStart, + const char *NameEnd) { + unsigned HTSize = NumBuckets; + unsigned FullHashValue = HashString(NameStart, NameEnd); + unsigned BucketNo = FullHashValue & (HTSize-1); + + unsigned ProbeAmt = 1; + while (1) { + ItemBucket &Bucket = TheTable[BucketNo]; + void *BucketItem = Bucket.Item; + // If we found an empty bucket, this key isn't in the table yet, return it. + if (BucketItem == 0) { + Bucket.FullHashValue = FullHashValue; + return BucketNo; + } + + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + if (Bucket.FullHashValue == FullHashValue) { + // Do the comparison like this because NameStart isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (strlen(ItemStr) == unsigned(NameEnd-NameStart) && + memcmp(ItemStr, NameStart, (NameEnd-NameStart)) == 0) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + +/// RehashTable - Grow the table, redistributing values into the buckets with +/// the appropriate mod-of-hashtable-size. +void CStringMapImpl::RehashTable() { + unsigned NewSize = NumBuckets*2; + ItemBucket *NewTableArray = new ItemBucket[NewSize](); + memset(NewTableArray, 0, NewSize*sizeof(ItemBucket)); + + // Rehash all the items into their new buckets. Luckily :) we already have + // the hash values available, so we don't have to rehash any strings. + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (IB->Item) { + // Fast case, bucket available. + unsigned FullHash = IB->FullHashValue; + unsigned NewBucket = FullHash & (NewSize-1); + if (NewTableArray[NewBucket].Item == 0) { + NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; + NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; + continue; + } + + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); + } while (NewTableArray[NewBucket].Item); + + // Finally found a slot. Fill it in. + NewTableArray[NewBucket].Item = IB->Item; + NewTableArray[NewBucket].FullHashValue = FullHash; + } + } + + delete[] TheTable; + + TheTable = NewTableArray; + NumBuckets = NewSize; +} + + +/// VisitEntries - This method walks through all of the items, +/// invoking Visitor.Visit for each of them. +void CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const { + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (void *Id = IB->Item) + Visitor.Visit((char*)Id + ItemSize, Id); + } +} -- 2.34.1