lib/Support/StringMap.cpp

   1 //===--- StringMap.cpp - String Hash table map implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Chris Lattner and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the StringMap class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/StringMap.h"
  15 #include <cassert>
  16 using namespace llvm;
  17
  18 StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
  19   assert((InitSize & (InitSize-1)) == 0 &&
  20          "Init Size must be a power of 2 or zero!");
  21   NumBuckets = InitSize ? InitSize : 512;
  22   ItemSize = itemSize;
  23   NumItems = 0;
  24   NumTombstones = 0;
  25
  26   TheTable = new ItemBucket[NumBuckets+1]();
  27   memset(TheTable, 0, NumBuckets*sizeof(ItemBucket));
  28
  29   // Allocate one extra bucket, set it to look filled so the iterators stop at
  30   // end.
  31   TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
  32 }
  33
  34
  35 /// HashString - Compute a hash code for the specified string.
  36 ///
  37 static unsigned HashString(const char *Start, const char *End) {
  38   // Bernstein hash function.
  39   unsigned int Result = 0;
  40   // TODO: investigate whether a modified bernstein hash function performs
  41   // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
  42   //   X*33+c -> X*33^c
  43   while (Start != End)
  44     Result = Result * 33 + *Start++;
  45   Result = Result + (Result >> 5);
  46   return Result;
  47 }
  48
  49 /// LookupBucketFor - Look up the bucket that the specified string should end
  50 /// up in.  If it already exists as a key in the map, the Item pointer for the
  51 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
  52 /// case, the FullHashValue field of the bucket will be set to the hash value
  53 /// of the string.
  54 unsigned StringMapImpl::LookupBucketFor(const char *NameStart,
  55                                          const char *NameEnd) {
  56   unsigned HTSize = NumBuckets;
  57   unsigned FullHashValue = HashString(NameStart, NameEnd);
  58   unsigned BucketNo = FullHashValue & (HTSize-1);
  59
  60   unsigned ProbeAmt = 1;
  61   int FirstTombstone = -1;
  62   while (1) {
  63     ItemBucket &Bucket = TheTable[BucketNo];
  64     StringMapEntryBase *BucketItem = Bucket.Item;
  65     // If we found an empty bucket, this key isn't in the table yet, return it.
  66     if (BucketItem == 0) {
  67       // If we found a tombstone, we want to reuse the tombstone instead of an
  68       // empty bucket.  This reduces probing.
  69       if (FirstTombstone != -1) {
  70         TheTable[FirstTombstone].FullHashValue = FullHashValue;
  71         return FirstTombstone;
  72       }
  73
  74       Bucket.FullHashValue = FullHashValue;
  75       return BucketNo;
  76     }
  77
  78     if (BucketItem == getTombstoneVal()) {
  79       // Skip over tombstones.  However, remember the first one we see.
  80       if (FirstTombstone == -1) FirstTombstone = BucketNo;
  81     } else if (Bucket.FullHashValue == FullHashValue) {
  82       // If the full hash value matches, check deeply for a match.  The common
  83       // case here is that we are only looking at the buckets (for item info
  84       // being non-null and for the full hash value) not at the items.  This
  85       // is important for cache locality.
  86
  87       // Do the comparison like this because NameStart isn't necessarily
  88       // null-terminated!
  89       char *ItemStr = (char*)BucketItem+ItemSize;
  90       unsigned ItemStrLen = BucketItem->getKeyLength();
  91       if (unsigned(NameEnd-NameStart) == ItemStrLen &&
  92           memcmp(ItemStr, NameStart, ItemStrLen) == 0) {
  93         // We found a match!
  94         return BucketNo;
  95       }
  96     }
  97
  98     // Okay, we didn't find the item.  Probe to the next bucket.
  99     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 100
 101     // Use quadratic probing, it has fewer clumping artifacts than linear
 102     // probing and has good cache behavior in the common case.
 103     ++ProbeAmt;
 104   }
 105 }
 106
 107
 108 /// FindKey - Look up the bucket that contains the specified key. If it exists
 109 /// in the map, return the bucket number of the key.  Otherwise return -1.
 110 /// This does not modify the map.
 111 int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const {
 112   unsigned HTSize = NumBuckets;
 113   unsigned FullHashValue = HashString(KeyStart, KeyEnd);
 114   unsigned BucketNo = FullHashValue & (HTSize-1);
 115
 116   unsigned ProbeAmt = 1;
 117   while (1) {
 118     ItemBucket &Bucket = TheTable[BucketNo];
 119     StringMapEntryBase *BucketItem = Bucket.Item;
 120     // If we found an empty bucket, this key isn't in the table yet, return.
 121     if (BucketItem == 0)
 122       return -1;
 123
 124     if (BucketItem == getTombstoneVal()) {
 125       // Ignore tombstones.
 126     } else if (Bucket.FullHashValue == FullHashValue) {
 127       // If the full hash value matches, check deeply for a match.  The common
 128       // case here is that we are only looking at the buckets (for item info
 129       // being non-null and for the full hash value) not at the items.  This
 130       // is important for cache locality.
 131
 132       // Do the comparison like this because NameStart isn't necessarily
 133       // null-terminated!
 134       char *ItemStr = (char*)BucketItem+ItemSize;
 135       unsigned ItemStrLen = BucketItem->getKeyLength();
 136       if (unsigned(KeyEnd-KeyStart) == ItemStrLen &&
 137           memcmp(ItemStr, KeyStart, ItemStrLen) == 0) {
 138         // We found a match!
 139         return BucketNo;
 140       }
 141     }
 142
 143     // Okay, we didn't find the item.  Probe to the next bucket.
 144     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
 145
 146     // Use quadratic probing, it has fewer clumping artifacts than linear
 147     // probing and has good cache behavior in the common case.
 148     ++ProbeAmt;
 149   }
 150 }
 151
 152 /// RemoveKey - Remove the specified StringMapEntry from the table, but do not
 153 /// delete it.  This aborts if the value isn't in the table.
 154 void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
 155   const char *VStr = (char*)V + ItemSize;
 156   StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength());
 157   V2 = V2;
 158   assert(V == V2 && "Didn't find key?");
 159 }
 160
 161 /// RemoveKey - Remove the StringMapEntry for the specified key from the
 162 /// table, returning it.  If the key is not in the table, this returns null.
 163 StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart,
 164                                              const char *KeyEnd) {
 165   int Bucket = FindKey(KeyStart, KeyEnd);
 166   if (Bucket == -1) return 0;
 167
 168   StringMapEntryBase *Result = TheTable[Bucket].Item;
 169   TheTable[Bucket].Item = getTombstoneVal();
 170   --NumItems;
 171   ++NumTombstones;
 172   return Result;
 173 }
 174
 175
 176
 177 /// RehashTable - Grow the table, redistributing values into the buckets with
 178 /// the appropriate mod-of-hashtable-size.
 179 void StringMapImpl::RehashTable() {
 180   unsigned NewSize = NumBuckets*2;
 181   // Allocate one extra bucket which will always be non-empty.  This allows the
 182   // iterators to stop at end.
 183   ItemBucket *NewTableArray = new ItemBucket[NewSize+1]();
 184   memset(NewTableArray, 0, NewSize*sizeof(ItemBucket));
 185   NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
 186
 187   // Rehash all the items into their new buckets.  Luckily :) we already have
 188   // the hash values available, so we don't have to rehash any strings.
 189   for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
 190     if (IB->Item && IB->Item != getTombstoneVal()) {
 191       // Fast case, bucket available.
 192       unsigned FullHash = IB->FullHashValue;
 193       unsigned NewBucket = FullHash & (NewSize-1);
 194       if (NewTableArray[NewBucket].Item == 0) {
 195         NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
 196         NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
 197         continue;
 198       }
 199
 200       // Otherwise probe for a spot.
 201       unsigned ProbeSize = 1;
 202       do {
 203         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
 204       } while (NewTableArray[NewBucket].Item);
 205
 206       // Finally found a slot.  Fill it in.
 207       NewTableArray[NewBucket].Item = IB->Item;
 208       NewTableArray[NewBucket].FullHashValue = FullHash;
 209     }
 210   }
 211
 212   delete[] TheTable;
 213
 214   TheTable = NewTableArray;
 215   NumBuckets = NewSize;
 216 }