lib/Support/CStringMap.cpp

   1 //===--- CStringMap.cpp - CString Hash table map implementation -----------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file was developed by Chris Lattner and is distributed under
   6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the CStringMap class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/CStringMap.h"
  15 #include <cassert>
  16 using namespace llvm;
  17
  18 CStringMapVisitor::~CStringMapVisitor() {
  19 }
  20
  21 CStringMapImpl::CStringMapImpl(unsigned InitSize, unsigned itemSize) {
  22   assert((InitSize & (InitSize-1)) == 0 &&
  23          "Init Size must be a power of 2 or zero!");
  24   NumBuckets = InitSize ? InitSize : 512;
  25   ItemSize = itemSize;
  26   NumItems = 0;
  27
  28   TheTable = new ItemBucket[NumBuckets]();
  29   memset(TheTable, 0, NumBuckets*sizeof(ItemBucket));
  30 }
  31
  32
  33 /// HashString - Compute a hash code for the specified string.
  34 ///
  35 static unsigned HashString(const char *Start, const char *End) {
  36   // Bernstein hash function.
  37   unsigned int Result = 0;
  38   // TODO: investigate whether a modified bernstein hash function performs
  39   // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
  40   //   X*33+c -> X*33^c
  41   while (Start != End)
  42     Result = Result * 33 + *Start++;
  43   Result = Result + (Result >> 5);
  44   return Result;
  45 }
  46
  47 /// LookupBucketFor - Look up the bucket that the specified string should end
  48 /// up in.  If it already exists as a key in the map, the Item pointer for the
  49 /// specified bucket will be non-null.  Otherwise, it will be null.  In either
  50 /// case, the FullHashValue field of the bucket will be set to the hash value
  51 /// of the string.
  52 unsigned CStringMapImpl::LookupBucketFor(const char *NameStart,
  53                                          const char *NameEnd) {
  54   unsigned HTSize = NumBuckets;
  55   unsigned FullHashValue = HashString(NameStart, NameEnd);
  56   unsigned BucketNo = FullHashValue & (HTSize-1);
  57
  58   unsigned ProbeAmt = 1;
  59   while (1) {
  60     ItemBucket &Bucket = TheTable[BucketNo];
  61     void *BucketItem = Bucket.Item;
  62     // If we found an empty bucket, this key isn't in the table yet, return it.
  63     if (BucketItem == 0) {
  64       Bucket.FullHashValue = FullHashValue;
  65       return BucketNo;
  66     }
  67
  68     // If the full hash value matches, check deeply for a match.  The common
  69     // case here is that we are only looking at the buckets (for item info
  70     // being non-null and for the full hash value) not at the items.  This
  71     // is important for cache locality.
  72     if (Bucket.FullHashValue == FullHashValue) {
  73       // Do the comparison like this because NameStart isn't necessarily
  74       // null-terminated!
  75       char *ItemStr = (char*)BucketItem+ItemSize;
  76       if (strlen(ItemStr) == unsigned(NameEnd-NameStart) &&
  77           memcmp(ItemStr, NameStart, (NameEnd-NameStart)) == 0) {
  78         // We found a match!
  79         return BucketNo;
  80       }
  81     }
  82
  83     // Okay, we didn't find the item.  Probe to the next bucket.
  84     BucketNo = (BucketNo+ProbeAmt) & (HTSize-1);
  85
  86     // Use quadratic probing, it has fewer clumping artifacts than linear
  87     // probing and has good cache behavior in the common case.
  88     ++ProbeAmt;
  89   }
  90 }
  91
  92 /// RehashTable - Grow the table, redistributing values into the buckets with
  93 /// the appropriate mod-of-hashtable-size.
  94 void CStringMapImpl::RehashTable() {
  95   unsigned NewSize = NumBuckets*2;
  96   ItemBucket *NewTableArray = new ItemBucket[NewSize]();
  97   memset(NewTableArray, 0, NewSize*sizeof(ItemBucket));
  98
  99   // Rehash all the items into their new buckets.  Luckily :) we already have
 100   // the hash values available, so we don't have to rehash any strings.
 101   for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
 102     if (IB->Item) {
 103       // Fast case, bucket available.
 104       unsigned FullHash = IB->FullHashValue;
 105       unsigned NewBucket = FullHash & (NewSize-1);
 106       if (NewTableArray[NewBucket].Item == 0) {
 107         NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
 108         NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
 109         continue;
 110       }
 111
 112       unsigned ProbeSize = 1;
 113       do {
 114         NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
 115       } while (NewTableArray[NewBucket].Item);
 116
 117       // Finally found a slot.  Fill it in.
 118       NewTableArray[NewBucket].Item = IB->Item;
 119       NewTableArray[NewBucket].FullHashValue = FullHash;
 120     }
 121   }
 122
 123   delete[] TheTable;
 124
 125   TheTable = NewTableArray;
 126   NumBuckets = NewSize;
 127 }
 128
 129
 130 /// VisitEntries - This method walks through all of the items,
 131 /// invoking Visitor.Visit for each of them.
 132 void CStringMapImpl::VisitEntries(const CStringMapVisitor &Visitor) const {
 133   for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
 134     if (void *Id = IB->Item)
 135       Visitor.Visit((char*)Id + ItemSize, Id);
 136   }
 137 }