DwarfAccelTable: Fix handling of hash collisions.
authorFrederic Riss <friss@apple.com>
Tue, 10 Mar 2015 00:46:31 +0000 (00:46 +0000)
committerFrederic Riss <friss@apple.com>
Tue, 10 Mar 2015 00:46:31 +0000 (00:46 +0000)
It turns out accelerator tables where totally broken if they contained
entries with colliding hashes. The failure mode is pretty bad, as it not
only impacted the colliding entries, but would basically make all the
entries after the first hash collision pointing in the wrong place.

The testcase uses the symbol names that where found to collide during a
clang build.

From a performance point of view, the patch adds a sort and a linear
walk over each bucket contents. While it has a measurable impact on the
accelerator table emission, it's not showing up significantly in clang
profiles (and I'd argue that correctness is priceless :-)).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231732 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
test/DebugInfo/accel-table-hash-collisions.ll [new file with mode: 0644]

index 4a13cc260ad330713a96a0841d7bd0813b984b86..09c47356adc6815fa5a917c733e6f57f01e4f0a4 100644 (file)
@@ -98,6 +98,15 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
     Buckets[bucket].push_back(Data[i]);
     Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
   }
+
+  // Sort the contents of the buckets by hash value so that hash
+  // collisions end up together. Stable sort makes testing easier and
+  // doesn't cost much more.
+  for (size_t i = 0; i < Buckets.size(); ++i)
+    std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+                     [] (HashData *LHS, HashData *RHS) {
+                       return LHS->HashValue < RHS->HashValue;
+                     });
 }
 
 // Emits the header for the table via the AsmPrinter.
@@ -137,19 +146,32 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
       Asm->EmitInt32(index);
     else
       Asm->EmitInt32(UINT32_MAX);
-    index += Buckets[i].size();
+    // Buckets point in the list of hashes, not to the data. Do not
+    // increment the index multiple times in case of hash collisions.
+    uint64_t PrevHash = UINT64_MAX;
+    for (auto *HD : Buckets[i]) {
+      uint32_t HashValue = HD->HashValue;
+      if (PrevHash != HashValue)
+        ++index;
+      PrevHash = HashValue;
+    }
   }
 }
 
 // Walk through the buckets and emit the individual hashes for each
 // bucket.
 void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+  uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      uint32_t HashValue = (*HI)->HashValue;
+      if (PrevHash == HashValue)
+        continue;
       Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
-      Asm->EmitInt32((*HI)->HashValue);
+      Asm->EmitInt32(HashValue);
+      PrevHash = HashValue;
     }
   }
 }
@@ -159,10 +181,15 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
 // beginning of the section. The non-section symbol will be output later
 // when we emit the actual data.
 void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+  uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      uint32_t HashValue = (*HI)->HashValue;
+      if (PrevHash == HashValue)
+        continue;
+      PrevHash = HashValue;
       Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
       MCContext &Context = Asm->OutStreamer.getContext();
       const MCExpr *Sub = MCBinaryExpr::CreateSub(
@@ -183,6 +210,10 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      // Terminate the previous entry if there is no hash collision
+      // with the current one.
+      if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+        Asm->EmitInt32(0);
       // Remember to emit the label for our offset.
       Asm->OutStreamer.EmitLabel((*HI)->Sym);
       Asm->OutStreamer.AddComment((*HI)->Str);
@@ -201,11 +232,10 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D,
           Asm->EmitInt8(HD->Flags);
         }
       }
-      // Emit a 0 to terminate the data unless we have a hash collision.
-      if (PrevHash != (*HI)->HashValue)
-        Asm->EmitInt32(0);
       PrevHash = (*HI)->HashValue;
     }
+    // Emit the final end marker for the bucket.
+    Asm->EmitInt32(0);
   }
 }
 
diff --git a/test/DebugInfo/accel-table-hash-collisions.ll b/test/DebugInfo/accel-table-hash-collisions.ll
new file mode 100644 (file)
index 0000000..25bd2f4
--- /dev/null
@@ -0,0 +1,92 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -dwarf-accel-tables=Enable -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=apple_names - | FileCheck %s
+
+; Generated from the following C code using
+; clang -S -emit-llvm hash-collision.c
+;
+; The names of the variables have been chosen so that they produce hash collisions.
+; There are 12 names here that are hashed to only 6 hashes (each pair of lines
+; hashes to the same value, see the CHECK lines below).
+;
+; int ForceTopDown;
+; int _ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_;
+; int _ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE; 
+; int _ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv;
+; int _ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE;
+; int _ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE;
+; int k1;
+; int is;
+; int setStmt;
+; int _ZN4llvm5TwineC1Ei;
+; int _ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE;
+; int _ZN4llvm22MachineModuleInfoMachOD2Ev;
+
+; Check that we have the right amount of hashes.
+; CHECK: Bucket count = 6
+; CHECK: Hashes count = 6
+
+; Check that all the names are present in the output
+; CHECK:  Hash = 0x00597841
+; CHECK:    Name: {{[0-9a-f]*}} "is"
+; CHECK:    Name: {{[0-9a-f]*}} "k1"
+
+; CHECK: Hash = 0xa4b42a1e
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv"
+
+; CHECK: Hash = 0xeee7c0b2
+; CHECK:    Name: {{[0-9a-f]*}} "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE"
+
+; CHECK: Hash = 0xea48ac5f
+; CHECK:    Name: {{[0-9a-f]*}} "ForceTopDown"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_"
+
+; CHECK:  Hash = 0x6b22f71f
+; CHECK:    Name: {{[0-9a-f]*}} "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm22MachineModuleInfoMachOD2Ev"
+
+; CHECK:  Hash = 0x8c248979
+; CHECK:    Name: {{[0-9a-f]*}} "setStmt"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm5TwineC1Ei"
+
+
+
+@ForceTopDown = common global i32 0, align 4
+@_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_ = common global i32 0, align 4
+@_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE = common global i32 0, align 4
+@_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv = common global i32 0, align 4
+@_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE = common global i32 0, align 4
+@_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE = common global i32 0, align 4
+@k1 = common global i32 0, align 4
+@is = common global i32 0, align 4
+@setStmt = common global i32 0, align 4
+@_ZN4llvm5TwineC1Ei = common global i32 0, align 4
+@_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE = common global i32 0, align 4
+@_ZN4llvm22MachineModuleInfoMachOD2Ev = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18, !19}
+!llvm.ident = !{!20}
+
+!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !MDFile(filename: "hash-collisions.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
+!4 = !MDGlobalVariable(name: "ForceTopDown", scope: !0, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, variable: i32* @ForceTopDown)
+!5 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !MDGlobalVariable(name: "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_)
+!7 = !MDGlobalVariable(name: "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE", scope: !0, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE)
+!8 = !MDGlobalVariable(name: "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv", scope: !0, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv)
+!9 = !MDGlobalVariable(name: "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE", scope: !0, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE)
+!10 = !MDGlobalVariable(name: "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE", scope: !0, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE)
+!11 = !MDGlobalVariable(name: "k1", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: i32* @k1)
+!12 = !MDGlobalVariable(name: "is", scope: !0, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, variable: i32* @is)
+!13 = !MDGlobalVariable(name: "setStmt", scope: !0, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, variable: i32* @setStmt)
+!14 = !MDGlobalVariable(name: "_ZN4llvm5TwineC1Ei", scope: !0, file: !1, line: 10, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm5TwineC1Ei)
+!15 = !MDGlobalVariable(name: "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE", scope: !0, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE)
+!16 = !MDGlobalVariable(name: "_ZN4llvm22MachineModuleInfoMachOD2Ev", scope: !0, file: !1, line: 12, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm22MachineModuleInfoMachOD2Ev)
+!17 = !{i32 2, !"Dwarf Version", i32 2}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{i32 1, !"PIC Level", i32 2}
+!20 = !{!"clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)"}