Merging r261387:
authorHans Wennborg <hans@hanshq.net>
Mon, 22 Feb 2016 17:51:28 +0000 (17:51 +0000)
committerHans Wennborg <hans@hanshq.net>
Mon, 22 Feb 2016 17:51:28 +0000 (17:51 +0000)
------------------------------------------------------------------------
r261387 | davide | 2016-02-19 16:44:47 -0800 (Fri, 19 Feb 2016) | 8 lines

[X86ISelLowering] Fix TLSADDR lowering when shrink-wrapping is enabled.

TLSADDR nodes are lowered into actuall calls inside MC. In order to prevent
shrink-wrapping from pushing prologue/epilogue past them (which result
in TLS variables being accessed before the stack frame is set up), we
put markers, so that the stack gets adjusted properly.
Thanks to Quentin Colombet for guidance/help on how to fix this problem!

------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261542 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrCompiler.td
test/CodeGen/X86/tls-shrink-wrapping.ll [new file with mode: 0644]

index c12a3ed..dd9966f 100644 (file)
@@ -22227,6 +22227,35 @@ X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
   return BB;
 }
 
   return BB;
 }
 
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSAddr(MachineInstr *MI,
+                                      MachineBasicBlock *BB) const {
+  // So, here we replace TLSADDR with the sequence:
+  // adjust_stackdown -> TLSADDR -> adjust_stackup.
+  // We need this because TLSADDR is lowered into calls
+  // inside MC, therefore without the two markers shrink-wrapping
+  // may push the prologue/epilogue pass them.
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+  MachineFunction &MF = *BB->getParent();
+
+  // Emit CALLSEQ_START right before the instruction.
+  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+  MachineInstrBuilder CallseqStart =
+    BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0);
+  BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
+
+  // Emit CALLSEQ_END right after the instruction.
+  // We don't call erase from parent because we want to keep the
+  // original instruction around.
+  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+  MachineInstrBuilder CallseqEnd =
+    BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
+  BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
+
+  return BB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
                                       MachineBasicBlock *BB) const {
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
                                       MachineBasicBlock *BB) const {
@@ -22607,6 +22636,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::TCRETURNri64:
   case X86::TCRETURNmi64:
     return BB;
   case X86::TCRETURNri64:
   case X86::TCRETURNmi64:
     return BB;
+  case X86::TLS_addr32:
+  case X86::TLS_addr64:
+  case X86::TLS_base_addr32:
+  case X86::TLS_base_addr64:
+    return EmitLoweredTLSAddr(MI, BB);
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
   case X86::CATCHRET:
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
   case X86::CATCHRET:
index 0ab786e..b67958a 100644 (file)
@@ -1129,6 +1129,9 @@ namespace llvm {
     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                             MachineBasicBlock *BB) const;
 
     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                             MachineBasicBlock *BB) const;
 
+    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr *MI,
+                                          MachineBasicBlock *BB) const;
+
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;
 
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;
 
index 96a29ca..c709c8a 100644 (file)
@@ -436,7 +436,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [ESP] in {
+    usesCustomInserter = 1, Uses = [ESP] in {
 def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   "# TLS_addr32",
                   [(X86tlsaddr tls32addr:$sym)]>,
 def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   "# TLS_addr32",
                   [(X86tlsaddr tls32addr:$sym)]>,
@@ -456,7 +456,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [RSP] in {
+    usesCustomInserter = 1, Uses = [RSP] in {
 def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                    "# TLS_addr64",
                   [(X86tlsaddr tls64addr:$sym)]>,
 def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                    "# TLS_addr64",
                   [(X86tlsaddr tls64addr:$sym)]>,
diff --git a/test/CodeGen/X86/tls-shrink-wrapping.ll b/test/CodeGen/X86/tls-shrink-wrapping.ll
new file mode 100644 (file)
index 0000000..37c1754
--- /dev/null
@@ -0,0 +1,60 @@
+; Testcase generated from the following code:
+; extern __thread int i;
+; void f();
+; int g(void) {
+;   if (i) {
+;     i = 0;
+;     f();
+;   }
+;   return i;
+; }
+; We want to make sure that TLS variables are not accessed before
+; the stack frame is set up.
+
+; RUN: llc < %s -relocation-model=pic | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-freebsd11.0"
+
+@i = external thread_local global i32, align 4
+
+define i32 @g() #0 {
+entry:
+  %tmp = load i32, i32* @i, align 4
+  %tobool = icmp eq i32 %tmp, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* @i, align 4
+  tail call void (...) @f() #2
+  %.pre = load i32, i32* @i, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %tmp1 = phi i32 [ 0, %entry ], [ %.pre, %if.then ]
+  ret i32 %tmp1
+}
+
+; CHECK: g:                                      # @g
+; CHECK-NEXT:         .cfi_startproc
+; CHECK-NEXT: # BB#0:                                 # %entry
+; CHECK-NEXT:         pushq   %rbp
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT:         .cfi_def_cfa_offset 16
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT:         .cfi_offset %rbp, -16
+; CHECK-NEXT:         movq    %rsp, %rbp
+; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT:         .cfi_def_cfa_register %rbp
+; CHECK-NEXT:         pushq   %rbx
+; CHECK-NEXT:         pushq   %rax
+; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT:         .cfi_offset %rbx, -24
+; CHECK-NEXT:         data16
+; CHECK-NEXT:         leaq    i@TLSGD(%rip), %rdi
+
+declare void @f(...) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }