From ab5a55e11870c8f5aaa8e4b973092bf66f9d6742 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc@gmail.com>
Date: Mon, 9 Apr 2012 02:13:06 +0000
Subject: [PATCH] Clean up and relax a restriction on the matching of global
 offsets into x86 addressing modes. This allows PIE-based TLS offsets to fit
 directly into an addressing mode immediate offset, which is the last
 remaining code quality issue from PR12380. With this patch, that PR is
 completely fixed.

To understand why this patch is correct to match these offsets into
addressing mode immediates, break it down by cases:
1) 32-bit is trivially correct, and unmodified here.
2) 64-bit non-small mode is unchanged and never matches.
3) 64-bit small PIC code which is RIP-relative is handled specially in
   the match to try to fit RIP into the base register. If that fails, it
   now exits early instead of falling through to the later checks; the
   resulting match behavior is unchanged by the patch.
4) 64-bit small non-PIC code which is not RIP-relative continues to work
   as it did before. These immediates are safe because the ABI ensures
   they fit in small mode. This behavior is unchanged.
5) 64-bit small PIC code which is *not* using RIP-relative addressing.
   This is the only case changed by the patch, and the primary place you
   see it is in TLS: either the win64 section-offset TLS or the Linux
   local-exec TLS model in a PIC compilation. Here the ABI again ensures
   that the immediates fit because we are in small mode, and any other
   operations required due to the PIC relocation model have been handled
   externally to the Wrapper node (extra loads, etc., are made around the
   wrapper node in ISelLowering).

I've tested this as much as I can by comparing it with GCC's output, and
everything appears safe. I discussed this with Anton and it made sense
to him, at least at face value. That said, if there are issues with PIC
code after this patch, yell and we can revert it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154304 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelDAGToDAG.cpp | 19 ++++++++++---------
 test/CodeGen/X86/tls-pie.ll        |  5 ++---
 test/CodeGen/X86/tls.ll            | 27 ++++++++++-----------------
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ff89d70b1af..e4246d3f515 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -620,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
 
   // Handle X86-64 rip-relative addresses.  We check this before checking direct
   // folding because RIP is preferable to non-RIP accesses.
-  if (Subtarget->is64Bit() &&
+  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
       // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
       // they cannot be folded into immediate fields.
       // FIXME: This can be improved for kernel and other models?
-      (M == CodeModel::Small || M == CodeModel::Kernel) &&
-      // Base and index reg must be 0 in order to use %rip as base and lowering
-      // must allow RIP.
-      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+      (M == CodeModel::Small || M == CodeModel::Kernel)) {
+    // Base and index reg must be 0 in order to use %rip as base.
+    if (AM.hasBaseOrIndexReg())
+      return true;
     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
       X86ISelAddressMode Backup = AM;
       AM.GV = G->getGlobal();
@@ -662,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
   }
 
   // Handle the case when globals fit in our immediate field: This is true for
-  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
-  // mode, this results in a non-RIP-relative computation.
+  // X86-32 always and X86-64 when in -mcmodel=small mode.  In 64-bit
+  // mode, this only applies to a non-RIP-relative computation.
   if (!Subtarget->is64Bit() ||
-      ((M == CodeModel::Small || M == CodeModel::Kernel) &&
-       TM.getRelocationModel() == Reloc::Static)) {
+      M == CodeModel::Small || M == CodeModel::Kernel) {
+    assert(N.getOpcode() != X86ISD::WrapperRIP &&
+           "RIP-relative addressing already handled");
     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
       AM.GV = G->getGlobal();
       AM.Disp += G->getOffset();
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
index 6c739cbabd9..e2e58a541a4 100644
--- a/test/CodeGen/X86/tls-pie.ll
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -11,8 +11,7 @@ define i32 @f1() {
 ; X32:      movl %gs:i@NTPOFF, %eax
 ; X32-NEXT: ret
 ; X64: f1:
-; X64:      movabsq $i@TPOFF, %rax
-; X64-NEXT: movl %fs:(%rax), %eax
+; X64:      movl %fs:i@TPOFF, %eax
 ; X64-NEXT: ret
 
 entry:
@@ -27,7 +26,7 @@ define i32* @f2() {
 ; X32-NEXT: ret
 ; X64: f2:
 ; X64:      movq %fs:0, %rax
-; X64-NEXT: addq $i@TPOFF, %rax
+; X64-NEXT: leaq i@TPOFF(%rax), %rax
 ; X64-NEXT: ret
 
 entry:
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
index 4604608bac1..e8a79bfa6ee 100644
--- a/test/CodeGen/X86/tls.ll
+++ b/test/CodeGen/X86/tls.ll
@@ -28,8 +28,7 @@ define i32 @f1() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $i1@SECREL, %rcx
-; X64_WIN-NEXT: movl (%rax,%rcx), %eax
+; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -56,7 +55,7 @@ define i32* @f2() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: addq $i1@SECREL, %rax
+; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -82,8 +81,7 @@ define i32 @f3() nounwind {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $i2@SECREL, %rcx
-; X64_WIN-NEXT: movl (%rax,%rcx), %eax
+; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -110,7 +108,7 @@ define i32* @f4() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: addq $i2@SECREL, %rax
+; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -134,8 +132,7 @@ define i32 @f5() nounwind {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $i3@SECREL, %rcx
-; X64_WIN-NEXT: movl (%rax,%rcx), %eax
+; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -162,7 +159,7 @@ define i32* @f6() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: addq $i3@SECREL, %rax
+; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -244,8 +241,7 @@ define i16 @f11() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $s1@SECREL, %rcx
-; X64_WIN-NEXT: movzwl (%rax,%rcx), %eax
+; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
 ; X64_WIN-NEXT: # kill
 ; X64_WIN-NEXT: ret
 
@@ -271,8 +267,7 @@ define i32 @f12() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $s1@SECREL, %rcx
-; X64_WIN-NEXT: movswl (%rax,%rcx), %eax
+; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -298,8 +293,7 @@ define i8 @f13() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $b1@SECREL, %rcx
-; X64_WIN-NEXT: movb (%rax,%rcx), %al
+; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
 ; X64_WIN-NEXT: ret
 
 entry:
@@ -324,8 +318,7 @@ define i32 @f14() {
 ; X64_WIN:      movl _tls_index(%rip), %eax
 ; X64_WIN-NEXT: movq %gs:88, %rcx
 ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
-; X64_WIN-NEXT: movabsq $b1@SECREL, %rcx
-; X64_WIN-NEXT: movsbl (%rax,%rcx), %eax
+; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
 ; X64_WIN-NEXT: ret
 
 entry:
-- 
2.34.1