From 861a12d4214340f99db3686b02ed20d02d887851 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed.bougacha@gmail.com>
Date: Thu, 10 Sep 2015 01:48:29 +0000
Subject: [PATCH] [AArch64] Match base+offset in STNP addressing mode.

Followup to r247231.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247234 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp |  16 ++
 test/CodeGen/AArch64/nontemporal.ll        | 179 +++++++++++++++++++--
 2 files changed, 181 insertions(+), 14 deletions(-)
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 77896af196d..367bbc3a482 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -630,6 +630,22 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
   SDLoc dl(N);
+  // Unlike the (12-bit) Indexed addressing mode below, the 7-bit signed mode
+  // selected here doesn't support labels/immediates, only base+offset.
+  if (CurDAG->isBaseWithConstantOffset(N)) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int64_t RHSC = RHS->getSExtValue();
+      unsigned Scale = Log2_32(Size);
+      // Negate after shifting: left-shifting a negative value is undefined.
+      if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
+          RHSC < (0x40 << Scale)) {
+        Base = N.getOperand(0);
+        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+        return true;
+      }
+    }
+  }
+
   // Base only. The address will be materialized into a register before
   // the memory is accessed.
   //    add x0, Xbase, #offset
diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll
index 6db05cb4877..af553be92c5 100644
--- a/test/CodeGen/AArch64/nontemporal.ll
+++ b/test/CodeGen/AArch64/nontemporal.ll
@@ -2,10 +2,9 @@
 
 define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
 ; CHECK-LABEL: test_stnp_v4i64:
-; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #16
 ; CHECK-NEXT:  mov d[[HI1:[0-9]+]], v1[1]
 ; CHECK-NEXT:  mov d[[HI0:[0-9]+]], v0[1]
-; CHECK-NEXT:  stnp d1, d[[HI1]], [x[[PTR]]]
+; CHECK-NEXT:  stnp d1, d[[HI1]], [x0, #16]
 ; CHECK-NEXT:  stnp d0, d[[HI0]], [x0]
 ; CHECK-NEXT:  ret
   store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
@@ -123,9 +122,8 @@ define void @test_stnp_i64(i64* %p, i64 %v) #0 {
 
 define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
 ; CHECK-LABEL: test_stnp_v2f64_offset:
-; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #16
 ; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
-; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #16]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1
   store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
@@ -134,9 +132,8 @@ define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
 
 define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
 ; CHECK-LABEL: test_stnp_v2f64_offset_neg:
-; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #16
 ; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
-; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #-16]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1
   store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
@@ -145,9 +142,8 @@ define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
 
 define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_stnp_v2f32_offset:
-; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #8
 ; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
-; CHECK-NEXT:  stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #8]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1
   store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
@@ -156,9 +152,8 @@ define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
 
 define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_stnp_v2f32_offset_neg:
-; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #8
 ; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
-; CHECK-NEXT:  stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #-8]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1
   store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
@@ -167,9 +162,8 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
 
 define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64_offset:
-; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #8
 ; CHECK-NEXT:  ubfx x[[HI:[0-9]+]], x1, #0, #32
-; CHECK-NEXT:  stnp w1, w[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  stnp w1, w[[HI]], [x0, #8]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr i64, i64* %p, i32 1
   store i64 %v, i64* %tmp0, align 1, !nontemporal !0
@@ -178,15 +172,172 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
 
 define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64_offset_neg:
-; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #8
 ; CHECK-NEXT:  ubfx x[[HI:[0-9]+]], x1, #0, #32
-; CHECK-NEXT:  stnp w1, w[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  stnp w1, w[[HI]], [x0, #-8]
 ; CHECK-NEXT:  ret
   %tmp0 = getelementptr i64, i64* %p, i32 -1
   store i64 %v, i64* %tmp0, align 1, !nontemporal !0
   ret void
 }
 
+define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
+; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #4
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 4
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
+; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #4
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 -4
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
+; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #512
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 512
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_504:
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #504]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 504
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
+; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #508
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 508
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
+; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #520
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 -520
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
+; CHECK-NEXT:  mov d[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp d0, d[[HI]], [x0, #-512]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 -512
+  %tmp1 = bitcast i8* %tmp0 to <4 x float>*
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+
+define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
+; CHECK-NEXT:  add x[[PTR:[0-9]+]], x0, #256
+; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 256
+  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset_252:
+; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #252]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 252
+  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
+; CHECK-NEXT:  sub x[[PTR:[0-9]+]], x0, #260
+; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp s0, s[[HI]], [x[[PTR]]]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 -260
+  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
+; CHECK-NEXT:  mov s[[HI:[0-9]+]], v0[1]
+; CHECK-NEXT:  stnp s0, s[[HI]], [x0, #-256]
+; CHECK-NEXT:  ret
+  %tmp0 = getelementptr i8, i8* %p, i32 -256
+  %tmp1 = bitcast i8* %tmp0 to <2 x float>*
+  store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
+  ret void
+}
+
+declare void @dummy(<4 x float>*)
+
+define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
+; CHECK:       mov x29, sp
+; CHECK:       mov x[[PTR:[0-9]+]], sp
+; CHECK-NEXT:  stnp d0, d{{.*}}, [x[[PTR]]]
+; CHECK-NEXT:  mov x0, sp
+; CHECK-NEXT:  bl _dummy
+  %tmp0 = alloca <4 x float>
+  store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0
+  call void @dummy(<4 x float>* %tmp0)
+  ret void
+}
+
+define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
+; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
+; CHECK:       mov x29, sp
+; CHECK:       mov x[[PTR:[0-9]+]], sp
+; CHECK-NEXT:  stnp d0, d{{.*}}, [x[[PTR]], #16]
+; CHECK-NEXT:  mov x0, sp
+; CHECK-NEXT:  bl _dummy
+  %tmp0 = alloca <4 x float>, i32 2
+  %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1
+  store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
+  call void @dummy(<4 x float>* %tmp0)
+  ret void
+}
+
 !0 = !{ i32 1 }
 
 attributes #0 = { nounwind }
-- 
2.34.1