From 861a12d4214340f99db3686b02ed20d02d887851 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Thu, 10 Sep 2015 01:48:29 +0000 Subject: [PATCH] [AArch64] Match base+offset in STNP addressing mode. Followup to r247231. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247234 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 16 ++ test/CodeGen/AArch64/nontemporal.ll | 179 +++++++++++++++++++-- 2 files changed, 181 insertions(+), 14 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 77896af196d..367bbc3a482 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -630,6 +630,22 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); + // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed + // mode selected here doesn't support labels/immediates, only base+offset. + + if (CurDAG->isBaseWithConstantOffset(N)) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int64_t RHSC = RHS->getSExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= (-0x40 << Scale) && + RHSC < (0x40 << Scale)) { + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); + return true; + } + } + } + // Base only. The address will be materialized into a register before // the memory is accessed.
// add x0, Xbase, #offset diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll index 6db05cb4877..af553be92c5 100644 --- a/test/CodeGen/AArch64/nontemporal.ll +++ b/test/CodeGen/AArch64/nontemporal.ll @@ -2,10 +2,9 @@ define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 { ; CHECK-LABEL: test_stnp_v4i64: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #16 ; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1] ; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d1, d[[HI1]], [x[[PTR]]] +; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16] ; CHECK-NEXT: stnp d0, d[[HI0]], [x0] ; CHECK-NEXT: ret store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0 @@ -123,9 +122,8 @@ define void @test_stnp_i64(i64* %p, i64 %v) #0 { define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 { ; CHECK-LABEL: test_stnp_v2f64_offset: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #16 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16] ; CHECK-NEXT: ret %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1 store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 @@ -134,9 +132,8 @@ define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 { define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 { ; CHECK-LABEL: test_stnp_v2f64_offset_neg: -; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #16 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16] ; CHECK-NEXT: ret %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1 store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 @@ -145,9 +142,8 @@ define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 { define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_offset: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #8 ; CHECK-NEXT: mov 
s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8] ; CHECK-NEXT: ret %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1 store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 @@ -156,9 +152,8 @@ define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 { define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_offset_neg: -; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #8 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] -; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8] ; CHECK-NEXT: ret %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1 store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 @@ -167,9 +162,8 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64_offset: -; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #8 ; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 -; CHECK-NEXT: stnp w1, w[[HI]], [x[[PTR]]] +; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8] ; CHECK-NEXT: ret %tmp0 = getelementptr i64, i64* %p, i32 1 store i64 %v, i64* %tmp0, align 1, !nontemporal !0 @@ -178,15 +172,172 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 { ; CHECK-LABEL: test_stnp_i64_offset_neg: -; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #8 ; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 -; CHECK-NEXT: stnp w1, w[[HI]], [x[[PTR]]] +; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8] ; CHECK-NEXT: ret %tmp0 = getelementptr i64, i64* %p, i32 -1 store i64 %v, i64* %tmp0, align 1, !nontemporal !0 ret void } +define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; 
CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 4 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -4 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 512 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_504: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 504 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 508 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 
x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -520 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_neg_512: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -512 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + + +define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256 +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 256 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_252: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 252 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260 +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -260 + %tmp1 = bitcast i8* 
%tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_neg_256: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -256 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +declare void @dummy(<4 x float>*) + +define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_alloca: +; CHECK: mov x29, sp +; CHECK: mov x[[PTR:[0-9]+]], sp +; CHECK-NEXT: stnp d0, d{{.*}}, [x[[PTR]]] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: bl _dummy + %tmp0 = alloca <4 x float> + store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0 + call void @dummy(<4 x float>* %tmp0) + ret void +} + +define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: +; CHECK: mov x29, sp +; CHECK: mov x[[PTR:[0-9]+]], sp +; CHECK-NEXT: stnp d0, d{{.*}}, [x[[PTR]], #16] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: bl _dummy + %tmp0 = alloca <4 x float>, i32 2 + %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1 + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + call void @dummy(<4 x float>* %tmp0) + ret void +} + !0 = !{ i32 1 } attributes #0 = { nounwind } -- 2.34.1