From b01975128c71aeafc3e74c866342d4b60ee0725e Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Thu, 10 Sep 2015 01:54:43 +0000 Subject: [PATCH] [AArch64] Match FI+offset in STNP addressing mode. First, we need to teach isFrameOffsetLegal about STNP. It already knew about the STP/LDP variants, but those were probably never exercised, because it's only the load/store optimizer that generates STP/LDP, and the only user of the method is frame lowering, which runs earlier. The STP/LDP cases were wrong: they didn't take into account the fact that they return two results, not one, so the immediate offset will be the 4th operand, not the 3rd. Follow-up to r247234. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247236 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 13 +++++++++++++ lib/Target/AArch64/AArch64InstrInfo.cpp | 13 +++++++++++++ test/CodeGen/AArch64/nontemporal.ll | 8 ++------ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 367bbc3a482..ada3e852891 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -630,6 +630,15 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); + const DataLayout &DL = CurDAG->getDataLayout(); + const TargetLowering *TLI = getTargetLowering(); + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); + return true; + } + // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed // selected here doesn't support labels/immediates, only base+offset. 
@@ -640,6 +649,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, if ((RHSC & (Size - 1)) == 0 && RHSC >= (-0x40 << Scale) && RHSC < (0x40 << Scale)) { Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + } OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 2013f8beace..a41007cab5c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2260,11 +2260,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPDi: case AArch64::STPXi: case AArch64::STPDi: + case AArch64::LDNPXi: + case AArch64::LDNPDi: + case AArch64::STNPXi: + case AArch64::STNPDi: + ImmIdx = 3; IsSigned = true; Scale = 8; break; case AArch64::LDPQi: case AArch64::STPQi: + case AArch64::LDNPQi: + case AArch64::STNPQi: + ImmIdx = 3; IsSigned = true; Scale = 16; break; @@ -2272,6 +2280,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPSi: case AArch64::STPWi: case AArch64::STPSi: + case AArch64::LDNPWi: + case AArch64::LDNPSi: + case AArch64::STNPWi: + case AArch64::STNPSi: + ImmIdx = 3; IsSigned = true; Scale = 4; break; diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll index af553be92c5..e1d82ae5777 100644 --- a/test/CodeGen/AArch64/nontemporal.ll +++ b/test/CodeGen/AArch64/nontemporal.ll @@ -313,9 +313,7 @@ declare void @dummy(<4 x float>*) define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_alloca: -; CHECK: mov x29, sp -; CHECK: mov x[[PTR:[0-9]+]], sp -; CHECK-NEXT: stnp d0, d{{.*}}, [x[[PTR]]] +; CHECK: stnp d0, d{{.*}}, [sp] ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl _dummy %tmp0 = alloca <4 x float> @@ 
-326,9 +324,7 @@ define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: -; CHECK: mov x29, sp -; CHECK: mov x[[PTR:[0-9]+]], sp -; CHECK-NEXT: stnp d0, d{{.*}}, [x[[PTR]], #16] +; CHECK: stnp d0, d{{.*}}, [sp, #16] ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: bl _dummy %tmp0 = alloca <4 x float>, i32 2 -- 2.34.1