From f329765d2304e2ab3eccd7760727513539874c93 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Thu, 11 Dec 2014 22:51:06 +0000 Subject: [PATCH] [PowerPC] Better lowering for add/or of a FrameIndex If we have an add (or an or that is really an add), where one operand is a FrameIndex and the other operand is a small constant, we can combine the lowering of the FrameIndex (which is lowered as an add of the FI and a zero offset) with the constant operand. Amusingly, this is an old potential improvement entry from lib/Target/PowerPC/README.txt which had never been resolved. In short, we used to lower: %X = alloca { i32, i32 } %Y = getelementptr {i32,i32}* %X, i32 0, i32 1 ret i32* %Y as: addi 3, 1, -8 ori 3, 3, 4 blr and now we produce: addi 3, 1, -4 blr which is much more sensible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224071 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 49 +++++++++++++++++++------ lib/Target/PowerPC/README.txt | 19 ---------- test/CodeGen/PowerPC/add-fi.ll | 24 ++++++++++++ test/CodeGen/PowerPC/ppc64-vaarg-int.ll | 2 +- 4 files changed, 63 insertions(+), 31 deletions(-) create mode 100644 test/CodeGen/PowerPC/add-fi.ll diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 48c0388ceae..2e10fee3a93 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -112,6 +112,8 @@ namespace { /// base register. Return the virtual register that holds this value. SDNode *getGlobalBaseReg(); + SDNode *getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); + // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. 
SDNode *Select(SDNode *N) override; @@ -373,6 +375,18 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { && isInt32Immediate(N->getOperand(1).getNode(), Imm); } +SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { + SDLoc dl(SN); + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); + unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; + if (SN->hasOneUse()) + return CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, + getSmallIPtrImm(Offset)); + return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, + getSmallIPtrImm(Offset)); +} + bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { if (!Val) return false; @@ -1019,16 +1033,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { case PPCISD::GlobalBaseReg: return getGlobalBaseReg(); - case ISD::FrameIndex: { - int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); - unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; - if (N->hasOneUse()) - return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI, - getSmallIPtrImm(0)); - return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, - getSmallIPtrImm(0)); - } + case ISD::FrameIndex: + return getFrameIndex(N, N); case PPCISD::MFOCRF: { SDValue InFlag = N->getOperand(1); @@ -1213,13 +1219,34 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Other cases are autogenerated. break; } - case ISD::OR: + case ISD::OR: { if (N->getValueType(0) == MVT::i32) if (SDNode *I = SelectBitfieldInsert(N)) return I; + short Imm; + if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && + isIntS16Immediate(N->getOperand(1), Imm)) { + APInt LHSKnownZero, LHSKnownOne; + CurDAG->computeKnownBits(N->getOperand(0), LHSKnownZero, LHSKnownOne); + + // If this is equivalent to an add, then we can fold it with the + // FrameIndex calculation. 
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) + return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + } + // Other cases are autogenerated. break; + } + case ISD::ADD: { + short Imm; + if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && + isIntS16Immediate(N->getOperand(1), Imm)) + return getFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); + + break; + } case ISD::SHL: { unsigned Imm, SH, MB, ME; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 514f8407c97..f96443c008f 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -184,25 +184,6 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html ===-------------------------------------------------------------------------=== -Compile offsets from allocas: - -int *%test() { - %X = alloca { int, int } - %Y = getelementptr {int,int}* %X, int 0, uint 1 - ret int* %Y -} - -into a single add, not two: - -_test: - addi r2, r1, -8 - addi r3, r2, 4 - blr - ---> important for C++. 
- -===-------------------------------------------------------------------------=== - No loads or stores of the constants should be needed: struct foo { double X, Y; }; diff --git a/test/CodeGen/PowerPC/add-fi.ll b/test/CodeGen/PowerPC/add-fi.ll new file mode 100644 index 00000000000..18892c8cdf5 --- /dev/null +++ b/test/CodeGen/PowerPC/add-fi.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=ppc64 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i32* @test1() { + %X = alloca { i32, i32 } + %Y = getelementptr {i32,i32}* %X, i32 0, i32 1 + ret i32* %Y + +; CHECK-LABEL: @test1 +; CHECK: addi 3, 1, -4 +; CHECK: blr +} + +define i32* @test2() { + %X = alloca { i32, i32, i32, i32 } + %Y = getelementptr {i32,i32,i32,i32}* %X, i32 0, i32 3 + ret i32* %Y + +; CHECK-LABEL: @test2 +; CHECK: addi 3, 1, -4 +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll index 5a63b01badc..c9a4f91fdde 100644 --- a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll +++ b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll @@ -16,5 +16,5 @@ declare void @llvm.va_start(i8*) nounwind ; CHECK: @intvaarg ; Make sure that the va pointer is incremented by 8 (not 4). -; CHECK: addi{{.*}}, 8 +; CHECK: addi{{.*}}, 1, 64 -- 2.34.1