From: Evan Cheng
Date: Wed, 27 Oct 2010 23:41:30 +0000 (+0000)
Subject: Shifter ops are not always free. Do not fold them (especially to form
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=f40deed62f4f0126459ed7bfd1799f4e09b1aaa7;p=oota-llvm.git

Shifter ops are not always free. Do not fold them (especially to form
complex load / store addressing mode) when they have higher cost and
when they have more than one use.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117509 91177308-0d34-0410-b5e6-96231b3b80d8
---
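The cost concern is easiest to see in the test4 case added to
test/CodeGen/ARM/shifter_operand.ll below: a single shifted index feeds both a
load and a store. A reduced sketch of that case (the function name @sketch, the
arguments %p and %i, and the placeholder registers rP/rI/rA/rX are illustrative
only, not taken from the test; the expected code shapes follow the A8/A9 CHECK
lines in the patch):

    define fastcc void @sketch(i32* %p, i32 %i) nounwind {
    entry:
      ; Cortex-A8: the lsl #2 is folded into both memory operands,
      ;   ldr rX, [rP, rI, lsl #2]
      ;   str rX, [rP, rI, lsl #2]
      ; Cortex-A9: the shifted address has more than one use, so it is
      ; materialized once with a separate add and then reused,
      ;   add rA, rP, rI, lsl #2
      ;   ldr rX, [rA]
      ;   str rX, [rA]
      %addr = getelementptr inbounds i32* %p, i32 %i
      %val = load i32* %addr, align 4
      %inc = add nsw i32 %val, 1
      store i32 %inc, i32* %addr, align 4
      ret void
    }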
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 5285a43735c..ec66ceae2a1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -78,8 +78,12 @@ public:
 
   SDNode *Select(SDNode *N);
 
+  bool isShifterOpProfitable(const SDValue &Shift,
+                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
   bool SelectShifterOperandReg(SDValue N, SDValue &A,
                                SDValue &B, SDValue &C);
+  bool SelectShiftShifterOperandReg(SDValue N, SDValue &A,
+                                    SDValue &B, SDValue &C);
   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
 
@@ -246,6 +250,17 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 }
 
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+                                            ARM_AM::ShiftOpc ShOpcVal,
+                                            unsigned ShAmt) {
+  if (!Subtarget->isCortexA9())
+    return true;
+  if (Shift.hasOneUse())
+    return true;
+  // R << 2 is free.
+  return ShOpcVal == ARM_AM::lsl && ShAmt == 2;
+}
+
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
@@ -261,6 +276,32 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
 
   BaseReg = N.getOperand(0);
   unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShReg = CurDAG->getRegister(0, MVT::i32);
+    ShImmVal = RHS->getZExtValue() & 31;
+  } else {
+    ShReg = N.getOperand(1);
+    if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal))
+      return false;
+  }
+  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+                                  MVT::i32);
+  return true;
+}
+
+bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N,
+                                                   SDValue &BaseReg,
+                                                   SDValue &ShReg,
+                                                   SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  // Do not check isShifterOpProfitable. This must return true.
   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
     ShReg = CurDAG->getRegister(0, MVT::i32);
     ShImmVal = RHS->getZExtValue() & 31;
@@ -321,7 +362,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
 
 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                       SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       // X * [3,5,9] -> X + X * [2,4,8] etc.
       int RHSC = (int)RHS->getZExtValue();
@@ -357,6 +399,10 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
     }
   }
 
+  if (Subtarget->isCortexA9() && !N.hasOneUse())
+    // Compute R +/- (R << N) and reuse it.
+    return false;
+
   // Otherwise this is R +/- [possibly shifted] R.
   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@@ -371,14 +417,20 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
   }
 
   // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
     if (ShOpcVal != ARM_AM::no_shift) {
       // Check to see if the RHS of the shift is a constant, if not, we can't
@@ -386,8 +438,15 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
       if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
@@ -408,7 +467,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
                                                      SDValue &Base,
                                                      SDValue &Offset,
                                                      SDValue &Opc) {
-  if (N.getOpcode() == ISD::MUL) {
+  if (N.getOpcode() == ISD::MUL &&
+      (!Subtarget->isCortexA9() || N.hasOneUse())) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
       // X * [3,5,9] -> X + X * [2,4,8] etc.
       int RHSC = (int)RHS->getZExtValue();
@@ -474,6 +534,16 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
     }
   }
 
+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R +/- (R << N) and reuse it.
+    Base = N;
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return AM2_BASE;
+  }
+
   // Otherwise this is R +/- [possibly shifted] R.
   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
@@ -488,14 +558,20 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      Offset = N.getOperand(1).getOperand(0);
+      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
+        Offset = N.getOperand(1).getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
     } else {
       ShOpcVal = ARM_AM::no_shift;
     }
   }
 
   // Try matching (R shl C) + (R).
-  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+  if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift &&
+      !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) {
     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
     if (ShOpcVal != ARM_AM::no_shift) {
       // Check to see if the RHS of the shift is a constant, if not, we can't
@@ -503,8 +579,15 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
       if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        Offset = N.getOperand(0).getOperand(0);
-        Base = N.getOperand(1);
+        if (!Subtarget->isCortexA9() ||
+            (N.hasOneUse() &&
+             isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+          Offset = N.getOperand(0).getOperand(0);
+          Base = N.getOperand(1);
+        } else {
+          ShAmt = 0;
+          ShOpcVal = ARM_AM::no_shift;
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
@@ -543,7 +626,12 @@ bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
     // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
-     Offset = N.getOperand(0);
+     if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+       Offset = N.getOperand(0);
+     else {
+       ShAmt = 0;
+       ShOpcVal = ARM_AM::no_shift;
+     }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
@@ -959,6 +1047,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
       return false;
   }
 
+  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
+    // Compute R + (R << [1,2,3]) and reuse it.
+    Base = N;
+    return false;
+  }
+
   // Look for (R + R) or (R + (R << [1,2,3])).
   unsigned ShAmt = 0;
   Base = N.getOperand(0);
@@ -977,11 +1071,12 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
       // it.
       if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
         ShAmt = Sh->getZExtValue();
-        if (ShAmt >= 4) {
+        if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
+          OffReg = OffReg.getOperand(0);
+        else {
           ShAmt = 0;
           ShOpcVal = ARM_AM::no_shift;
-        } else
-          OffReg = OffReg.getOperand(0);
+        }
       } else {
         ShOpcVal = ARM_AM::no_shift;
       }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 5e49cf1807e..0974890e956 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -325,6 +325,13 @@ def so_reg : Operand,    // reg reg imm
   let PrintMethod = "printSORegOperand";
   let MIOperandInfo = (ops GPR, GPR, i32imm);
 }
+def shift_so_reg : Operand,    // reg reg imm
+                   ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
+                                  [shl,srl,sra,rotr]> {
+  string EncoderMethod = "getSORegOpValue";
+  let PrintMethod = "printSORegOperand";
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+}
 
 // so_imm - Match a 32-bit shifter_operand immediate operand, which is an
 // 8-bit immediate rotated by an arbitrary number of bits.  so_imm values are
@@ -1715,9 +1722,10 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
   let Inst{15-12} = Rd;
 }
 
-def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src),
                 DPSoRegFrm, IIC_iMOVsr,
-                "mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
+                "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>,
+                UnaryDP {
   bits<4> Rd;
   bits<12> src;
   let Inst{15-12} = Rd;
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 2bbe9fd2602..897fb1af01c 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,18 +1,72 @@
-; RUN: llc < %s -march=arm | grep add | grep lsl
-; RUN: llc < %s -march=arm | grep bic | grep asr
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+; rdar://8576755
 
 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
-  %shift.upgrd.1 = zext i8 %sh to i32     ; <i32> [#uses=1]
-  %A = shl i32 %Y, %shift.upgrd.1         ; <i32> [#uses=1]
-  %B = add i32 %X, %A                     ; <i32> [#uses=1]
+; A8: test1:
+; A8: add r0, r0, r1, lsl r2
+
+; A9: test1:
+; A9: add r0, r0, r1, lsl r2
+  %shift.upgrd.1 = zext i8 %sh to i32
+  %A = shl i32 %Y, %shift.upgrd.1
+  %B = add i32 %X, %A
   ret i32 %B
 }
 
 define i32 @test2(i32 %X, i32 %Y, i8 %sh) {
-  %shift.upgrd.2 = zext i8 %sh to i32     ; <i32> [#uses=1]
-  %A = ashr i32 %Y, %shift.upgrd.2        ; <i32> [#uses=1]
-  %B = xor i32 %A, -1                     ; <i32> [#uses=1]
-  %C = and i32 %X, %B                     ; <i32> [#uses=1]
+; A8: test2:
+; A8: bic r0, r0, r1, asr r2
+
+; A9: test2:
+; A9: bic r0, r0, r1, asr r2
+  %shift.upgrd.2 = zext i8 %sh to i32
+  %A = ashr i32 %Y, %shift.upgrd.2
+  %B = xor i32 %A, -1
+  %C = and i32 %X, %B
   ret i32 %C
 }
+
+define i32 @test3(i32 %base, i32 %base2, i32 %offset) {
+entry:
+; A8: test3:
+; A8: ldr r0, [r0, r2, lsl #2]
+; A8: ldr r1, [r1, r2, lsl #2]
+
+; lsl #2 is free
+; A9: test3:
+; A9: ldr r1, [r1, r2, lsl #2]
+; A9: ldr r0, [r0, r2, lsl #2]
+  %tmp1 = shl i32 %offset, 2
+  %tmp2 = add i32 %base, %tmp1
+  %tmp3 = inttoptr i32 %tmp2 to i32*
+  %tmp4 = add i32 %base2, %tmp1
+  %tmp5 = inttoptr i32 %tmp4 to i32*
+  %tmp6 = load i32* %tmp3
+  %tmp7 = load i32* %tmp5
+  %tmp8 = add i32 %tmp7, %tmp6
+  ret i32 %tmp8
+}
+
+declare i8* @malloc(...)
+
+define fastcc void @test4() nounwind {
+entry:
+; A8: test4:
+; A8: ldr r1, [r0, r0, lsl #2]
+; A8: str r1, [r0, r0, lsl #2]
+
+; A9: test4:
+; A9: add r0, r0, r0, lsl #2
+; A9: ldr r1, [r0]
+; A9: str r1, [r0]
+  %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+  %1 = bitcast i8* %0 to i32*
+  %2 = sext i16 undef to i32
+  %3 = getelementptr inbounds i32* %1, i32 %2
+  %4 = load i32* %3, align 4
+  %5 = add nsw i32 %4, 1
+  store i32 %5, i32* %3, align 4
+  ret void
+}
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index b216a74b2e4..8b1b890538e 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -586,6 +586,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   MISC("brtarget", "kOperandTypeARMBranchTarget");        // ?
   MISC("so_reg", "kOperandTypeARMSoReg");                  // R, R, I
+  MISC("shift_so_reg", "kOperandTypeARMSoReg");            // R, R, I
   MISC("t2_so_reg", "kOperandTypeThumb2SoReg");            // R, I
   MISC("so_imm", "kOperandTypeARMSoImm");                  // I
   MISC("rot_imm", "kOperandTypeARMRotImm");                // I