diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index e05589eb5ba..b25fcdea517 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
   cl::desc("Check fp vmla / vmls hazard at isel time"),
   cl::init(true));
 
+static cl::opt<bool>
+DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
+  cl::desc("Enable / disable ARM integer abs transform"),
+  cl::init(false));
+
 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
 /// instructions for SelectionDAG operations.
@@ -252,6 +257,9 @@ private:
                                ARMCC::CondCodes CCVal, SDValue CCR,
                                SDValue InFlag);
 
+  // Select special operations if node forms integer ABS pattern
+  SDNode *SelectABSOp(SDNode *N);
+
   SDNode *SelectConcatVector(SDNode *N);
 
   SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@@ -305,10 +313,10 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
 /// (N * Scale) where (N in [\arg RangeMin, \arg RangeMax).
 ///
 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
-static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+static bool isScaledConstantInRange(SDValue Node, int Scale,
                                     int RangeMin, int RangeMax,
                                     int &ScaledConstant) {
-  assert(Scale && "Invalid scale!");
+  assert(Scale > 0 && "Invalid scale!");
 
   // Check that this is a constant.
   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
@@ -519,10 +527,6 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
       return false;
     }
 
-    if (Subtarget->isCortexA9() && !N.hasOneUse())
-      // Compute R +/- (R << N) and reuse it.
-      return false;
-
     // Otherwise this is R +/- [possibly shifted] R.
     ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
     ARM_AM::ShiftOpc ShOpcVal =
@@ -559,9 +563,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
     if (ConstantSDNode *Sh =
         dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      if (!Subtarget->isCortexA9() ||
-          (N.hasOneUse() &&
-           isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+      if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
         Offset = N.getOperand(0).getOperand(0);
         Base = N.getOperand(1);
       } else {
@@ -701,9 +703,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
     if (ConstantSDNode *Sh =
         dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
       ShAmt = Sh->getZExtValue();
-      if (!Subtarget->isCortexA9() ||
-          (N.hasOneUse() &&
-           isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) {
+      if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
         Offset = N.getOperand(0).getOperand(0);
         Base = N.getOperand(1);
       } else {
@@ -923,7 +923,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
     // The maximum alignment is equal to the memory size being referenced.
     unsigned LSNAlign = LSN->getAlignment();
     unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
-    if (LSNAlign > MemSize && MemSize > 1)
+    if (LSNAlign >= MemSize && MemSize > 1)
       Alignment = MemSize;
   } else {
     // All other uses of addrmode6 are for intrinsics.  For now just record
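
[Note — illustrative aside, not part of the patch. The '>' to '>=' change
above matters exactly when the declared alignment equals the access size:
previously such an access kept Alignment == 0 and was treated as unaligned.
A minimal sketch of the clamp rule, with a hypothetical helper name:

    // addrmode6 can encode at most "alignment == access size".
    static unsigned clampAddrMode6Align(unsigned DeclaredAlign,
                                        unsigned MemSizeBytes) {
      if (DeclaredAlign >= MemSizeBytes && MemSizeBytes > 1)
        return MemSizeBytes; // use the aligned encoding
      return 0;              // no alignment hint
    }
]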
@@ -1285,12 +1285,6 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
     return false;
   }
 
-  if (Subtarget->isCortexA9() && !N.hasOneUse()) {
-    // Compute R + (R << [1,2,3]) and reuse it.
-    Base = N;
-    return false;
-  }
-
   // Look for (R + R) or (R + (R << [1,2,3])).
   unsigned ShAmt = 0;
   Base   = N.getOperand(0);
@@ -1555,6 +1549,60 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
   return CurDAG->getTargetConstant(Alignment, MVT::i32);
 }
 
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+  switch (Opc) {
+  default: break;
+  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
+  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
+  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
+  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
+  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
+  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
+  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
+  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+  case ARM::VLD1q8PseudoWB_fixed: return ARM::VLD1q8PseudoWB_register;
+  case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register;
+  case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register;
+  case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register;
+
+  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
+  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
+  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
+  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
+  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
+  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
+  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
+  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
+  case ARM::VST1q8PseudoWB_fixed: return ARM::VST1q8PseudoWB_register;
+  case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register;
+  case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register;
+  case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
+  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+  case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
+  case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
+  case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
+  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+  case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
+  case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
+  case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
+  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
+
+  case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register;
+  case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register;
+  case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register;
+  }
+  return Opc; // If not one we handle, return it unchanged.
+}
+
 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                    unsigned *DOpcodes, unsigned *QOpcodes0,
                                    unsigned *QOpcodes1) {
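
[Note — illustrative aside, not part of the patch. The _fixed/_register
split mirrors the two post-increment forms of the underlying NEON
instructions:

    vld1.8 {d0}, [r0]!      @ wb_fixed: base += transfer size (implicit)
    vld1.8 {d0}, [r0], r2   @ wb_register: base += r2 (explicit register)

so a selector holding a fixed-form opcode Opc and an increment operand Inc
only needs the mapping when the increment is not the implicit constant:

    if (!isa<ConstantSDNode>(Inc.getNode()))
      Opc = getVLDSTRegisterUpdateOpcode(Opc); // e.g. VLD1d8wb_fixed -> VLD1d8wb_register
]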
@@ -1618,7 +1666,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Align);
     if (isUpdating) {
       SDValue Inc = N->getOperand(AddrOpIdx + 1);
-      Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+      // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
+      // case entirely when the rest are updated to that form, too.
+      if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+        Opc = getVLDSTRegisterUpdateOpcode(Opc);
+      // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+      // check for that explicitly too. Horribly hacky, but temporary.
+      if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
+          !isa<ConstantSDNode>(Inc.getNode()))
+        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
     }
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
@@ -1760,7 +1816,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Align);
     if (isUpdating) {
       SDValue Inc = N->getOperand(AddrOpIdx + 1);
-      Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+      // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+      // case entirely when the rest are updated to that form, too.
+      if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+        Opc = getVLDSTRegisterUpdateOpcode(Opc);
+      // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+      // check for that explicitly too. Horribly hacky, but temporary.
+      if ((NumVecs > 2 && Opc != ARM::VST1q64PseudoWB_fixed) ||
+          !isa<ConstantSDNode>(Inc.getNode()))
+        Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
     }
     Ops.push_back(SrcReg);
     Ops.push_back(Pred);
@@ -1983,8 +2047,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
   Ops.push_back(MemAddr);
   Ops.push_back(Align);
   if (isUpdating) {
+    // fixed-stride update instructions don't have an explicit writeback
+    // operand. It's implicit in the opcode itself.
     SDValue Inc = N->getOperand(2);
-    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+    if (!isa<ConstantSDNode>(Inc.getNode()))
+      Ops.push_back(Inc);
+    // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+    else if (NumVecs > 2)
+      Ops.push_back(Reg0);
   }
   Ops.push_back(Pred);
   Ops.push_back(Reg0);
@@ -2122,7 +2192,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
   case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
   default:
     llvm_unreachable("Unknown so_reg opcode!");
-    break;
   }
   SDValue SOShImm =
     CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
@@ -2292,8 +2361,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
   SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
   unsigned Opc = 0;
   switch (VT.getSimpleVT().SimpleTy) {
-  default: assert(false && "Illegal conditional move type!");
-    break;
+  default: llvm_unreachable("Illegal conditional move type!");
   case MVT::i32:
     Opc = Subtarget->isThumb()
       ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
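
[Note — illustrative aside, not part of the patch. The two hunks above can
drop the trailing `break`s because llvm_unreachable(), unlike assert(false),
is marked noreturn in all build modes (the assert compiles away under
NDEBUG). A minimal sketch of the pattern:

    #include "llvm/Support/ErrorHandling.h"

    static int classify(int Kind) {
      switch (Kind) {
      default: llvm_unreachable("unexpected kind"); // noreturn: no break needed
      case 0: return 10;
      case 1: return 20;
      }
    }
]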
@@ -2309,6 +2377,55 @@
   return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
 }
 
+/// Target-specific DAG combining for ISD::XOR.
+/// Target-independent combining lowers SELECT_CC nodes of the form
+/// select_cc setg[ge] X,  0,  X, -X
+/// select_cc setgt    X, -1,  X, -X
+/// select_cc setl[te] X,  0, -X,  X
+/// select_cc setlt    X,  1, -X,  X
+/// which represent Integer ABS into:
+/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// ARM instruction selection detects the latter and matches it to
+/// ARM::ABS or ARM::t2ABS machine node.
+SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
+  SDValue XORSrc0 = N->getOperand(0);
+  SDValue XORSrc1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  if (DisableARMIntABS)
+    return NULL;
+
+  if (Subtarget->isThumb1Only())
+    return NULL;
+
+  if (XORSrc0.getOpcode() != ISD::ADD ||
+      XORSrc1.getOpcode() != ISD::SRA)
+    return NULL;
+
+  SDValue ADDSrc0 = XORSrc0.getOperand(0);
+  SDValue ADDSrc1 = XORSrc0.getOperand(1);
+  SDValue SRASrc0 = XORSrc1.getOperand(0);
+  SDValue SRASrc1 = XORSrc1.getOperand(1);
+  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
+  EVT XType = SRASrc0.getValueType();
+  unsigned Size = XType.getSizeInBits() - 1;
+
+  if (ADDSrc1 == XORSrc1 &&
+      ADDSrc0 == SRASrc0 &&
+      XType.isInteger() &&
+      SRAConstant != NULL &&
+      Size == SRAConstant->getZExtValue()) {
+
+    unsigned Opcode = ARM::ABS;
+    if (Subtarget->isThumb2())
+      Opcode = ARM::t2ABS;
+
+    return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+  }
+
+  return NULL;
+}
+
 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   // The only time a CONCAT_VECTORS operation can have legal types is when
   // two 64-bit vectors are concatenated to a 128-bit vector.
@@ -2345,6 +2462,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
   switch (N->getOpcode()) {
   default: break;
+  case ISD::XOR: {
+    // Select special operations if XOR node forms integer ABS pattern
+    SDNode *ResNode = SelectABSOp(N);
+    if (ResNode)
+      return ResNode;
+    // Other cases are autogenerated.
+    break;
+  }
   case ISD::Constant: {
     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
     bool UseCP = true;
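
[Note — illustrative aside, not part of the patch. The identity SelectABSOp
matches is easy to check in scalar form; a minimal sketch assuming 32-bit
integers and an arithmetic right shift, as on ARM:

    #include <cstdint>

    int32_t abs_no_branch(int32_t X) {
      int32_t Y = X >> 31; // sra(X, size(X)-1): 0 if X >= 0, -1 if X < 0
      return (X + Y) ^ Y;  // xor(add(X, Y), Y)
    }
    // X = -5: Y = -1, X + Y = -6, -6 ^ -1 = 5.  X = 7: Y = 0, result 7.
]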
@@ -2681,8 +2806,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VLD2DUP_UPD: {
-    unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
-                           ARM::VLD2DUPd32Pseudo_UPD };
+    unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed,
+                           ARM::VLD2DUPd16PseudoWB_fixed,
+                           ARM::VLD2DUPd32PseudoWB_fixed };
     return SelectVLDDup(N, true, 2, Opcodes);
   }
 
@@ -2699,24 +2825,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VLD1_UPD: {
-    unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
-                            ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
-    unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
-                            ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+    unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
+                            ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
+    unsigned QOpcodes[] = { ARM::VLD1q8PseudoWB_fixed,
+                            ARM::VLD1q16PseudoWB_fixed,
+                            ARM::VLD1q32PseudoWB_fixed,
+                            ARM::VLD1q64PseudoWB_fixed };
     return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
   }
 
   case ARMISD::VLD2_UPD: {
-    unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
-                            ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
-    unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
-                            ARM::VLD2q32Pseudo_UPD };
+    unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
+                            ARM::VLD2d16PseudoWB_fixed,
+                            ARM::VLD2d32PseudoWB_fixed,
+                            ARM::VLD1q64PseudoWB_fixed};
+    unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+                            ARM::VLD2q16PseudoWB_fixed,
+                            ARM::VLD2q32PseudoWB_fixed };
     return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
   }
 
   case ARMISD::VLD3_UPD: {
     unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
-                            ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+                            ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
     unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                              ARM::VLD3q16Pseudo_UPD,
                              ARM::VLD3q32Pseudo_UPD };
@@ -2728,7 +2859,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
   case ARMISD::VLD4_UPD: {
     unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
-                            ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+                            ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
     unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                              ARM::VLD4q16Pseudo_UPD,
                              ARM::VLD4q32Pseudo_UPD };
@@ -2763,24 +2894,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VST1_UPD: {
-    unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
-                            ARM::VST1d32_UPD, ARM::VST1d64_UPD };
-    unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
-                            ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+    unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
+                            ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
+    unsigned QOpcodes[] = { ARM::VST1q8PseudoWB_fixed,
+                            ARM::VST1q16PseudoWB_fixed,
+                            ARM::VST1q32PseudoWB_fixed,
+                            ARM::VST1q64PseudoWB_fixed };
     return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
   }
 
   case ARMISD::VST2_UPD: {
-    unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
-                            ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
-    unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
-                            ARM::VST2q32Pseudo_UPD };
+    unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
+                            ARM::VST2d16PseudoWB_fixed,
+                            ARM::VST2d32PseudoWB_fixed,
+                            ARM::VST1q64PseudoWB_fixed};
+    unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+                            ARM::VST2q16PseudoWB_fixed,
+                            ARM::VST2q32PseudoWB_fixed };
     return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
   }
 
   case ARMISD::VST3_UPD: {
     unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
-                            ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+                            ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
     unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                              ARM::VST3q16Pseudo_UPD,
                              ARM::VST3q32Pseudo_UPD };
@@ -2792,7 +2928,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
   case ARMISD::VST4_UPD: {
     unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
-                            ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+                            ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
     unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                              ARM::VST4q16Pseudo_UPD,
                              ARM::VST4q32Pseudo_UPD };
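
[Note — illustrative aside, not part of the patch. The DOpcodes/QOpcodes
tables in these cases list opcodes for 8-, 16-, 32- and 64-bit elements in
that order; the real selectors switch on the vector type to pick a slot.
A hypothetical helper showing just the indexing convention:

    #include "llvm/Support/ErrorHandling.h"

    static unsigned opcodeIndexForElementBits(unsigned Bits) {
      switch (Bits) {
      case 8:  return 0;
      case 16: return 1;
      case 32: return 2;
      case 64: return 3; // 64-bit slots fall back to VLD1/VST1-based forms
      default: llvm_unreachable("unexpected NEON element size");
      }
    }
]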