X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FMips%2FMipsSEISelLowering.cpp;h=7417f6dc79ff933dbd4a811f8ad7dca01d180f25;hb=7eee3b07b5e5ad758f8aa375f8051468dc84f328;hp=fc536bb67cd92a76197215f0e4cd60c4691b4089;hpb=aed9334acfdd8fa7548dc540fe865a5a641cb208;p=oota-llvm.git diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index fc536bb67cd..7417f6dc79f 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -10,8 +10,8 @@ // Subclass of MipsTargetLowering specialized for mips32/64. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-isel" #include "MipsSEISelLowering.h" +#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -19,10 +19,13 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "mips-isel" + static cl::opt EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, cl::desc("MIPS: Enable tail calls."), cl::init(false)); @@ -32,15 +35,16 @@ static cl::opt NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), "stores to their single precision " "counterparts")); -MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) - : MipsTargetLowering(TM) { +MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, + const MipsSubtarget &STI) + : MipsTargetLowering(TM, STI) { // Set up the register classes addRegisterClass(MVT::i32, &Mips::GPR32RegClass); - if (HasMips64) + if (Subtarget.isGP64bit()) addRegisterClass(MVT::i64, &Mips::GPR64RegClass); - if (Subtarget->hasDSP() || Subtarget->hasMSA()) { + if (Subtarget.hasDSP() || Subtarget.hasMSA()) { // Expand all truncating stores and extending loads. unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; @@ -56,7 +60,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) } } - if (Subtarget->hasDSP()) { + if (Subtarget.hasDSP()) { MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { @@ -80,10 +84,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::VSELECT); } - if (Subtarget->hasDSPR2()) + if (Subtarget.hasDSPR2()) setOperationAction(ISD::MUL, MVT::v2i16, Legal); - if (Subtarget->hasMSA()) { + if (Subtarget.hasMSA()) { addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); @@ -93,17 +97,18 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::XOR); } - if (!Subtarget->mipsSEUsesSoftFloat()) { + if (!Subtarget.abiUsesSoftFloat()) { addRegisterClass(MVT::f32, &Mips::FGR32RegClass); // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (Subtarget->isFP64bit()) + if (!Subtarget.isSingleFloat()) { + if (Subtarget.isFP64bit()) addRegisterClass(MVT::f64, &Mips::FGR64RegClass); else addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); @@ -115,10 +120,16 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MULHS, MVT::i32, Custom); setOperationAction(ISD::MULHU, MVT::i32, Custom); - if (HasMips64) { + if (Subtarget.hasCnMips()) + setOperationAction(ISD::MUL, MVT::i64, Legal); + else if (Subtarget.isGP64bit()) + setOperationAction(ISD::MUL, MVT::i64, Custom); + + if (Subtarget.isGP64bit()) { setOperationAction(ISD::MULHS, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Custom); - setOperationAction(ISD::MUL, MVT::i64, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); } setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); @@ -126,8 +137,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SDIVREM, MVT::i32, Custom); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); - setOperationAction(ISD::SDIVREM, MVT::i64, Custom); - setOperationAction(ISD::UDIVREM, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); setOperationAction(ISD::LOAD, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); @@ -145,12 +154,91 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STORE, MVT::f64, Custom); } + if (Subtarget.hasMips32r6()) { + // MIPS32r6 replaces the accumulator-based multiplies with a three register + // instruction + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MULHS, MVT::i32, Legal); + setOperationAction(ISD::MULHU, MVT::i32, Legal); + + // MIPS32r6 replaces the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Legal); + setOperationAction(ISD::UDIV, MVT::i32, Legal); + setOperationAction(ISD::SREM, MVT::i32, Legal); + setOperationAction(ISD::UREM, MVT::i32, Legal); + + // MIPS32r6 replaces conditional moves with an equivalent that removes the + // need for three GPR read ports. + setOperationAction(ISD::SETCC, MVT::i32, Legal); + setOperationAction(ISD::SELECT, MVT::i32, Legal); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + + setOperationAction(ISD::SETCC, MVT::f32, Legal); + setOperationAction(ISD::SELECT, MVT::f32, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + + assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); + setOperationAction(ISD::SETCC, MVT::f64, Legal); + setOperationAction(ISD::SELECT, MVT::f64, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + setOperationAction(ISD::BRCOND, MVT::Other, Legal); + + // Floating point > and >= are supported via < and <= + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); + + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); + } + + if (Subtarget.hasMips64r6()) { + // MIPS64r6 replaces the accumulator-based multiplies with a three register + // instruction + setOperationAction(ISD::MUL, MVT::i64, Legal); + setOperationAction(ISD::MULHS, MVT::i64, Legal); + setOperationAction(ISD::MULHU, MVT::i64, Legal); + + // MIPS32r6 replaces the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Legal); + setOperationAction(ISD::UDIV, MVT::i64, Legal); + setOperationAction(ISD::SREM, MVT::i64, Legal); + setOperationAction(ISD::UREM, MVT::i64, Legal); + + // MIPS64r6 replaces conditional moves with an equivalent that removes the + // need for three GPR read ports. + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SELECT, MVT::i64, Legal); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + } + computeRegisterProperties(); } const MipsTargetLowering * -llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { - return new MipsSETargetLowering(TM); +llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, + const MipsSubtarget &STI) { + return new MipsSETargetLowering(TM, STI); +} + +const TargetRegisterClass * +MipsSETargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; + + return TargetLowering::getRepRegClassFor(VT); } // Enable MSA support for the given integer type and Register class. @@ -242,9 +330,22 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { } bool -MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { +MipsSETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + if (Subtarget.systemSupportsUnalignedAccess()) { + // MIPS32r6/MIPS64r6 is required to support unaligned access. It's + // implementation defined whether this is handled by hardware, software, or + // a hybrid of the two but it's expected that most implementations will + // handle the majority of cases in hardware. + if (Fast) + *Fast = true; + return true; + } + switch (SVT) { case MVT::i64: case MVT::i32: @@ -426,12 +527,12 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMADD(N, &DAG)) + if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && + N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -446,8 +547,8 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, // - Removes redundant zero extensions performed by an ISD::AND. static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (!Subtarget->hasMSA()) + const MipsSubtarget &Subtarget) { + if (!Subtarget.hasMSA()) return SDValue(); SDValue Op0 = N->getOperand(0); @@ -478,10 +579,202 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || Log2 == ExtendTySize) { SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; - DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, - Op0->getVTList(), Ops, Op0->getNumOperands()); - return Op0; + return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), + Op0->getVTList(), + makeArrayRef(Ops, Op0->getNumOperands())); + } + } + + return SDValue(); +} + +// Determine if the specified node is a constant vector splat. +// +// Returns true and sets Imm if: +// * N is a ISD::BUILD_VECTOR representing a constant splat +// +// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The +// differences are that it assumes the MSA has already been checked and the +// arbitrary requirement for a maximum of 32-bit integers isn't applied (and +// must not be in order for binsri.d to be selectable). +static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { + BuildVectorSDNode *Node = dyn_cast(N.getNode()); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, !IsLittleEndian)) + return false; + + Imm = SplatValue; + + return true; +} + +// Test whether the given node is an all-ones build_vector. +static bool isVectorAllOnes(SDValue N) { + // Look through bitcasts. Endianness doesn't matter because we are looking + // for an all-ones value. + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + BuildVectorSDNode *BVN = dyn_cast(N); + + if (!BVN) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + // Endianness doesn't matter in this context because we are looking for + // an all-ones value. + if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) + return SplatValue.isAllOnesValue(); + + return false; +} + +// Test whether N is the bitwise inverse of OfNode. +static bool isBitwiseInverse(SDValue N, SDValue OfNode) { + if (N->getOpcode() != ISD::XOR) + return false; + + if (isVectorAllOnes(N->getOperand(0))) + return N->getOperand(1) == OfNode; + + if (isVectorAllOnes(N->getOperand(1))) + return N->getOperand(0) == OfNode; + + return false; +} + +// Perform combines where ISD::OR is the root node. +// +// Performs the following transformations: +// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) +// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit +// vector type. +static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + if (!Subtarget.hasMSA()) + return SDValue(); + + EVT Ty = N->getValueType(0); + + if (!Ty.is128BitVector()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + SDValue Op1Op0 = Op1->getOperand(0); + SDValue Op1Op1 = Op1->getOperand(1); + bool IsLittleEndian = !Subtarget.isLittle(); + + SDValue IfSet, IfClr, Cond; + bool IsConstantMask = false; + APInt Mask, InvMask; + + // If Op0Op0 is an appropriate mask, try to find it's inverse in either + // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while + // looking. + // IfClr will be set if we find a valid match. + if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { + Cond = Op0Op0; + IfSet = Op0Op1; + + if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && + Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && + Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) + IfClr = Op1Op0; + + IsConstantMask = true; + } + + // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same + // thing again using this mask. + // IfClr will be set if we find a valid match. + if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { + Cond = Op0Op1; + IfSet = Op0Op0; + + if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && + Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) + IfClr = Op1Op1; + else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && + Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) + IfClr = Op1Op0; + + IsConstantMask = true; + } + + // If IfClr is not yet set, try looking for a non-constant match. + // IfClr will be set if we find a valid match amongst the eight + // possibilities. + if (!IfClr.getNode()) { + if (isBitwiseInverse(Op0Op0, Op1Op0)) { + Cond = Op1Op0; + IfSet = Op1Op1; + IfClr = Op0Op1; + } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { + Cond = Op1Op0; + IfSet = Op1Op1; + IfClr = Op0Op0; + } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { + Cond = Op1Op1; + IfSet = Op1Op0; + IfClr = Op0Op1; + } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { + Cond = Op1Op1; + IfSet = Op1Op0; + IfClr = Op0Op0; + } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { + Cond = Op0Op0; + IfSet = Op0Op1; + IfClr = Op1Op1; + } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { + Cond = Op0Op0; + IfSet = Op0Op1; + IfClr = Op1Op0; + } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { + Cond = Op0Op1; + IfSet = Op0Op0; + IfClr = Op1Op1; + } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { + Cond = Op0Op1; + IfSet = Op0Op0; + IfClr = Op1Op0; + } } + + // At this point, IfClr will be set if we have a valid match. + if (!IfClr.getNode()) + return SDValue(); + + assert(Cond.getNode() && IfSet.getNode()); + + // Fold degenerate cases. + if (IsConstantMask) { + if (Mask.isAllOnesValue()) + return IfSet; + else if (Mask == 0) + return IfClr; + } + + // Transform the DAG into an equivalent VSELECT. + return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); } return SDValue(); @@ -489,11 +782,11 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + if (Subtarget.hasMips32() && N->getValueType(0) == MVT::i32 && selectMSUB(N, &DAG)) return SDValue(N, 0); @@ -553,7 +846,7 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { // See if this is a vector splat immediate node. APInt SplatValue, SplatUndef; unsigned SplatBitSize; @@ -561,9 +854,12 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); BuildVectorSDNode *BV = dyn_cast(N->getOperand(1)); + if (!Subtarget.hasDSP()) + return SDValue(); + if (!BV || !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, - EltSize, !Subtarget->isLittle()) || + EltSize, !Subtarget.isLittle()) || (SplatBitSize != EltSize) || (SplatValue.getZExtValue() >= EltSize)) return SDValue(); @@ -574,7 +870,7 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) @@ -597,10 +893,10 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, // used for DSPr2. static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); - if (Subtarget->hasMSA()) { + if (Subtarget.hasMSA()) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -627,14 +923,14 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TotalBits <= 32)) { SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), Op0Op0->getOperand(2) }; - DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, - Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands()); - return Op0Op0; + return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), + Op0Op0->getVTList(), + makeArrayRef(Ops, Op0Op0->getNumOperands())); } } } - if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) return SDValue(); return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); @@ -643,10 +939,10 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); - if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) + if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) return SDValue(); return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); @@ -740,10 +1036,10 @@ static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { } static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, - const MipsSubtarget *Subtarget) { + const MipsSubtarget &Subtarget) { EVT Ty = N->getValueType(0); - if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { + if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { // Try the following combines: // (xor (or $a, $b), (build_vector allones)) // (xor (or $a, $b), (bitcast (build_vector allones))) @@ -777,6 +1073,9 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { case ISD::AND: Val = performANDCombine(N, DAG, DCI, Subtarget); break; + case ISD::OR: + Val = performORCombine(N, DAG, DCI, Subtarget); + break; case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -845,6 +1144,18 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return emitINSERT_FW(MI, BB); case Mips::INSERT_FD_PSEUDO: return emitINSERT_FD(MI, BB); + case Mips::INSERT_B_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 1, false); + case Mips::INSERT_H_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 2, false); + case Mips::INSERT_W_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 4, false); + case Mips::INSERT_D_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 8, false); + case Mips::INSERT_FW_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 4, true); + case Mips::INSERT_FD_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 8, true); case Mips::FILL_FW_PSEUDO: return emitFILL_FW(MI, BB); case Mips::FILL_FD_PSEUDO: @@ -876,17 +1187,12 @@ void MipsSETargetLowering:: getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, - CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - } else - Ops.push_back(Callee); - + bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, + SDValue Chain) const { + Ops.push_back(Callee); MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, - InternalLinkage, CLI, Callee, Chain); + InternalLinkage, IsCallReloc, CLI, Callee, + Chain); } SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { @@ -913,12 +1219,12 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { Nd.isNonTemporal(), Nd.isInvariant(), std::min(Nd.getAlignment(), 4U)); - if (!Subtarget->isLittle()) + if (!Subtarget.isLittle()) std::swap(Lo, Hi); SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); SDValue Ops[2] = {BP, Hi.getValue(1)}; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -936,24 +1242,27 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Val, DAG.getConstant(1, MVT::i32)); - if (!Subtarget->isLittle()) + if (!Subtarget.isLittle()) std::swap(Lo, Hi); // i32 store to lower address. Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(), - Nd.getTBAAInfo()); + Nd.getAAInfo()); // i32 store to higher address. Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT)); return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), Nd.isVolatile(), Nd.isNonTemporal(), - std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo()); + std::min(Nd.getAlignment(), 4U), Nd.getAAInfo()); } SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { + // MIPS32r6/MIPS64r6 removed accumulator based multiplies. + assert(!Subtarget.hasMips32r6()); + EVT Ty = Op.getOperand(0).getValueType(); SDLoc DL(Op); SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, @@ -969,7 +1278,7 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, return HasLo ? Lo : Hi; SDValue Vals[] = { Lo, Hi }; - return DAG.getMergeValues(Vals, 2, DL); + return DAG.getMergeValues(Vals, DL); } @@ -1036,7 +1345,7 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); // Create node. - SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); + SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; if (!HasChainIn) @@ -1044,7 +1353,7 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { assert(Val->getValueType(1) == MVT::Other); SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; - return DAG.getMergeValues(Vals, 2, DL); + return DAG.getMergeValues(Vals, DL); } // Lower an MSA copy intrinsic into the specified SelectionDAG node @@ -1061,38 +1370,136 @@ static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { return Result; } -static SDValue -lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) { - EVT ViaVecTy = ResTy; - SmallVector Ops; - SDValue ImmHiOp; - - if (ViaVecTy == MVT::v2i64) { - ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp, - DAG.getConstant(31, MVT::i32)); - for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) { - Ops.push_back(ImmHiOp); - Ops.push_back(ImmOp); - } +static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { + EVT ResVecTy = Op->getValueType(0); + EVT ViaVecTy = ResVecTy; + SDLoc DL(Op); + + // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and + // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating + // lanes. + SDValue LaneA; + SDValue LaneB = Op->getOperand(2); + + if (ResVecTy == MVT::v2i64) { + LaneA = DAG.getConstant(0, MVT::i32); ViaVecTy = MVT::v4i32; - } else { - for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i) - Ops.push_back(ImmOp); + } else + LaneA = LaneB; + + SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, + LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; + + SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, + makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); + + if (ViaVecTy != ResVecTy) + Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result); + + return Result; +} + +static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { + return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0)); +} + +static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, + bool BigEndian, SelectionDAG &DAG) { + EVT ViaVecTy = VecTy; + SDValue SplatValueA = SplatValue; + SDValue SplatValueB = SplatValue; + SDLoc DL(SplatValue); + + if (VecTy == MVT::v2i64) { + // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. + ViaVecTy = MVT::v4i32; + + SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); + SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, + DAG.getConstant(32, MVT::i32)); + SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); } - SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0], - Ops.size()); + // We currently hold the parts in little endian order. Swap them if + // necessary. + if (BigEndian) + std::swap(SplatValueA, SplatValueB); - if (ResTy != ViaVecTy) - Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB, + SplatValueA, SplatValueB, SplatValueA, SplatValueB }; + + SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, + makeArrayRef(Ops, ViaVecTy.getVectorNumElements())); + + if (VecTy != ViaVecTy) + Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); return Result; } -static SDValue -lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) { - return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0), - Op->getOperand(ImmOp), DAG); +static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, + unsigned Opc, SDValue Imm, + bool BigEndian) { + EVT VecTy = Op->getValueType(0); + SDValue Exp2Imm; + SDLoc DL(Op); + + // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it + // here for now. + if (VecTy == MVT::v2i64) { + if (ConstantSDNode *CImm = dyn_cast(Imm)) { + APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); + + SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32); + SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32); + + if (BigEndian) + std::swap(BitImmLoOp, BitImmHiOp); + + Exp2Imm = + DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp, + BitImmHiOp, BitImmLoOp, BitImmHiOp)); + } + } + + if (!Exp2Imm.getNode()) { + // We couldnt constant fold, do a vector shift instead + + // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since + // only values 0-63 are valid. + if (VecTy == MVT::v2i64) + Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); + + Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); + + Exp2Imm = + DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm); + } + + return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); +} + +static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + SDValue One = DAG.getConstant(1, ResTy); + SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2)); + + return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), + DAG.getNOT(DL, Bit, ResTy)); +} + +static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1) + << cast(Op->getOperand(2))->getAPIntValue(); + SDValue BitMask = DAG.getConstant(~BitImm, ResTy); + + return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); } SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, @@ -1152,6 +1559,73 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_andi_b: return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_bclr_b: + case Intrinsic::mips_bclr_h: + case Intrinsic::mips_bclr_w: + case Intrinsic::mips_bclr_d: + return lowerMSABitClear(Op, DAG); + case Intrinsic::mips_bclri_b: + case Intrinsic::mips_bclri_h: + case Intrinsic::mips_bclri_w: + case Intrinsic::mips_bclri_d: + return lowerMSABitClearImm(Op, DAG); + case Intrinsic::mips_binsli_b: + case Intrinsic::mips_binsli_h: + case Intrinsic::mips_binsli_w: + case Intrinsic::mips_binsli_d: { + // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) + EVT VecTy = Op->getValueType(0); + EVT EltTy = VecTy.getVectorElementType(); + APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), + Op->getConstantOperandVal(3)); + return DAG.getNode(ISD::VSELECT, DL, VecTy, + DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), + Op->getOperand(1)); + } + case Intrinsic::mips_binsri_b: + case Intrinsic::mips_binsri_h: + case Intrinsic::mips_binsri_w: + case Intrinsic::mips_binsri_d: { + // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) + EVT VecTy = Op->getValueType(0); + EVT EltTy = VecTy.getVectorElementType(); + APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), + Op->getConstantOperandVal(3)); + return DAG.getNode(ISD::VSELECT, DL, VecTy, + DAG.getConstant(Mask, VecTy, true), Op->getOperand(2), + Op->getOperand(1)); + } + case Intrinsic::mips_bmnz_v: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), + Op->getOperand(2), Op->getOperand(1)); + case Intrinsic::mips_bmnzi_b: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), + lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), + Op->getOperand(1)); + case Intrinsic::mips_bmz_v: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), + Op->getOperand(1), Op->getOperand(2)); + case Intrinsic::mips_bmzi_b: + return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), + lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), + Op->getOperand(2)); + case Intrinsic::mips_bneg_b: + case Intrinsic::mips_bneg_h: + case Intrinsic::mips_bneg_w: + case Intrinsic::mips_bneg_d: { + EVT VecTy = Op->getValueType(0); + SDValue One = DAG.getConstant(1, VecTy); + + return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, + Op->getOperand(2))); + } + case Intrinsic::mips_bnegi_b: + case Intrinsic::mips_bnegi_h: + case Intrinsic::mips_bnegi_w: + case Intrinsic::mips_bnegi_d: + return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), + !Subtarget.isLittle()); case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: @@ -1162,13 +1636,32 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_bsel_v: + // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), - Op->getOperand(1), Op->getOperand(2), - Op->getOperand(3)); + Op->getOperand(1), Op->getOperand(3), + Op->getOperand(2)); case Intrinsic::mips_bseli_b: + // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), - Op->getOperand(1), Op->getOperand(2), - lowerMSASplatImm(Op, 3, DAG)); + Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), + Op->getOperand(2)); + case Intrinsic::mips_bset_b: + case Intrinsic::mips_bset_h: + case Intrinsic::mips_bset_w: + case Intrinsic::mips_bset_d: { + EVT VecTy = Op->getValueType(0); + SDValue One = DAG.getConstant(1, VecTy); + + return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, + Op->getOperand(2))); + } + case Intrinsic::mips_bseti_b: + case Intrinsic::mips_bseti_h: + case Intrinsic::mips_bseti_w: + case Intrinsic::mips_bseti_d: + return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), + !Subtarget.isLittle()); case Intrinsic::mips_bz_b: case Intrinsic::mips_bz_h: case Intrinsic::mips_bz_w: @@ -1243,25 +1736,34 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_copy_s_w: return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); case Intrinsic::mips_copy_s_d: - // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal. - // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type - // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), - Op->getOperand(1), Op->getOperand(2)); + if (Subtarget.hasMips64()) + // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); + else { + // Lower into the generic EXTRACT_VECTOR_ELT node and let the type + // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), + Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + } case Intrinsic::mips_copy_u_b: case Intrinsic::mips_copy_u_h: case Intrinsic::mips_copy_u_w: return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); case Intrinsic::mips_copy_u_d: - // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal. - // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type - // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. - // - // Note: When i64 is illegal, this results in copy_s.w instructions instead - // of copy_u.w instructions. This makes no difference to the behaviour - // since i64 is only illegal when the register file is 32-bit. - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), - Op->getOperand(1), Op->getOperand(2)); + if (Subtarget.hasMips64()) + // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. + return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); + else { + // Lower into the generic EXTRACT_VECTOR_ELT node and let the type + // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. + // Note: When i64 is illegal, this results in copy_s.w instructions + // instead of copy_u.w instructions. This makes no difference to the + // behaviour since i64 is only illegal when the register file is 32-bit. + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), + Op->getValueType(0), Op->getOperand(1), + Op->getOperand(2)); + } case Intrinsic::mips_div_s_b: case Intrinsic::mips_div_s_h: case Intrinsic::mips_div_s_w: @@ -1343,7 +1845,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, // If ResTy is v2i64 then the type legalizer will break this node down into // an equivalent v4i32. - return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, Ops); } case Intrinsic::mips_fexp2_w: case Intrinsic::mips_fexp2_d: { @@ -1418,12 +1920,20 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_insert_d: return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); + case Intrinsic::mips_insve_b: + case Intrinsic::mips_insve_h: + case Intrinsic::mips_insve_w: + case Intrinsic::mips_insve_d: + return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), + DAG.getConstant(0, MVT::i32)); case Intrinsic::mips_ldi_b: case Intrinsic::mips_ldi_h: case Intrinsic::mips_ldi_w: case Intrinsic::mips_ldi_d: return lowerMSASplatImm(Op, 1, DAG); - case Intrinsic::mips_lsa: { + case Intrinsic::mips_lsa: + case Intrinsic::mips_dlsa: { EVT ResTy = Op->getValueType(0); return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, @@ -1578,7 +2088,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. // Instead we lower to MipsISD::VSHF and match from there. return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), - lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), + lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), Op->getOperand(1)); case Intrinsic::mips_splati_b: case Intrinsic::mips_splati_h: @@ -1740,7 +2250,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, /// true. static bool isSplatVector(const BuildVectorSDNode *N) { unsigned int nOps = N->getNumOperands(); - assert(nOps > 1 && "isSplat has 0 or 1 sized build vector"); + assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); SDValue Operand0 = N->getOperand(0); @@ -1818,19 +2328,22 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, unsigned SplatBitSize; bool HasAnyUndefs; - if (!Subtarget->hasMSA() || !ResTy.is128BitVector()) + if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) return SDValue(); if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget->isLittle()) && SplatBitSize <= 64) { + !Subtarget.isLittle()) && SplatBitSize <= 64) { // We can only cope with 8, 16, 32, or 64-bit elements if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && SplatBitSize != 64) return SDValue(); // If the value fits into a simm10 then we can use ldi.[bhwd] - if (SplatValue.isSignedIntN(10)) + // However, if it isn't an integer type we will have to bitcast from an + // integer type first. Also, if there are any undefs, we must lower them + // to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10)) return Op; EVT ViaVecTy; @@ -1852,15 +2365,10 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, return SDValue(); } - SmallVector Ops; - SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32); - - for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) - Ops.push_back(Constant); - - SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy, - &Ops[0], Ops.size()); + // SelectionDAG::getConstant will promote SplatValue appropriately. + SDValue Result = DAG.getConstant(SplatValue, ViaVecTy); + // Bitcast to the type we originally wanted if (ViaVecTy != ResTy) Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); @@ -2162,8 +2670,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, ++I) Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy)); - SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0], - Ops.size()); + SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops); if (Using1stVec && Using2ndVec) { Op0 = Op->getOperand(0); @@ -2175,7 +2682,14 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, else llvm_unreachable("shuffle vector mask references neither vector operand?"); - return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1); + // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion. + // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11> + // VSHF concatenates the vectors in a bitwise fashion: + // <0b00, 0b01> + <0b10, 0b11> -> + // 0b0100 + 0b1110 -> 0b01001110 + // <0b10, 0b11, 0b00, 0b01> + // We must therefore swap the operands to get the correct result. + return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0); } // Lower VECTOR_SHUFFLE into one of a number of instructions depending on the @@ -2234,11 +2748,12 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $vr0 = phi($vr2, $fbb, $vr1, $tbb) MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const TargetRegisterClass *RC = &Mips::GPR32RegClass; DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); MachineFunction *F = BB->getParent(); MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -2248,7 +2763,7 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ F->insert(It, Sink); // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); Sink->transferSuccessorsAndUpdatePHIs(BB); @@ -2299,11 +2814,12 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, // $rd = phi($rd1, $fbb, $rd2, $tbb) MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const TargetRegisterClass *RC = &Mips::GPR32RegClass; DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction::iterator It = std::next(MachineFunction::iterator(BB)); MachineFunction *F = BB->getParent(); MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -2313,7 +2829,7 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, F->insert(It, Sink); // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); Sink->transferSuccessorsAndUpdatePHIs(BB); @@ -2360,7 +2876,8 @@ emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB, // for lane 1 because it would require FR=0 mode which isn't supported by MSA. MachineBasicBlock * MipsSETargetLowering:: emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Fd = MI->getOperand(0).getReg(); @@ -2372,7 +2889,7 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ else { unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); - BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane); BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo); } @@ -2392,9 +2909,10 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{ // valid because FR=1 mode which is the only supported mode in MSA. MachineBasicBlock * MipsSETargetLowering:: emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{ - assert(Subtarget->isFP64bit()); + assert(Subtarget.isFP64bit()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); unsigned Fd = MI->getOperand(0).getReg(); unsigned Ws = MI->getOperand(1).getReg(); @@ -2423,7 +2941,8 @@ emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const{ MachineBasicBlock * MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2439,7 +2958,8 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) .addReg(Wd_in) .addImm(Lane) - .addReg(Wt); + .addReg(Wt) + .addImm(0); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -2454,9 +2974,10 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, MachineBasicBlock *BB) const { - assert(Subtarget->isFP64bit()); + assert(Subtarget.isFP64bit()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2472,7 +2993,134 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) .addReg(Wd_in) .addImm(Lane) - .addReg(Wt); + .addReg(Wt) + .addImm(0); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. +// +// For integer: +// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs) +// => +// (SLL $lanetmp1, $lane, +// (SUBREG_TO_REG $wt, $fs, ) +// (SLL $lanetmp1, $lane, getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Wd = MI->getOperand(0).getReg(); + unsigned SrcVecReg = MI->getOperand(1).getReg(); + unsigned LaneReg = MI->getOperand(2).getReg(); + unsigned SrcValReg = MI->getOperand(3).getReg(); + + const TargetRegisterClass *VecRC = nullptr; + const TargetRegisterClass *GPRRC = + Subtarget.isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; + unsigned EltLog2Size; + unsigned InsertOp = 0; + unsigned InsveOp = 0; + switch (EltSizeInBytes) { + default: + llvm_unreachable("Unexpected size"); + case 1: + EltLog2Size = 0; + InsertOp = Mips::INSERT_B; + InsveOp = Mips::INSVE_B; + VecRC = &Mips::MSA128BRegClass; + break; + case 2: + EltLog2Size = 1; + InsertOp = Mips::INSERT_H; + InsveOp = Mips::INSVE_H; + VecRC = &Mips::MSA128HRegClass; + break; + case 4: + EltLog2Size = 2; + InsertOp = Mips::INSERT_W; + InsveOp = Mips::INSVE_W; + VecRC = &Mips::MSA128WRegClass; + break; + case 8: + EltLog2Size = 3; + InsertOp = Mips::INSERT_D; + InsveOp = Mips::INSVE_D; + VecRC = &Mips::MSA128DRegClass; + break; + } + + if (IsFP) { + unsigned Wt = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) + .addImm(0) + .addReg(SrcValReg) + .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); + SrcValReg = Wt; + } + + // Convert the lane index into a byte index + if (EltSizeInBytes != 1) { + unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SLL), LaneTmp1) + .addReg(LaneReg) + .addImm(EltLog2Size); + LaneReg = LaneTmp1; + } + + // Rotate bytes around so that the desired lane is element zero + unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) + .addReg(SrcVecReg) + .addReg(SrcVecReg) + .addReg(LaneReg); + + unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); + if (IsFP) { + // Use insve.df to insert to element zero + BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) + .addReg(WdTmp1) + .addImm(0) + .addReg(SrcValReg) + .addImm(0); + } else { + // Use insert.df to insert to element zero + BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) + .addReg(WdTmp1) + .addReg(SrcValReg) + .addImm(0); + } + + // Rotate elements the rest of the way for a full rotation. + // sld.df inteprets $rt modulo the number of columns so we only need to negate + // the lane index to do this. + unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SUB), LaneTmp2) + .addReg(Mips::ZERO) + .addReg(LaneReg); + BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) + .addReg(WdTmp2) + .addReg(WdTmp2) + .addReg(LaneTmp2); MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -2488,7 +3136,8 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2517,9 +3166,10 @@ MipsSETargetLowering::emitFILL_FW(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, MachineBasicBlock *BB) const { - assert(Subtarget->isFP64bit()); + assert(Subtarget.isFP64bit()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned Wd = MI->getOperand(0).getReg(); @@ -2547,7 +3197,8 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128WRegClass; unsigned Ws1 = RegInfo.createVirtualRegister(RC); @@ -2576,7 +3227,8 @@ MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI, MachineBasicBlock * MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); const TargetRegisterClass *RC = &Mips::MSA128DRegClass; unsigned Ws1 = RegInfo.createVirtualRegister(RC);