From: Benjamin Kramer Date: Sat, 26 Apr 2014 12:06:11 +0000 (+0000) Subject: X86: Custom lower v4i32 UMUL_LOHI into 2 pmuludqs. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=05e00b6e65f0aa258dd2fe45778fc084cbb93c67;p=oota-llvm.git X86: Custom lower v4i32 UMUL_LOHI into 2 pmuludqs. Test will follow soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207314 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7d195fcdae9..6ce1fa75f66 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -826,7 +826,9 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); @@ -938,6 +940,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::ADD, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v4i32, Legal); @@ -1226,6 +1229,8 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::MUL, MVT::v16i16, Legal); // Don't lower v32i8 because there is no 128-bit byte mul + setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); setOperationAction(ISD::SDIV, MVT::v8i32, Custom); @@ -13157,6 +13162,37 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } +static SDValue LowerUMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + EVT VT = Op0.getValueType(); + SDLoc dl(Op); + + assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || + (VT == MVT::v8i32 && Subtarget->hasInt256())); + + // Get the high parts. + const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8}; + SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask); + SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask); + + // Emit two multiplies, one for the lower 2 ints and one for the higher 2 + // ints. + MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64; + SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(X86ISD::PMULUDQ, dl, MulVT, Op0, Op1)); + SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(X86ISD::PMULUDQ, dl, MulVT, Hi0, Hi1)); + + // Shuffle it back into the right order. + const int HighMask[] = {1, 3, 5, 7, 9, 11, 13, 15}; + SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + const int LowMask[] = {0, 2, 4, 6, 8, 10, 12, 14}; + SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows); +} + static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); MVT EltTy = VT.getVectorElementType(); @@ -14201,6 +14237,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); + case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, Subtarget, DAG);