X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=b0c6205ac69139de1aceab10d0d9e449027e5050;hb=a4e1564971de0d08816f0ad1d29b2f29cf1b5883;hp=dbe3c4aee1ca8ee7999d23ec1ba588754568c426;hpb=4d962b05cab56d2f61a32686560b9985aa101509;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index dbe3c4aee1c..b0c6205ac69 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10231,7 +10231,6 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - assert(Subtarget->hasDQI() && "We can only lower v8i64 with AVX-512-DQI"); // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); @@ -10247,7 +10246,6 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast(Op); ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - assert(Subtarget->hasDQI() && "We can only lower v16i32 with AVX-512-DQI!"); // FIXME: Implement direct support for this type! return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG); @@ -10299,6 +10297,11 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, assert(Subtarget->hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); + // Check for being able to broadcast a single element. + if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(VT.SimpleTy, DL, V1, + Mask, Subtarget, DAG)) + return Broadcast; + // Dispatch to each element type for lowering. If we don't have supprot for // specific element type shuffles at 512 bits, immediately split them and // lower them. Each lowering routine of a given type is allowed to assume that @@ -10309,13 +10312,9 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, case MVT::v16f32: return lowerV16F32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v8i64: - if (Subtarget->hasDQI()) - return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); - break; + return lowerV8I64VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v16i32: - if (Subtarget->hasDQI()) - return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); - break; + return lowerV16I32VectorShuffle(Op, V1, V2, Subtarget, DAG); case MVT::v32i16: if (Subtarget->hasBWI()) return lowerV32I16VectorShuffle(Op, V1, V2, Subtarget, DAG); @@ -14367,6 +14366,37 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } +/// The minimum architected relative accuracy is 2^-12. We need one +/// Newton-Raphson step to have a good float result (24 bits of precision). +SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps, + bool &UseOneConstNR) const { + // FIXME: We should use instruction latency models to calculate the cost of + // each potential sequence, but this is very hard to do reliably because + // at least Intel's Core* chips have variable timing based on the number of + // significant digits in the divisor and/or sqrt operand. + if (!Subtarget->useSqrtEst()) + return SDValue(); + + EVT VT = Op.getValueType(); + + // SSE1 has rsqrtss and rsqrtps. + // TODO: Add support for AVX512 (v16f32). + // It is likely not profitable to do this for f64 because a double-precision + // rsqrt estimate with refinement on x86 prior to FMA requires at least 16 + // instructions: convert to single, rsqrtss, convert back to double, refine + // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA + // along with FMA, this could be a throughput win. + if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || + (Subtarget->hasAVX() && VT == MVT::v8f32)) { + RefinementSteps = 1; + UseOneConstNR = false; + return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op); + } + return SDValue(); +} + static bool isAllOnes(SDValue V) { ConstantSDNode *C = dyn_cast(V); return C && C->isAllOnesValue(); @@ -16128,8 +16158,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -/// \brief Return (and \p Op, \p Mask) for compare instructions or -/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the +/// \brief Return (vselect \p Mask, \p Op, \p PreservedSrc) along with the /// necessary casting for \p Mask when lowering masking intrinsics. static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, SelectionDAG &DAG) { @@ -16150,16 +16179,6 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), DAG.getIntPtrConstant(0)); - - switch (Op.getOpcode()) { - default: break; - case X86ISD::PCMPEQM: - case X86ISD::PCMPGTM: - case X86ISD::CMPM: - case X86ISD::CMPMU: - return DAG.getNode(ISD::AND, dl, VT, Op, VMask); - } - return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc); } @@ -16234,9 +16253,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { // (v2i64 %a), (v2i64 %b), (i8 %mask))) -> // (i8 (bitcast // (v8i1 (insert_subvector undef, - // (v2i1 (and (PCMPEQM %a, %b), - // (extract_subvector - // (v8i1 (bitcast %mask)), 0))), 0)))) + // (v2i1 (vselect (extract_subvector + // (v8i1 (bitcast %mask)), 0), + // (PCMPEQM %a, %b), 0)))))) EVT VT = Op.getOperand(1).getValueType(); EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, VT.getVectorNumElements()); @@ -21745,7 +21764,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, /// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target /// specific shuffle of a load can be folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but -/// shuffles have been customed lowered so we need to handle those here. +/// shuffles have been custom lowered so we need to handle those here. static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { if (DCI.isBeforeLegalizeOps()) @@ -21757,18 +21776,20 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, if (!isa(EltNo)) return SDValue(); - EVT VT = InVec.getValueType(); + EVT OriginalVT = InVec.getValueType(); if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) return SDValue(); EVT BCVT = InVec.getOperand(0).getValueType(); - if (BCVT.getVectorNumElements() != VT.getVectorNumElements()) + if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements()) return SDValue(); InVec = InVec.getOperand(0); } + EVT CurrentVT = InVec.getValueType(); + if (!isTargetShuffle(InVec.getOpcode())) return SDValue(); @@ -21778,12 +21799,12 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SmallVector ShuffleMask; bool UnaryShuffle; - if (!getTargetShuffleMask(InVec.getNode(), VT.getSimpleVT(), ShuffleMask, - UnaryShuffle)) + if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), + ShuffleMask, UnaryShuffle)) return SDValue(); // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = VT.getVectorNumElements(); + unsigned NumElems = CurrentVT.getVectorNumElements(); int Elt = cast(EltNo)->getZExtValue(); int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt]; SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) @@ -21825,11 +21846,12 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); // Create shuffle node taking into account the case that its a unary shuffle - SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1); - Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl, + SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(CurrentVT) + : InVec.getOperand(1); + Shuffle = DAG.getVectorShuffle(CurrentVT, dl, InVec.getOperand(0), Shuffle, &ShuffleMask[0]); - Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); + Shuffle = DAG.getNode(ISD::BITCAST, dl, OriginalVT, Shuffle); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, EltNo); }