diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 63a3009fd82..658507ded9b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -107,7 +107,7 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
   // If the input is a buildvector just emit a smaller one.
   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
-                       makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
+                       makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
                        ElemsPerChunk));
 
   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
@@ -115,13 +115,13 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
                      VecIdx);
 
   return Result;
-
 }
+
 /// Generate a DAG to grab 128-bits from a vector > 128 bits. This
 /// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
 /// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
 /// instructions or a simple subregister reference. Idx is an index in the
-/// 128 bits we want. It need not be aligned to a 128-bit bounday. That makes
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
 /// lowering EXTRACT_VECTOR_ELT operations easier.
 static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
                                    SelectionDAG &DAG, SDLoc dl) {
@@ -158,25 +158,23 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
                                * ElemsPerChunk);
 
   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
-  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
-                     VecIdx);
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
 }
+
 /// Generate a DAG to put 128-bits into a vector > 128 bits. This
 /// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
 /// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
 /// simple superregister reference. Idx is an index in the 128 bits
-/// we want. It need not be aligned to a 128-bit bounday. That makes
+/// we want. It need not be aligned to a 128-bit boundary. That makes
 /// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result, SDValue Vec,
-                                  unsigned IdxVal, SelectionDAG &DAG,
-                                  SDLoc dl) {
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+                                  SelectionDAG &DAG, SDLoc dl) {
   assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
 }
 
-static SDValue Insert256BitVector(SDValue Result, SDValue Vec,
-                                  unsigned IdxVal, SelectionDAG &DAG,
-                                  SDLoc dl) {
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+                                  SelectionDAG &DAG, SDLoc dl) {
   assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
   return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
 }
@@ -230,13 +228,13 @@ void X86TargetLowering::resetOperationActions() {
   // Set up the TargetLowering object.
   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
 
-  // X86 is weird, it always uses i8 for shift amounts and setcc results.
+  // X86 is weird. It always uses i8 for shift amounts and setcc results.
   setBooleanContents(ZeroOrOneBooleanContent);
   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
-  // For 64-bit since we have so many registers use the ILP scheduler, for
-  // 32-bit code use the register pressure specific scheduling.
+  // For 64-bit, since we have so many registers, use the ILP scheduler.
+  // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
   if (Subtarget->isAtom())
     setSchedulingPreference(Sched::ILP);
@@ -248,7 +246,7 @@ void X86TargetLowering::resetOperationActions() {
       TM.getSubtarget<X86Subtarget>().getRegisterInfo();
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
-  // Bypass expensive divides on Atom when compiling with O2
+  // Bypass expensive divides on Atom when compiling with O2.
   if (TM.getOptLevel() >= CodeGenOpt::Default) {
     if (Subtarget->hasSlowDivide32())
       addBypassSlowDiv(32, 8);
@@ -1321,21 +1319,13 @@ void X86TargetLowering::resetOperationActions() {
 
       // Extract subvector is special because the value type
      // (result) is 128-bit but the source is 256-bit wide.
-      if (VT.is128BitVector()) {
-        if (VT.getScalarSizeInBits() >= 32) {
-          setOperationAction(ISD::MLOAD, VT, Custom);
-          setOperationAction(ISD::MSTORE, VT, Custom);
-        }
+      if (VT.is128BitVector())
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-      }
+
       // Do not attempt to custom lower other non-256-bit vectors
       if (!VT.is256BitVector())
         continue;
 
-      if (VT.getScalarSizeInBits() >= 32) {
-        setOperationAction(ISD::MLOAD, VT, Legal);
-        setOperationAction(ISD::MSTORE, VT, Legal);
-      }
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
@@ -1502,13 +1492,9 @@ void X86TargetLowering::resetOperationActions() {
 
       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
       // Extract subvector is special because the value type
       // (result) is 256/128-bit but the source is 512-bit wide.
-      if (VT.is128BitVector() || VT.is256BitVector()) {
+      if (VT.is128BitVector() || VT.is256BitVector())
         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-        if ( EltSize >= 32) {
-          setOperationAction(ISD::MLOAD, VT, Legal);
-          setOperationAction(ISD::MSTORE, VT, Legal);
-        }
-      }
+
       if (VT.getVectorElementType() == MVT::i1)
         setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
@@ -1524,14 +1510,12 @@ void X86TargetLowering::resetOperationActions() {
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-      setOperationAction(ISD::MLOAD, VT, Legal);
-      setOperationAction(ISD::MSTORE, VT, Legal);
     }
   }
 
   for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
     MVT VT = (MVT::SimpleValueType)i;
 
-    // Do not attempt to promote non-256-bit vectors
+    // Do not attempt to promote non-256-bit vectors.
     if (!VT.is512BitVector())
       continue;
@@ -1557,11 +1541,11 @@ void X86TargetLowering::resetOperationActions() {
 
     const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
 
-    // Do not attempt to promote non-256-bit vectors
+    // Do not attempt to promote non-256-bit vectors.
     if (!VT.is512BitVector())
       continue;
 
-    if ( EltSize < 32) {
+    if (EltSize < 32) {
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VSELECT, VT, Legal);
     }
@@ -1622,9 +1606,8 @@ void X86TargetLowering::resetOperationActions() {
     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
     setLibcallName(RTLIB::SINCOS_F64, "sincos");
     if (Subtarget->isTargetDarwin()) {
-      // For MacOSX, we don't want to the normal expansion of a libcall to
-      // sincos. We want to issue a libcall to __sincos_stret to avoid memory
-      // traffic.
+      // For MacOSX, we don't want the normal expansion of a libcall to sincos.
+      // We want to issue a libcall to __sincos_stret to avoid memory traffic.
       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
     }
@@ -2009,7 +1992,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
     SDValue ValToCopy = OutVals[i];
     EVT ValVT = ValToCopy.getValueType();
 
-    // Promote values to the appropriate types
+    // Promote values to the appropriate types.
     if (VA.getLocInfo() == CCValAssign::SExt)
       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
     else if (VA.getLocInfo() == CCValAssign::ZExt)
@@ -8217,6 +8200,11 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                     getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
   }
 
+  // Try to use byte shift instructions.
+  if (SDValue Shift = lowerVectorShuffleAsByteShift(
+          DL, MVT::v2i64, V1, V2, Mask, DAG))
+    return Shift;
+
   // If we have a single input from V2 insert that into V1 if we can do so
   // cheaply.
   if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
@@ -8243,11 +8231,6 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                 Subtarget, DAG))
     return Blend;
 
-  // Try to use byte shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsByteShift(
-          DL, MVT::v2i64, V1, V2, Mask, DAG))
-    return Shift;
-
   // Try to use byte rotation instructions.
   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
   if (Subtarget->hasSSSE3())
@@ -8508,6 +8491,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                        getV4X86ShuffleImm8ForMask(Mask, DAG));
   }
 
+  // Try to use byte shift instructions.
+  if (SDValue Shift = lowerVectorShuffleAsByteShift(
+          DL, MVT::v4i32, V1, V2, Mask, DAG))
+    return Shift;
+
   // There are special ways we can lower some single-element blends.
   if (NumV2Elements == 1)
     if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
@@ -8525,11 +8513,6 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                 Subtarget, DAG))
     return Blend;
 
-  // Try to use byte shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsByteShift(
-          DL, MVT::v4i32, V1, V2, Mask, DAG))
-    return Shift;
-
   // Try to use byte rotation instructions.
   // Its more profitable for pre-SSSE3 to use shuffles/unpacks.
   if (Subtarget->hasSSSE3())
@@ -8593,17 +8576,17 @@ static SDValue lowerV8I16SingleInputVectorShuffle(
                                                     Mask, Subtarget, DAG))
     return Broadcast;
 
+  // Try to use byte shift instructions.
+  if (SDValue Shift = lowerVectorShuffleAsByteShift(
+          DL, MVT::v8i16, V, V, Mask, DAG))
+    return Shift;
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (isShuffleEquivalent(Mask, 0, 0, 1, 1, 2, 2, 3, 3))
     return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V, V);
   if (isShuffleEquivalent(Mask, 4, 4, 5, 5, 6, 6, 7, 7))
     return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V, V);
 
-  // Try to use byte shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsByteShift(
-          DL, MVT::v8i16, V, V, Mask, DAG))
-    return Shift;
-
   // Try to use byte rotation instructions.
   if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
           DL, MVT::v8i16, V, V, Mask, Subtarget, DAG))
@@ -9210,6 +9193,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
                             "to be V1-input shuffles.");
 
+  // Try to use byte shift instructions.
+  if (SDValue Shift = lowerVectorShuffleAsByteShift(
+          DL, MVT::v8i16, V1, V2, Mask, DAG))
+    return Shift;
+
   // There are special ways we can lower some single-element blends.
   if (NumV2Inputs == 1)
     if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v8i16, DL, V1, V2,
@@ -9227,11 +9215,6 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                 Subtarget, DAG))
     return Blend;
 
-  // Try to use byte shift instructions.
-  if (SDValue Shift = lowerVectorShuffleAsByteShift(
-          DL, MVT::v8i16, V1, V2, Mask, DAG))
-    return Shift;
-
   // Try to use byte rotation instructions.
   if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
           DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
@@ -16799,6 +16782,23 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
   return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
 }
 
+static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
+                                    SDValue PreservedSrc,
+                                    const X86Subtarget *Subtarget,
+                                    SelectionDAG &DAG) {
+  if (isAllOnes(Mask))
+    return Op;
+
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+  // The mask should be of type MVT::i1
+  SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
+
+  if (PreservedSrc.getOpcode() == ISD::UNDEF)
+    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+  return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+}
+
 static unsigned getOpcodeForFMAIntrinsic(unsigned IntNo) {
   switch (IntNo) {
   default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -16872,6 +16872,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
                                               RoundingMode),
                                   Mask, Src0, Subtarget, DAG);
     }
+    case INTR_TYPE_SCALAR_MASK_RM: {
+      SDValue Src1 = Op.getOperand(1);
+      SDValue Src2 = Op.getOperand(2);
+      SDValue Src0 = Op.getOperand(3);
+      SDValue Mask = Op.getOperand(4);
+      SDValue RoundingMode = Op.getOperand(5);
+      return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
+                                              RoundingMode),
+                                  Mask, Src0, Subtarget, DAG);
+    }
     case INTR_TYPE_2OP_MASK: {
       return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
                                               Op.getOperand(1), Op.getOperand(2)),
@@ -21333,6 +21343,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::EH_SjLj_LongJmp64:
     return emitEHSjLjLongJmp(MI, BB);
 
+  case TargetOpcode::STATEPOINT:
+    // As an implementation detail, STATEPOINT shares the STACKMAP format at
+    // this point in the process. We diverge later.
+    return emitPatchPoint(MI, BB);
+
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, BB);