X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=08db5b89e96e97c6af8843f0fb65cd6e57308909;hb=e123ed65877ee3e2a26188b821b700cd51521b19;hp=6ffc524cdb8572affde564d114687df887f86d80;hpb=3fdc848a441558e5b7ce67bbb1414ae7d5fb6b07;p=oota-llvm.git

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6ffc524cdb8..08db5b89e96 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -75,7 +75,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
 
   // Set up the TargetLowering object.
-  static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
 
   // X86 is weird. It always uses i8 for shift amounts and setcc results.
   setBooleanContents(ZeroOrOneBooleanContent);
@@ -270,8 +269,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // (low) operations are left as Legal, as there are single-result
   // instructions for this in x86. Using the two-result multiply instructions
   // when both high and low results are needed must be arranged by dagcombine.
-  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
-    MVT VT = IntVTs[i];
+  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
     setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::MULHU, VT, Expand);
     setOperationAction(ISD::SDIV, VT, Expand);
@@ -462,8 +460,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
 
   // Expand certain atomics
-  for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
-    MVT VT = IntVTs[i];
+  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
@@ -861,14 +858,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
 
   // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
-  for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
-    // Do not attempt to custom lower non-power-of-2 vectors
-    if (!isPowerOf2_32(VT.getVectorNumElements()))
-      continue;
-    // Do not attempt to custom lower non-128-bit vectors
-    if (!VT.is128BitVector())
-      continue;
+  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
     setOperationAction(ISD::VSELECT, VT, Custom);
@@ -906,13 +896,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   }
 
   // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
-  for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
-
-    // Do not attempt to promote non-128-bit vectors
-    if (!VT.is128BitVector())
-      continue;
-
+  for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
     setOperationAction(ISD::AND,    VT, Promote);
     AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
     setOperationAction(ISD::OR,     VT, Promote);
@@ -1291,13 +1275,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
 
     // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
-    for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
-      MVT VT = (MVT::SimpleValueType)i;
-
-      // Do not attempt to promote non-256-bit vectors
-      if (!VT.is256BitVector())
-        continue;
-
+    for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
       setOperationAction(ISD::AND,    VT, Promote);
       AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
       setOperationAction(ISD::OR,     VT, Promote);
@@ -1605,13 +1583,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
         setOperationAction(ISD::MSTORE, VT, Legal);
       }
     }
-    for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
-      MVT VT = (MVT::SimpleValueType)i;
-
-      // Do not attempt to promote non-512-bit vectors.
-      if (!VT.is512BitVector())
-        continue;
-
+    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
       setOperationAction(ISD::SELECT, VT, Promote);
       AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
     }
@@ -1688,19 +1660,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Custom);
     }
 
-    for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
-      const MVT VT = (MVT::SimpleValueType)i;
-
-      const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
-
-      // Do not attempt to promote non-512-bit vectors.
-      if (!VT.is512BitVector())
-        continue;
-
-      if (EltSize < 32) {
-        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-        setOperationAction(ISD::VSELECT, VT, Legal);
-      }
+    for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+      setOperationAction(ISD::VSELECT, VT, Legal);
     }
   }
 
@@ -1753,9 +1715,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // FIXME: We really should do custom legalization for addition and
   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
   // than generic legalization for 64-bit multiplication-with-overflow, though.
-  for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+  for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+    if (VT == MVT::i64 && !Subtarget->is64Bit())
+      continue;
     // Add/Sub/Mul with overflow operations are custom lowered.
-    MVT VT = IntVTs[i];
     setOperationAction(ISD::SADDO, VT, Custom);
     setOperationAction(ISD::UADDO, VT, Custom);
     setOperationAction(ISD::SSUBO, VT, Custom);
@@ -23123,7 +23086,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
                      EltNo);
 }
 
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+                                     const X86Subtarget *Subtarget) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
@@ -23139,6 +23103,29 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
       return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
   }
 
+  // Convert a bitcasted integer logic operation that has one bitcasted
+  // floating-point operand and one constant operand into a floating-point
+  // logic operation. This may create a load of the constant, but that is
+  // cheaper than materializing the constant in an integer register and
+  // transferring it to an SSE register or transferring the SSE operand to
+  // integer register and back.
+  unsigned FPOpcode;
+  switch (N0.getOpcode()) {
+  case ISD::AND: FPOpcode = X86ISD::FAND; break;
+  case ISD::OR:  FPOpcode = X86ISD::FOR;  break;
+  case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+  default: return SDValue();
+  }
+  if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
+       (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+      isa<ConstantSDNode>(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+      N0.getOperand(0).getOperand(0).getValueType() == VT) {
+    SDValue N000 = N0.getOperand(0).getOperand(0);
+    SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
+    return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+  }
+
   return SDValue();
 }
 
@@ -26635,7 +26622,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SELECT:
   case X86ISD::SHRUNKBLEND:
     return PerformSELECTCombine(N, DAG, DCI, Subtarget);
-  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG);
+  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG, Subtarget);
  case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
  case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
  case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
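
Note (not part of the patch): the new fold in PerformBITCASTCombine matches a DAG of the
shape (fp bitcast (and/or/xor (int bitcast fp-value), constant)) and rewrites it to
X86ISD::FAND/FOR/FXOR, so the backend can emit an SSE logic instruction with a
constant-pool operand instead of bouncing the value through an integer register.
Below is a minimal C sketch of source that typically lowers to this pattern; the
function name and mask constant are made up for illustration.

#include <string.h>

/* Clear the sign bit of a float using integer bitwise ops. Front ends usually
 * turn the 4-byte memcpys into bitcasts, so the SelectionDAG sees
 *   (f32 (bitcast (and (i32 (bitcast x)), 0x7fffffff)))
 * which is the shape the combine above rewrites to
 *   (X86ISD::FAND x, (f32 (bitcast 0x7fffffff)))
 * on targets with SSE1 (SSE2 for the f64 case). */
float clear_sign(float x) {
  unsigned u;
  memcpy(&u, &x, sizeof(u)); /* bitcast float -> i32 */
  u &= 0x7fffffffu;          /* integer AND with a constant */
  memcpy(&x, &u, sizeof(u)); /* bitcast i32 -> float */
  return x;
}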